From dfc4d618dd31c00e339344962a839db5208ade8f Mon Sep 17 00:00:00 2001 From: Wooster Date: Thu, 3 Aug 2023 09:02:24 +0200 Subject: [PATCH 1/3] general-use std.debug.hexdump for printing hexdumps Recently, when I've been working with structures of data that is not directly in RAM but rather laid out in bytes somewhere else, it was always very useful to print out maybe the next 50 bytes or the previous 50 bytes or so to see what's ahead or before me. I would usually do this with a quick `std.debug.print("{any}\n", .{bytes});` or something but the output is not as nice obviously. --- lib/std/debug.zig | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 179e821da2..4d7c7ed9a3 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -104,6 +104,63 @@ pub fn getSelfDebugInfo() !*DebugInfo { } } +/// Tries to print a hexadecimal view of the bytes, unbuffered, and ignores any error returned. +pub fn hexdump(bytes: []const u8) void { + hexdump_internal(bytes) catch {}; +} + +fn hexdump_internal(bytes: []const u8) !void { + const stderr = std.io.getStdErr(); + const ttyconf = std.io.tty.detectConfig(stderr); + const writer = stderr.writer(); + var chunks = mem.window(u8, bytes, 16, 16); + while (chunks.next()) |window| { + // 1. Print the address of the first byte of this row; `window` is a sub-slice of `bytes`, so its pointer is that address. + const address = @intFromPtr(window.ptr); + try ttyconf.setColor(writer, .dim); + // We print the address in lowercase and the bytes in uppercase hexadecimal to distinguish them more. + // Also, make sure all lines are aligned by padding the address. + try writer.print("{x:0>[1]} ", .{ address, @sizeOf(usize) * 2 }); + try ttyconf.setColor(writer, .reset); + + // 2. Print the bytes. + for (window, 0..) 
|byte, index| { + try writer.print("{X:0>2} ", .{byte}); + if (index == 7) try writer.writeByte(' '); + } + try writer.writeByte(' '); + if (window.len < 16) { + var missing_columns = (16 - window.len) * 3; + if (window.len < 8) missing_columns += 1; + try writer.writeByteNTimes(' ', missing_columns); + } + + // 3. Print the characters. + for (window) |byte| { + if (std.ascii.isPrint(byte)) { + try writer.writeByte(byte); + } else { + // TODO: remove this `if` when https://github.com/ziglang/zig/issues/7600 is fixed + if (ttyconf == .windows_api) { + try writer.writeByte('.'); + continue; + } + + // Let's print some common control codes as graphical Unicode symbols. + // We don't want to do this for all control codes because most control codes apart from + // the ones that Zig has escape sequences for are likely not very useful to print as symbols. + switch (byte) { + '\n' => try writer.writeAll("␊"), + '\r' => try writer.writeAll("␍"), + '\t' => try writer.writeAll("␉"), + else => try writer.writeByte('.'), + } + } + } + try writer.writeByte('\n'); + } +} + /// Tries to print the current stack trace to stderr, unbuffered, and ignores any error returned. /// TODO multithreaded awareness pub fn dumpCurrentStackTrace(start_addr: ?usize) void { From ad7a09d95a3867ac9d8230c4b9694f711d09390e Mon Sep 17 00:00:00 2001 From: Wooster Date: Thu, 3 Aug 2023 09:42:04 +0200 Subject: [PATCH 2/3] std.testing.expectEqualSlices: some improvements This mainly replaces ChunkIterator with std.mem.window and also prints \n, \r, \t using Unicode symbols instead of periods because they're common non-printable characters. This same code exists in std.debug.hexdump. At some point maybe this code could be exposed through a public function. Then we could reuse the code in both places. 
--- lib/std/testing.zig | 69 +++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/lib/std/testing.zig b/lib/std/testing.zig index f79f877dea..6d3503ba1d 100644 --- a/lib/std/testing.zig +++ b/lib/std/testing.zig @@ -339,7 +339,8 @@ pub fn expectEqualSlices(comptime T: type, expected: []const T, actual: []const const actual_window = actual[window_start..@min(actual.len, window_start + max_window_size)]; const actual_truncated = window_start + actual_window.len < actual.len; - const ttyconf = std.io.tty.detectConfig(std.io.getStdErr()); + const stderr = std.io.getStdErr(); + const ttyconf = std.io.tty.detectConfig(stderr); var differ = if (T == u8) BytesDiffer{ .expected = expected_window, .actual = actual_window, @@ -350,7 +351,6 @@ pub fn expectEqualSlices(comptime T: type, expected: []const T, actual: []const .actual = actual_window, .ttyconf = ttyconf, }; - const stderr = std.io.getStdErr(); // Print indexes as hex for slices of u8 since it's more likely to be binary data where // that is usually useful. @@ -432,16 +432,17 @@ const BytesDiffer = struct { ttyconf: std.io.tty.Config, pub fn write(self: BytesDiffer, writer: anytype) !void { - var expected_iterator = ChunkIterator{ .bytes = self.expected }; + var expected_iterator = std.mem.window(u8, self.expected, 16, 16); + var row: usize = 0; while (expected_iterator.next()) |chunk| { // to avoid having to calculate diffs twice per chunk var diffs: std.bit_set.IntegerBitSet(16) = .{ .mask = 0 }; - for (chunk, 0..) |byte, i| { - const absolute_byte_index = (expected_iterator.index - chunk.len) + i; + for (chunk, 0..) 
|byte, col| { + const absolute_byte_index = col + row * 16; const diff = if (absolute_byte_index < self.actual.len) self.actual[absolute_byte_index] != byte else true; - if (diff) diffs.set(i); - try self.writeByteDiff(writer, "{X:0>2} ", byte, diff); - if (i == 7) try writer.writeByte(' '); + if (diff) diffs.set(col); + try self.writeDiff(writer, "{X:0>2} ", .{byte}, diff); + if (col == 7) try writer.writeByte(' '); } try writer.writeByte(' '); if (chunk.len < 16) { @@ -449,33 +450,38 @@ const BytesDiffer = struct { if (chunk.len < 8) missing_columns += 1; try writer.writeByteNTimes(' ', missing_columns); } - for (chunk, 0..) |byte, i| { - const byte_to_print = if (std.ascii.isPrint(byte)) byte else '.'; - try self.writeByteDiff(writer, "{c}", byte_to_print, diffs.isSet(i)); + for (chunk, 0..) |byte, col| { + const diff = diffs.isSet(col); + if (std.ascii.isPrint(byte)) { + try self.writeDiff(writer, "{c}", .{byte}, diff); + } else { + // TODO: remove this `if` when https://github.com/ziglang/zig/issues/7600 is fixed + if (self.ttyconf == .windows_api) { + try self.writeDiff(writer, ".", .{}, diff); + continue; + } + + // Let's print some common control codes as graphical Unicode symbols. + // We don't want to do this for all control codes because most control codes apart from + // the ones that Zig has escape sequences for are likely not very useful to print as symbols. 
+ switch (byte) { + '\n' => try self.writeDiff(writer, "␊", .{}, diff), + '\r' => try self.writeDiff(writer, "␍", .{}, diff), + '\t' => try self.writeDiff(writer, "␉", .{}, diff), + else => try self.writeDiff(writer, ".", .{}, diff), + } + } } try writer.writeByte('\n'); + row += 1; } } - fn writeByteDiff(self: BytesDiffer, writer: anytype, comptime fmt: []const u8, byte: u8, diff: bool) !void { + fn writeDiff(self: BytesDiffer, writer: anytype, comptime fmt: []const u8, args: anytype, diff: bool) !void { if (diff) try self.ttyconf.setColor(writer, .red); - try writer.print(fmt, .{byte}); + try writer.print(fmt, args); if (diff) try self.ttyconf.setColor(writer, .reset); } - - const ChunkIterator = struct { - bytes: []const u8, - index: usize = 0, - - pub fn next(self: *ChunkIterator) ?[]const u8 { - if (self.index == self.bytes.len) return null; - - const start_index = self.index; - const end_index = @min(self.bytes.len, start_index + 16); - self.index = end_index; - return self.bytes[start_index..end_index]; - } - }; }; test { @@ -926,11 +932,8 @@ fn printIndicatorLine(source: []const u8, indicator_index: usize) void { source.len; printLine(source[line_begin_index..line_end_index]); - { - var i: usize = line_begin_index; - while (i < indicator_index) : (i += 1) - print(" ", .{}); - } + for (line_begin_index..indicator_index) |_| + print(" ", .{}); if (indicator_index >= source.len) print("^ (end of string)\n", .{}) else @@ -947,7 +950,7 @@ fn printWithVisibleNewlines(source: []const u8) void { fn printLine(line: []const u8) void { if (line.len != 0) switch (line[line.len - 1]) { - ' ', '\t' => return print("{s}⏎\n", .{line}), // Carriage return symbol, + ' ', '\t' => return print("{s}⏎\n", .{line}), // Return symbol else => {}, }; print("{s}\n", .{line}); From 25d2e7fce04d5cbe63331cc56ab7bafe89c249c4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 8 Jan 2024 00:21:57 -0700 Subject: [PATCH 3/3] fixups from previous commit * rename the functions * make the 
other function public and give it a better name * interact with stderr_mutex * std lib test coverage --- lib/std/debug.zig | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4d7c7ed9a3..4670c49dfa 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -105,11 +105,15 @@ pub fn getSelfDebugInfo() !*DebugInfo { } /// Tries to print a hexadecimal view of the bytes, unbuffered, and ignores any error returned. -pub fn hexdump(bytes: []const u8) void { - hexdump_internal(bytes) catch {}; +/// Obtains the stderr mutex while dumping. +pub fn dump_hex(bytes: []const u8) void { + stderr_mutex.lock(); + defer stderr_mutex.unlock(); + dump_hex_fallible(bytes) catch {}; } -fn hexdump_internal(bytes: []const u8) !void { +/// Prints a hexadecimal view of the bytes, unbuffered, returning any error that occurs. +pub fn dump_hex_fallible(bytes: []const u8) !void { const stderr = std.io.getStdErr(); const ttyconf = std.io.tty.detectConfig(stderr); const writer = stderr.writer(); @@ -140,7 +144,7 @@ fn hexdump_internal(bytes: []const u8) !void { if (std.ascii.isPrint(byte)) { try writer.writeByte(byte); } else { - // TODO: remove this `if` when https://github.com/ziglang/zig/issues/7600 is fixed + // Related: https://github.com/ziglang/zig/issues/7600 if (ttyconf == .windows_api) { try writer.writeByte('.'); continue; @@ -2831,3 +2835,7 @@ pub fn ConfigurableTrace(comptime size: usize, comptime stack_frame_count: usize } }; } + +test { + _ = &dump_hex; +}