zig/lib/compiler/resinator/source_mapping.zig

1226 lines
49 KiB
Zig
Vendored

const std = @import("std");
const Allocator = std.mem.Allocator;
const utils = @import("utils.zig");
const UncheckedSliceWriter = utils.UncheckedSliceWriter;
pub const ParseLineCommandsResult = struct {
result: []u8,
mappings: SourceMappings,
};
const CurrentMapping = struct {
line_num: usize = 1,
filename: std.ArrayList(u8) = .empty,
pending: bool = true,
ignore_contents: bool = false,
};
pub const ParseAndRemoveLineCommandsOptions = struct {
initial_filename: ?[]const u8 = null,
};
/// Parses and removes #line commands as well as all source code that is within a file
/// with .c or .h extensions.
///
/// > RC treats files with the .c and .h extensions in a special manner. It
/// > assumes that a file with one of these extensions does not contain
/// > resources. If a file has the .c or .h file name extension, RC ignores all
/// > lines in the file except the preprocessor directives. Therefore, to
/// > include a file that contains resources in another resource script, give
/// > the file to be included an extension other than .c or .h.
/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
///
/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
/// between the lines and their corresponding lines in their original files.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
///
/// If `options.initial_filename` is provided, that filename is guaranteed to be
/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) error{ OutOfMemory, InvalidLineCommand, LineNumberOverflow }!ParseLineCommandsResult {
var parse_result = ParseLineCommandsResult{
.result = undefined,
.mappings = .{},
};
errdefer parse_result.mappings.deinit(allocator);
var current_mapping: CurrentMapping = .{};
defer current_mapping.filename.deinit(allocator);
if (options.initial_filename) |initial_filename| {
try current_mapping.filename.appendSlice(allocator, initial_filename);
parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
}
// This implementation attempts to be comment and string aware in order
// to avoid errant #line <num> "<filename>" within multiline comments
// leading to problems in the source mapping after comments are removed,
// but it is not a perfect implementation (intentionally).
//
// The current implementation does not handle cases like
// /* foo */ #line ...
// #line ... // foo
// #line ... /* foo ...
// etc
//
// (the first example will not be recognized as a #line command, the second
// and third will error with InvalidLineCommand)
//
// This is fine, though, since #line commands are generated by the
// preprocessor so in normal circumstances they will be well-formed and
// consistent. The only realistic way the imperfect implementation could
// affect a 'real' use-case would be someone taking the output of a
// preprocessor, editing it manually to add comments before/after #line
// commands, and then running it through resinator with /:no-preprocess.
std.debug.assert(buf.len >= source.len);
var result = UncheckedSliceWriter{ .slice = buf };
const State = enum {
line_start,
preprocessor,
non_preprocessor,
forward_slash,
line_comment,
multiline_comment,
multiline_comment_end,
single_quoted,
single_quoted_escape,
double_quoted,
double_quoted_escape,
};
var state: State = .line_start;
var index: usize = 0;
var pending_start: ?usize = null;
var preprocessor_start: usize = 0;
var line_number: usize = 1;
while (index < source.len) : (index += 1) {
var c = source[index];
state: switch (state) {
.line_start => switch (c) {
'#' => {
preprocessor_start = index;
state = .preprocessor;
if (pending_start == null) {
pending_start = index;
}
},
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
pending_start = null;
},
' ', '\t', '\x0b', '\x0c' => {
if (pending_start == null) {
pending_start = index;
}
},
'/' => {
if (!current_mapping.ignore_contents) {
result.writeSlice(source[pending_start orelse index .. index + 1]);
pending_start = null;
}
state = .forward_slash;
},
'\'' => {
if (!current_mapping.ignore_contents) {
result.writeSlice(source[pending_start orelse index .. index + 1]);
pending_start = null;
}
state = .single_quoted;
},
'"' => {
if (!current_mapping.ignore_contents) {
result.writeSlice(source[pending_start orelse index .. index + 1]);
pending_start = null;
}
state = .double_quoted;
},
else => {
state = .non_preprocessor;
if (pending_start != null) {
if (!current_mapping.ignore_contents) {
result.writeSlice(source[pending_start.? .. index + 1]);
}
pending_start = null;
continue;
}
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
.forward_slash => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
'/' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .line_comment;
},
'*' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .multiline_comment;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .non_preprocessor;
},
},
.line_comment => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
.multiline_comment => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
pending_start = null;
},
'*' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .multiline_comment_end;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
.multiline_comment_end => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .multiline_comment;
pending_start = null;
},
'/' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .non_preprocessor;
},
'*' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
// stay in multiline_comment_end state
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .multiline_comment;
},
},
.single_quoted => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
'\\' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .single_quoted_escape;
},
'\'' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .non_preprocessor;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
.single_quoted_escape => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .single_quoted;
},
},
.double_quoted => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
'\\' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .double_quoted_escape;
},
'"' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .non_preprocessor;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
.double_quoted_escape => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .double_quoted;
},
},
.preprocessor => switch (c) {
'\r', '\n' => {
// Now that we have the full line we can decide what to do with it
const preprocessor_str = source[preprocessor_start..index];
if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
try handleLineCommand(allocator, preprocessor_str, &current_mapping);
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
} else {
// Backtrack and reparse the line in the non_preprocessor state,
// since it's possible that this line contains a multiline comment
// start, etc.
state = .non_preprocessor;
index = pending_start.?;
pending_start = null;
// TODO: This is a hacky way to implement this, c needs to be
// updated since we're using continue :state here
c = source[index];
// continue to avoid the index += 1 of the while loop
continue :state .non_preprocessor;
}
},
else => {},
},
.non_preprocessor => switch (c) {
'\r', '\n' => {
const is_crlf = formsLineEndingPair(source, c, index + 1);
if (!current_mapping.ignore_contents) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
result.write(c);
if (is_crlf) result.write(source[index + 1]);
line_number += 1;
}
if (is_crlf) index += 1;
state = .line_start;
pending_start = null;
},
'/' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .forward_slash;
},
'\'' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .single_quoted;
},
'"' => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
state = .double_quoted;
},
else => {
if (!current_mapping.ignore_contents) {
result.write(c);
}
},
},
}
} else {
switch (state) {
.line_start => {},
.forward_slash,
.line_comment,
.multiline_comment,
.multiline_comment_end,
.single_quoted,
.single_quoted_escape,
.double_quoted,
.double_quoted_escape,
.non_preprocessor,
=> {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
},
.preprocessor => {
// Now that we have the full line we can decide what to do with it
const preprocessor_str = source[preprocessor_start..index];
if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
try handleLineCommand(allocator, preprocessor_str, &current_mapping);
} else {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
if (!current_mapping.ignore_contents) {
result.writeSlice(source[pending_start.?..index]);
}
}
},
}
}
parse_result.result = result.getWritten();
// Remove whitespace from the end of the result. This avoids issues when the
// preprocessor adds a newline to the end of the file, since then the
// post-preprocessed source could have more lines than the corresponding input source and
// the inserted line can't be mapped to any lines in the original file.
// There's no way that whitespace at the end of a file can affect the parsing
// of the RC script so this is okay to do unconditionally.
// TODO: There might be a better way around this
while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
parse_result.result.len -= 1;
}
// If there have been no line mappings at all, then we're dealing with an empty file.
// In this case, we want to fake a line mapping just so that we return something
// that is useable in the same way that a non-empty mapping would be.
if (parse_result.mappings.sources.root == null) {
try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
}
return parse_result;
}
/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
if (next_index >= source.len) return false;
const next_ending = source[next_index];
return utils.isLineEndingPair(line_ending, next_ending);
}
pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
const filename_offset = try mapping.files.put(allocator, current_mapping.filename.items);
try mapping.set(post_processed_line_number, current_mapping.line_num, filename_offset);
current_mapping.line_num = std.math.add(usize, current_mapping.line_num, 1) catch return error.LineNumberOverflow;
current_mapping.pending = false;
}
// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{ OutOfMemory, InvalidLineCommand }!void {
// TODO: Are there other whitespace characters that should be included?
var tokenizer = std.mem.tokenizeAny(u8, line_command, " \t");
const line_directive = tokenizer.next() orelse return error.InvalidLineCommand; // #line
if (!std.mem.eql(u8, line_directive, "#line")) return error.InvalidLineCommand;
const linenum_str = tokenizer.next() orelse return error.InvalidLineCommand;
const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return error.InvalidLineCommand;
if (linenum == 0) return error.InvalidLineCommand;
var filename_literal = tokenizer.rest();
while (filename_literal.len > 0 and std.ascii.isWhitespace(filename_literal[filename_literal.len - 1])) {
filename_literal.len -= 1;
}
if (filename_literal.len < 2) return error.InvalidLineCommand;
const is_quoted = filename_literal[0] == '"' and filename_literal[filename_literal.len - 1] == '"';
if (!is_quoted) return error.InvalidLineCommand;
const unquoted_filename = filename_literal[1 .. filename_literal.len - 1];
// Ignore <builtin> and <command line>
if (std.mem.eql(u8, unquoted_filename, "<builtin>") or std.mem.eql(u8, unquoted_filename, "<command line>")) return;
const filename = parseFilename(allocator, unquoted_filename) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
else => return error.InvalidLineCommand,
};
defer allocator.free(filename);
// \x00 bytes in the filename is incompatible with how StringTable works
if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return error.InvalidLineCommand;
current_mapping.line_num = linenum;
current_mapping.filename.clearRetainingCapacity();
try current_mapping.filename.appendSlice(allocator, filename);
current_mapping.pending = true;
current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h");
}
pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
const buf = try allocator.alloc(u8, source.len);
errdefer allocator.free(buf);
var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
result.result = try allocator.realloc(buf, result.result.len);
return result;
}
/// C-style string parsing with a few caveats:
/// - The str cannot contain newlines or carriage returns
/// - Hex and octal escape are limited to u8
/// - No handling/support for L, u, or U prefixed strings
/// - The start and end double quotes should be omitted from the `str`
/// - Other than the above, does not assume any validity of the strings (i.e. there
/// may be unescaped double quotes within the str) and will return error.InvalidString
/// on any problems found.
///
/// The result is a UTF-8 encoded string.
fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
const State = enum {
string,
escape,
escape_hex,
escape_octal,
escape_u,
};
var filename = try std.ArrayList(u8).initCapacity(allocator, str.len);
errdefer filename.deinit(allocator);
var state: State = .string;
var index: usize = 0;
var escape_len: usize = undefined;
var escape_val: u64 = undefined;
var escape_expected_len: u8 = undefined;
while (index < str.len) : (index += 1) {
const c = str[index];
switch (state) {
.string => switch (c) {
'\\' => state = .escape,
'"' => return error.InvalidString,
else => filename.appendAssumeCapacity(c),
},
.escape => switch (c) {
'\'', '"', '\\', '?', 'n', 'r', 't', 'a', 'b', 'e', 'f', 'v' => {
const escaped_c = switch (c) {
'\'', '"', '\\', '?' => c,
'n' => '\n',
'r' => '\r',
't' => '\t',
'a' => '\x07',
'b' => '\x08',
'e' => '\x1b', // non-standard
'f' => '\x0c',
'v' => '\x0b',
else => unreachable,
};
filename.appendAssumeCapacity(escaped_c);
state = .string;
},
'x' => {
escape_val = 0;
escape_len = 0;
state = .escape_hex;
},
'0'...'7' => {
escape_val = std.fmt.charToDigit(c, 8) catch unreachable;
escape_len = 1;
state = .escape_octal;
},
'u' => {
escape_val = 0;
escape_len = 0;
state = .escape_u;
escape_expected_len = 4;
},
'U' => {
escape_val = 0;
escape_len = 0;
state = .escape_u;
escape_expected_len = 8;
},
else => return error.InvalidString,
},
.escape_hex => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
const digit = std.fmt.charToDigit(c, 16) catch unreachable;
if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 16) catch return error.InvalidString;
escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString;
escape_len += 1;
},
else => {
if (escape_len == 0) return error.InvalidString;
filename.appendAssumeCapacity(@intCast(escape_val));
state = .string;
index -= 1; // reconsume
},
},
.escape_octal => switch (c) {
'0'...'7' => {
const digit = std.fmt.charToDigit(c, 8) catch unreachable;
if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 8) catch return error.InvalidString;
escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString;
escape_len += 1;
if (escape_len == 3) {
filename.appendAssumeCapacity(@intCast(escape_val));
state = .string;
}
},
else => {
if (escape_len == 0) return error.InvalidString;
filename.appendAssumeCapacity(@intCast(escape_val));
state = .string;
index -= 1; // reconsume
},
},
.escape_u => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {
const digit = std.fmt.charToDigit(c, 16) catch unreachable;
if (escape_val != 0) escape_val = std.math.mul(u21, @as(u21, @intCast(escape_val)), 16) catch return error.InvalidString;
escape_val = std.math.add(u21, @as(u21, @intCast(escape_val)), digit) catch return error.InvalidString;
escape_len += 1;
if (escape_len == escape_expected_len) {
var buf: [4]u8 = undefined;
const utf8_len = std.unicode.utf8Encode(@intCast(escape_val), &buf) catch return error.InvalidString;
filename.appendSliceAssumeCapacity(buf[0..utf8_len]);
state = .string;
}
},
// Requires escape_expected_len valid hex digits
else => return error.InvalidString,
},
}
} else {
switch (state) {
.string => {},
.escape, .escape_u => return error.InvalidString,
.escape_hex => {
if (escape_len == 0) return error.InvalidString;
filename.appendAssumeCapacity(@intCast(escape_val));
},
.escape_octal => {
filename.appendAssumeCapacity(@intCast(escape_val));
},
}
}
return filename.toOwnedSlice(allocator);
}
fn testParseFilename(expected: []const u8, input: []const u8) !void {
const parsed = try parseFilename(std.testing.allocator, input);
defer std.testing.allocator.free(parsed);
return std.testing.expectEqualSlices(u8, expected, parsed);
}
test parseFilename {
try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11");
try testParseFilename("\xABz\x53", "\\xABz\\123");
try testParseFilename("⚡⚡", "\\u26A1\\U000026A1");
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\""));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\u"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\U"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\x"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xZZ"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xABCDEF"));
try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\777"));
}
pub const SourceMappings = struct {
sources: Sources = .{},
files: StringTable = .{},
/// The default assumes that the first filename added is the root file.
/// The value should be set to the correct offset if that assumption does not hold.
root_filename_offset: u32 = 0,
source_node_pool: std.heap.MemoryPool(Sources.Node) = .empty,
end_line: usize = 0,
const sourceCompare = struct {
fn compare(a: Source, b: Source) std.math.Order {
return std.math.order(a.start_line, b.start_line);
}
}.compare;
const Sources = std.Treap(Source, sourceCompare);
pub const Source = struct {
start_line: usize,
span: usize = 0,
corresponding_start_line: usize,
filename_offset: u32,
};
pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
self.files.deinit(allocator);
self.source_node_pool.deinit(std.heap.page_allocator);
}
/// Find the node that 'contains' the `line`, i.e. the node's start_line is
/// >= `line`
fn findNode(self: SourceMappings, line: usize) ?*Sources.Node {
var node = self.sources.root;
var last_gt: ?*Sources.Node = null;
var search_key: Source = undefined;
search_key.start_line = line;
while (node) |current| {
const order = sourceCompare(search_key, current.key);
if (order == .eq) break;
if (order == .gt) last_gt = current;
node = current.children[@intFromBool(order == .gt)] orelse {
// Regardless of the current order, last_gt will contain the
// the node we want to return.
//
// If search key is > current node's key, then last_gt will be
// current which we now know is the closest node that is <=
// the search key.
//
//
// If the key is < current node's key, we want to jump back to the
// node that the search key was most recently greater than.
// This is necessary for scenarios like (where the search key is 2):
//
// 1
// \
// 6
// /
// 3
//
// In this example, we'll get down to the '3' node but ultimately want
// to return the '1' node.
//
// Note: If we've never seen a key that the search key is greater than,
// then we know that there's no valid node, so last_gt will be null.
return last_gt;
};
}
return node;
}
/// Note: `line_num` and `corresponding_line_num` start at 1
pub fn set(self: *SourceMappings, line_num: usize, corresponding_line_num: usize, filename_offset: u32) !void {
const maybe_node = self.findNode(line_num);
const need_new_node = need_new_node: {
if (maybe_node) |node| {
if (node.key.filename_offset != filename_offset) {
break :need_new_node true;
}
// TODO: These use i65 to avoid truncation when any of the line number values
// use all 64 bits of the usize. In reality, line numbers can't really
// get that large so limiting the line number and using a smaller iX
// type here might be a better solution.
const exist_delta = @as(i65, @intCast(node.key.corresponding_start_line)) - @as(i65, @intCast(node.key.start_line));
const cur_delta = @as(i65, @intCast(corresponding_line_num)) - @as(i65, @intCast(line_num));
if (exist_delta != cur_delta) {
break :need_new_node true;
}
break :need_new_node false;
}
break :need_new_node true;
};
if (need_new_node) {
// spans must not overlap
if (maybe_node) |node| {
std.debug.assert(node.key.start_line != line_num);
}
const key = Source{
.start_line = line_num,
.corresponding_start_line = corresponding_line_num,
.filename_offset = filename_offset,
};
var entry = self.sources.getEntryFor(key);
var new_node = try self.source_node_pool.create(std.heap.page_allocator);
new_node.key = key;
entry.set(new_node);
}
if (line_num > self.end_line) {
self.end_line = line_num;
}
}
/// Note: `line_num` starts at 1
pub fn get(self: SourceMappings, line_num: usize) ?Source {
const node = self.findNode(line_num) orelse return null;
return node.key;
}
pub const CorrespondingSpan = struct {
start_line: usize,
end_line: usize,
filename_offset: u32,
};
pub fn getCorrespondingSpan(self: SourceMappings, line_num: usize) ?CorrespondingSpan {
const source = self.get(line_num) orelse return null;
const diff = line_num - source.start_line;
const start_line = source.corresponding_start_line + (if (line_num == source.start_line) 0 else source.span + diff);
const end_line = start_line + (if (line_num == source.start_line) source.span else 0);
return CorrespondingSpan{
.start_line = start_line,
.end_line = end_line,
.filename_offset = source.filename_offset,
};
}
pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) !void {
std.debug.assert(num_following_lines_to_collapse > 0);
var node = self.findNode(line_num).?;
const span_diff = num_following_lines_to_collapse;
if (node.key.start_line != line_num) {
const offset = line_num - node.key.start_line;
const key = Source{
.start_line = line_num,
.span = num_following_lines_to_collapse,
.corresponding_start_line = node.key.corresponding_start_line + node.key.span + offset,
.filename_offset = node.key.filename_offset,
};
var entry = self.sources.getEntryFor(key);
var new_node = try self.source_node_pool.create(std.heap.page_allocator);
new_node.key = key;
entry.set(new_node);
node = new_node;
} else {
node.key.span += span_diff;
}
// now subtract the span diff from the start line number of all of
// the following nodes in order
var it = Sources.InorderIterator{ .current = node };
// skip past current, but store it
var prev = it.next().?;
while (it.next()) |inorder_node| {
inorder_node.key.start_line -= span_diff;
// This can only really happen if there are #line commands within
// a multiline comment, which should be skipped over.
std.debug.assert(prev.key.start_line <= inorder_node.key.start_line);
prev = inorder_node;
}
self.end_line -= span_diff;
}
/// Returns true if the line is from the main/root file (i.e. not a file that has been
/// `#include`d).
pub fn isRootFile(self: *const SourceMappings, line_num: usize) bool {
const source = self.get(line_num) orelse return false;
return source.filename_offset == self.root_filename_offset;
}
};
test "SourceMappings collapse" {
const allocator = std.testing.allocator;
var mappings = SourceMappings{};
defer mappings.deinit(allocator);
const filename_offset = try mappings.files.put(allocator, "test.rc");
try mappings.set(1, 1, filename_offset);
try mappings.set(5, 5, filename_offset);
try mappings.collapse(2, 2);
try std.testing.expectEqual(@as(usize, 3), mappings.end_line);
const span_1 = mappings.getCorrespondingSpan(1).?;
try std.testing.expectEqual(@as(usize, 1), span_1.start_line);
try std.testing.expectEqual(@as(usize, 1), span_1.end_line);
const span_2 = mappings.getCorrespondingSpan(2).?;
try std.testing.expectEqual(@as(usize, 2), span_2.start_line);
try std.testing.expectEqual(@as(usize, 4), span_2.end_line);
const span_3 = mappings.getCorrespondingSpan(3).?;
try std.testing.expectEqual(@as(usize, 5), span_3.start_line);
try std.testing.expectEqual(@as(usize, 5), span_3.end_line);
}
/// Same thing as StringTable in Zig's src/Wasm.zig
pub const StringTable = struct {
data: std.ArrayList(u8) = .empty,
map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .empty,
pub fn deinit(self: *StringTable, allocator: Allocator) void {
self.data.deinit(allocator);
self.map.deinit(allocator);
}
pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 {
const result = try self.map.getOrPutContextAdapted(
allocator,
value,
std.hash_map.StringIndexAdapter{ .bytes = &self.data },
.{ .bytes = &self.data },
);
if (result.found_existing) {
return result.key_ptr.*;
}
try self.data.ensureUnusedCapacity(allocator, value.len + 1);
const offset: u32 = @intCast(self.data.items.len);
self.data.appendSliceAssumeCapacity(value);
self.data.appendAssumeCapacity(0);
result.key_ptr.* = offset;
return offset;
}
pub fn get(self: StringTable, offset: u32) []const u8 {
std.debug.assert(offset < self.data.items.len);
return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0);
}
pub fn getOffset(self: *StringTable, value: []const u8) ?u32 {
return self.map.getKeyAdapted(
value,
std.hash_map.StringIndexAdapter{ .bytes = &self.data },
);
}
};
const ExpectedSourceSpan = struct {
start_line: usize,
end_line: usize,
filename: []const u8,
};
fn testParseAndRemoveLineCommands(
expected: []const u8,
comptime expected_spans: []const ExpectedSourceSpan,
source: []const u8,
options: ParseAndRemoveLineCommandsOptions,
) !void {
var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options);
defer std.testing.allocator.free(results.result);
defer results.mappings.deinit(std.testing.allocator);
try std.testing.expectEqualStrings(expected, results.result);
expectEqualMappings(expected_spans, results.mappings) catch |err| {
std.debug.print("\nexpected mappings:\n", .{});
for (expected_spans, 0..) |span, i| {
const line_num = i + 1;
std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line });
}
std.debug.print("\nactual mappings:\n", .{});
var i: usize = 1;
while (i <= results.mappings.end_line) : (i += 1) {
const span = results.mappings.getCorrespondingSpan(i).?;
const filename = results.mappings.files.get(span.filename_offset);
std.debug.print("{}: {s}:{}-{}\n", .{ i, filename, span.start_line, span.end_line });
}
std.debug.print("\n", .{});
return err;
};
}
fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void {
try std.testing.expectEqual(expected_spans.len, mappings.end_line);
for (expected_spans, 0..) |expected_span, i| {
const line_num = i + 1;
const span = mappings.getCorrespondingSpan(line_num) orelse return error.MissingLineNum;
const filename = mappings.files.get(span.filename_offset);
try std.testing.expectEqual(expected_span.start_line, span.start_line);
try std.testing.expectEqual(expected_span.end_line, span.end_line);
try std.testing.expectEqualStrings(expected_span.filename, filename);
}
}
test "basic" {
try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
}, "#line 1 \"blah.rc\"", .{});
}
test "only removes line commands" {
try testParseAndRemoveLineCommands(
\\#pragma code_page(65001)
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
},
\\#line 1 "blah.rc"
\\#pragma code_page(65001)
, .{});
}
test "whitespace and line endings" {
try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
}, "#line \t 1 \t \"blah.rc\"\r\n", .{});
}
test "example" {
try testParseAndRemoveLineCommands(
\\
\\included RCDATA {"hello"}
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "./included.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "./included.rc" },
},
\\#line 1 "rcdata.rc"
\\#line 1 "<built-in>"
\\#line 1 "<built-in>"
\\#line 355 "<built-in>"
\\#line 1 "<command line>"
\\#line 1 "<built-in>"
\\#line 1 "rcdata.rc"
\\#line 1 "./header.h"
\\
\\
\\2 RCDATA {"blah"}
\\
\\
\\#line 1 "./included.rc"
\\
\\included RCDATA {"hello"}
\\#line 7 "./header.h"
\\#line 1 "rcdata.rc"
, .{});
}
test "CRLF and other line endings" {
try testParseAndRemoveLineCommands(
"hello\r\n#pragma code_page(65001)\r\nworld",
&[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" },
.{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" },
},
"#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n",
.{},
);
}
test "no line commands" {
try testParseAndRemoveLineCommands(
\\1 RCDATA {"blah"}
\\2 RCDATA {"blah"}
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
},
\\1 RCDATA {"blah"}
\\2 RCDATA {"blah"}
, .{ .initial_filename = "blah.rc" });
}
test "in place" {
var mut_source = "#line 1 \"blah.rc\"".*;
var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{});
defer result.mappings.deinit(std.testing.allocator);
try std.testing.expectEqualStrings("", result.result);
}
test "line command within a multiline comment" {
try testParseAndRemoveLineCommands(
\\/*
\\#line 1 "irrelevant.rc"
\\
\\
\\*/
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
.{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
.{ .start_line = 4, .end_line = 4, .filename = "blah.rc" },
.{ .start_line = 5, .end_line = 5, .filename = "blah.rc" },
},
\\/*
\\#line 1 "irrelevant.rc"
\\
\\
\\*/
, .{ .initial_filename = "blah.rc" });
// * but without / directly after
try testParseAndRemoveLineCommands(
\\/** /
\\#line 1 "irrelevant.rc"
\\*/
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
.{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
},
\\/** /
\\#line 1 "irrelevant.rc"
\\*/
, .{ .initial_filename = "blah.rc" });
// /** and **/
try testParseAndRemoveLineCommands(
\\/**
\\#line 1 "irrelevant.rc"
\\**/
\\foo
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
.{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
.{ .start_line = 20, .end_line = 20, .filename = "blah.rc" },
},
\\/**
\\#line 1 "irrelevant.rc"
\\**/
\\#line 20 "blah.rc"
\\foo
, .{ .initial_filename = "blah.rc" });
}
test "whitespace preservation" {
try testParseAndRemoveLineCommands(
\\ /
\\/
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
},
\\ /
\\/
, .{ .initial_filename = "blah.rc" });
}
test "preprocessor line with a multiline comment after" {
try testParseAndRemoveLineCommands(
\\#pragma test /*
\\
\\*/
, &[_]ExpectedSourceSpan{
.{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
.{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
.{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
},
\\#pragma test /*
\\
\\*/
, .{ .initial_filename = "blah.rc" });
}
test "comment after line command" {
var mut_source = "#line 1 \"blah.rc\" /*".*;
try std.testing.expectError(error.InvalidLineCommand, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}));
}
test "line command with 0 as line number" {
var mut_source = "#line 0 \"blah.rc\"".*;
try std.testing.expectError(error.InvalidLineCommand, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}));
}
test "line number limits" {
// TODO: Avoid usize for line numbers
if (@sizeOf(usize) != 8) return error.SkipZigTest;
// greater than i64 max
try testParseAndRemoveLineCommands(
\\
, &[_]ExpectedSourceSpan{
.{ .start_line = 11111111111111111111, .end_line = 11111111111111111111, .filename = "blah.rc" },
},
\\#line 11111111111111111111 "blah.rc"
, .{ .initial_filename = "blah.rc" });
// equal to u64 max, overflows on line number increment
{
var mut_source = "#line 18446744073709551615 \"blah.rc\"".*;
try std.testing.expectError(error.LineNumberOverflow, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}));
}
// greater than u64 max
{
var mut_source = "#line 18446744073709551616 \"blah.rc\"".*;
try std.testing.expectError(error.InvalidLineCommand, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}));
}
}