const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const assert = std.debug.assert;
const Compilation = @import("Compilation.zig");
const Error = Compilation.Error;
const Source = @import("Source.zig");
const Tokenizer = @import("Tokenizer.zig");
const RawToken = Tokenizer.Token;
const Parser = @import("Parser.zig");
const Diagnostics = @import("Diagnostics.zig");
const Token = @import("Tree.zig").Token;
const Attribute = @import("Attribute.zig");
const features = @import("features.zig");
const Preprocessor = @This();

const DefineMap = std.StringHashMap(Macro);
const RawTokenList = std.ArrayList(RawToken);
const max_include_depth = 200;

/// Errors that can be returned when expanding a macro.
/// error.UnknownPragma can occur within Preprocessor.pragma() but
/// it is handled there and doesn't escape that function.
const MacroError = Error || error{StopPreprocessing};

const Macro = struct {
    /// Parameters of the function type macro.
    params: []const []const u8,

    /// Tokens constituting the macro body.
    tokens: []const RawToken,

    /// Whether the function type macro takes a variable number of arguments.
    var_args: bool,

    /// Whether this is a function type macro.
    is_func: bool,

    /// Whether this is a predefined macro.
    is_builtin: bool = false,

    /// Location of the macro in the source.
    /// `byte_offset` and `line` are used to define the range of tokens included
    /// in the macro.
    loc: Source.Location,

    fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool {
        if (a.tokens.len != b.tokens.len) return false;
        if (a.is_builtin != b.is_builtin) return false;
        for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false;

        if (a.is_func and b.is_func) {
            if (a.var_args != b.var_args) return false;
            if (a.params.len != b.params.len) return false;
            for (a.params, b.params) |a_param, b_param| if (!mem.eql(u8, a_param, b_param)) return false;
        }

        return true;
    }

    fn tokEql(pp: *Preprocessor, a: RawToken, b: RawToken) bool {
        return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b));
    }
};

comp: *Compilation,
gpa: mem.Allocator,
arena: std.heap.ArenaAllocator,
defines: DefineMap,
tokens: Token.List = .{},
token_buf: RawTokenList,
char_buf: std.ArrayList(u8),
/// Counter that is incremented each time preprocess() is called.
/// Can be used to distinguish multiple preprocessings of the same file.
preprocess_count: u32 = 0,
generated_line: u32 = 1,
add_expansion_nl: u32 = 0,
include_depth: u8 = 0,
counter: u32 = 0,
expansion_source_loc: Source.Location = undefined,
poisoned_identifiers: std.StringHashMap(void),
/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any.
include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{},

/// Memory is retained to avoid allocation on every single token.
top_expansion_buf: ExpandBuf,

/// Dump current state to stderr.
verbose: bool = false,
preserve_whitespace: bool = false,

/// Linemarker tokens. Must be .none unless in -E mode (the parser does not handle linemarkers).
linemarkers: Linemarkers = .none,
pub const Linemarkers = enum {
    /// No linemarker tokens. Required setting if the parser will run.
    none,
    /// #line "filename"
    line_directives,
    /// # "filename" flags
    numeric_directives,
};

pub fn init(comp: *Compilation) Preprocessor {
    const pp = Preprocessor{
        .comp = comp,
        .gpa = comp.gpa,
        .arena = std.heap.ArenaAllocator.init(comp.gpa),
        .defines = DefineMap.init(comp.gpa),
        .token_buf = RawTokenList.init(comp.gpa),
        .char_buf = std.ArrayList(u8).init(comp.gpa),
        .poisoned_identifiers = std.StringHashMap(void).init(comp.gpa),
        .top_expansion_buf = ExpandBuf.init(comp.gpa),
    };
    comp.pragmaEvent(.before_preprocess);
    return pp;
}

const builtin_macros = struct {
    const args = [1][]const u8{"X"};

    const has_attribute = [1]RawToken{.{
        .id = .macro_param_has_attribute,
        .source = .generated,
    }};
    const has_declspec_attribute = [1]RawToken{.{
        .id = .macro_param_has_declspec_attribute,
        .source = .generated,
    }};
    const has_warning = [1]RawToken{.{
        .id = .macro_param_has_warning,
        .source = .generated,
    }};
    const has_feature = [1]RawToken{.{
        .id = .macro_param_has_feature,
        .source = .generated,
    }};
    const has_extension = [1]RawToken{.{
        .id = .macro_param_has_extension,
        .source = .generated,
    }};
    const has_builtin = [1]RawToken{.{
        .id = .macro_param_has_builtin,
        .source = .generated,
    }};
    const has_include = [1]RawToken{.{
        .id = .macro_param_has_include,
        .source = .generated,
    }};
    const has_include_next = [1]RawToken{.{
        .id = .macro_param_has_include_next,
        .source = .generated,
    }};
    const is_identifier = [1]RawToken{.{
        .id = .macro_param_is_identifier,
        .source = .generated,
    }};
    const pragma_operator = [1]RawToken{.{
        .id = .macro_param_pragma_operator,
        .source = .generated,
    }};
    const file = [1]RawToken{.{
        .id = .macro_file,
        .source = .generated,
    }};
    const line = [1]RawToken{.{
        .id = .macro_line,
        .source = .generated,
    }};
    const counter = [1]RawToken{.{
        .id = .macro_counter,
        .source = .generated,
    }};
};

fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, tokens: []const RawToken) !void {
    try pp.defines.putNoClobber(name, .{
        .params = &builtin_macros.args,
        .tokens = tokens,
        .var_args = false,
        .is_func = is_func,
        .loc = .{ .id = .generated },
        .is_builtin = true,
    });
}

pub fn addBuiltinMacros(pp: *Preprocessor) !void {
    try pp.addBuiltinMacro("__has_attribute", true, &builtin_macros.has_attribute);
    try pp.addBuiltinMacro("__has_declspec_attribute", true, &builtin_macros.has_declspec_attribute);
    try pp.addBuiltinMacro("__has_warning", true, &builtin_macros.has_warning);
    try pp.addBuiltinMacro("__has_feature", true, &builtin_macros.has_feature);
    try pp.addBuiltinMacro("__has_extension", true, &builtin_macros.has_extension);
    try pp.addBuiltinMacro("__has_builtin", true, &builtin_macros.has_builtin);
    try pp.addBuiltinMacro("__has_include", true, &builtin_macros.has_include);
    try pp.addBuiltinMacro("__has_include_next", true, &builtin_macros.has_include_next);
    try pp.addBuiltinMacro("__is_identifier", true, &builtin_macros.is_identifier);
    try pp.addBuiltinMacro("_Pragma", true, &builtin_macros.pragma_operator);
    try pp.addBuiltinMacro("__FILE__", false, &builtin_macros.file);
    try pp.addBuiltinMacro("__LINE__", false, &builtin_macros.line);
    try pp.addBuiltinMacro("__COUNTER__", false, &builtin_macros.counter);
}

pub fn deinit(pp: *Preprocessor) void {
    pp.defines.deinit();
    for (pp.tokens.items(.expansion_locs)) |loc| Token.free(loc, pp.gpa);
    pp.tokens.deinit(pp.gpa);
    pp.arena.deinit();
    pp.token_buf.deinit();
    pp.char_buf.deinit();
    pp.poisoned_identifiers.deinit();
    pp.include_guards.deinit(pp.gpa);
    pp.top_expansion_buf.deinit();
}
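// Typical usage, as a sketch (assumes a fully initialized `Compilation` named
// `comp` and a `Source` named `source`, both created elsewhere and not shown
// in this file):
//
//     var pp = Preprocessor.init(&comp);
//     defer pp.deinit();
//     try pp.addBuiltinMacros();
//     const eof = try pp.preprocess(source);
//     _ = eof; // the preprocessed token stream is now in pp.tokens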
/// Preprocess a source file, returns eof token.
pub fn preprocess(pp: *Preprocessor, source: Source) Error!Token {
    const eof = pp.preprocessExtra(source) catch |er| switch (er) {
        // This cannot occur in the main file and is handled in `include`.
        error.StopPreprocessing => unreachable,
        else => |e| return e,
    };
    try eof.checkMsEof(source, pp.comp);
    return eof;
}

/// Tokenize a file without any preprocessing, returns eof token.
pub fn tokenize(pp: *Preprocessor, source: Source) Error!Token {
    assert(pp.linemarkers == .none);
    assert(pp.preserve_whitespace == false);
    var tokenizer = Tokenizer{
        .buf = source.buf,
        .comp = pp.comp,
        .source = source.id,
    };

    // Estimate how many new tokens this source will contain.
    const estimated_token_count = source.buf.len / 8;
    try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count);

    while (true) {
        var tok = tokenizer.next();
        if (tok.id == .eof) return tokFromRaw(tok);
        try pp.tokens.append(pp.gpa, tokFromRaw(tok));
    }
}

pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void {
    if (pp.linemarkers == .none) return;
    try pp.tokens.append(pp.gpa, .{ .id = .include_start, .loc = .{
        .id = source.id,
        .byte_offset = std.math.maxInt(u32),
        .line = 0,
    } });
}

pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void {
    if (pp.linemarkers == .none) return;
    try pp.tokens.append(pp.gpa, .{ .id = .include_resume, .loc = .{
        .id = source,
        .byte_offset = offset,
        .line = line,
    } });
}

/// Return the name of the #ifndef guard macro that starts a source, if any.
fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
    var tokenizer = Tokenizer{
        .buf = source.buf,
        .comp = pp.comp,
        .source = source.id,
    };
    var hash = tokenizer.nextNoWS();
    while (hash.id == .nl) hash = tokenizer.nextNoWS();
    if (hash.id != .hash) return null;
    const ifndef = tokenizer.nextNoWS();
    if (ifndef.id != .keyword_ifndef) return null;
    const guard = tokenizer.nextNoWS();
    if (guard.id != .identifier) return null;
    return pp.tokSlice(guard);
}
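// Sketch of the only guard shape findIncludeGuard() recognizes; the #endif
// handling in preprocessExtra() below verifies the guard actually spans the
// whole file before it is recorded in include_guards:
//
//     #ifndef FOO_H
//     ...
//     #endif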
fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
    var guard_name = pp.findIncludeGuard(source);

    pp.preprocess_count += 1;
    var tokenizer = Tokenizer{
        .buf = source.buf,
        .comp = pp.comp,
        .source = source.id,
    };

    // Estimate how many new tokens this source will contain.
    const estimated_token_count = source.buf.len / 8;
    try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count);

    var if_level: u8 = 0;
    var if_kind = std.PackedIntArray(u2, 256).init([1]u2{0} ** 256);
    const until_else = 0;
    const until_endif = 1;
    const until_endif_seen_else = 2;

    var start_of_line = true;
    while (true) {
        var tok = tokenizer.next();
        switch (tok.id) {
            .hash => if (!start_of_line) try pp.tokens.append(pp.gpa, tokFromRaw(tok)) else {
                const directive = tokenizer.nextNoWS();
                switch (directive.id) {
                    .keyword_error, .keyword_warning => {
                        // #error tokens..
                        pp.top_expansion_buf.items.len = 0;
                        const char_top = pp.char_buf.items.len;
                        defer pp.char_buf.items.len = char_top;

                        while (true) {
                            tok = tokenizer.next();
                            if (tok.id == .nl or tok.id == .eof) break;
                            if (tok.id == .whitespace) tok.id = .macro_ws;
                            try pp.top_expansion_buf.append(tokFromRaw(tok));
                        }
                        try pp.stringify(pp.top_expansion_buf.items);
                        const slice = pp.char_buf.items[char_top + 1 .. pp.char_buf.items.len - 2];
                        const duped = try pp.comp.diag.arena.allocator().dupe(u8, slice);

                        try pp.comp.diag.add(.{
                            .tag = if (directive.id == .keyword_error) .error_directive else .warning_directive,
                            .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line },
                            .extra = .{ .str = duped },
                        }, &.{});
                    },
                    .keyword_if => {
                        const sum, const overflowed = @addWithOverflow(if_level, 1);
                        if (overflowed != 0) return pp.fatal(directive, "too many #if nestings", .{});
                        if_level = sum;

                        if (try pp.expr(&tokenizer)) {
                            if_kind.set(if_level, until_endif);
                            if (pp.verbose) {
                                pp.verboseLog(directive, "entering then branch of #if", .{});
                            }
                        } else {
                            if_kind.set(if_level, until_else);
                            try pp.skip(&tokenizer, .until_else);
                            if (pp.verbose) {
                                pp.verboseLog(directive, "entering else branch of #if", .{});
                            }
                        }
                    },
                    .keyword_ifdef => {
                        const sum, const overflowed = @addWithOverflow(if_level, 1);
                        if (overflowed != 0) return pp.fatal(directive, "too many #if nestings", .{});
                        if_level = sum;

                        const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
                        try pp.expectNl(&tokenizer);
                        if (pp.defines.get(macro_name) != null) {
                            if_kind.set(if_level, until_endif);
                            if (pp.verbose) {
                                pp.verboseLog(directive, "entering then branch of #ifdef", .{});
                            }
                        } else {
                            if_kind.set(if_level, until_else);
                            try pp.skip(&tokenizer, .until_else);
                            if (pp.verbose) {
                                pp.verboseLog(directive, "entering else branch of #ifdef", .{});
                            }
                        }
                    },
                    .keyword_ifndef => {
                        const sum, const overflowed = @addWithOverflow(if_level, 1);
                        if (overflowed != 0) return pp.fatal(directive, "too many #if nestings", .{});
                        if_level = sum;

                        const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
                        try pp.expectNl(&tokenizer);
                        if (pp.defines.get(macro_name) == null) {
                            if_kind.set(if_level, until_endif);
                        } else {
                            if_kind.set(if_level, until_else);
                            try pp.skip(&tokenizer, .until_else);
                        }
                    },
                    .keyword_elif => {
                        if (if_level == 0) {
                            try pp.err(directive, .elif_without_if);
                            if_level += 1;
                            if_kind.set(if_level, until_else);
                        } else if (if_level == 1) {
                            guard_name = null;
                        }
                        switch (if_kind.get(if_level)) {
                            until_else => if (try pp.expr(&tokenizer)) {
                                if_kind.set(if_level, until_endif);
                                if (pp.verbose) {
                                    pp.verboseLog(directive, "entering then branch of #elif", .{});
                                }
                            } else {
                                try pp.skip(&tokenizer, .until_else);
                                if (pp.verbose) {
                                    pp.verboseLog(directive, "entering else branch of #elif", .{});
                                }
                            },
                            until_endif => try pp.skip(&tokenizer, .until_endif),
                            until_endif_seen_else => {
                                try pp.err(directive, .elif_after_else);
                                skipToNl(&tokenizer);
                            },
                            else => unreachable,
                        }
                    },
                    .keyword_elifdef => {
                        if (if_level == 0) {
                            try pp.err(directive, .elifdef_without_if);
                            if_level += 1;
                            if_kind.set(if_level, until_else);
                        } else if (if_level == 1) {
                            guard_name = null;
                        }
                        switch (if_kind.get(if_level)) {
                            until_else => {
                                const macro_name = try pp.expectMacroName(&tokenizer);
                                if (macro_name == null) {
                                    if_kind.set(if_level, until_else);
                                    try pp.skip(&tokenizer, .until_else);
                                    if (pp.verbose) {
                                        pp.verboseLog(directive, "entering else branch of #elifdef", .{});
                                    }
                                } else {
                                    try pp.expectNl(&tokenizer);
                                    if (pp.defines.get(macro_name.?) != null) {
                                        if_kind.set(if_level, until_endif);
                                        if (pp.verbose) {
                                            pp.verboseLog(directive, "entering then branch of #elifdef", .{});
                                        }
                                    } else {
                                        if_kind.set(if_level, until_else);
                                        try pp.skip(&tokenizer, .until_else);
                                        if (pp.verbose) {
                                            pp.verboseLog(directive, "entering else branch of #elifdef", .{});
                                        }
                                    }
                                }
                            },
                            until_endif => try pp.skip(&tokenizer, .until_endif),
                            until_endif_seen_else => {
                                try pp.err(directive, .elifdef_after_else);
                                skipToNl(&tokenizer);
                            },
                            else => unreachable,
                        }
                    },
                    .keyword_elifndef => {
                        if (if_level == 0) {
                            try pp.err(directive, .elifdef_without_if);
                            if_level += 1;
                            if_kind.set(if_level, until_else);
                        } else if (if_level == 1) {
                            guard_name = null;
                        }
                        switch (if_kind.get(if_level)) {
                            until_else => {
                                const macro_name = try pp.expectMacroName(&tokenizer);
                                if (macro_name == null) {
                                    if_kind.set(if_level, until_else);
                                    try pp.skip(&tokenizer, .until_else);
                                    if (pp.verbose) {
                                        pp.verboseLog(directive, "entering else branch of #elifndef", .{});
                                    }
                                } else {
                                    try pp.expectNl(&tokenizer);
                                    if (pp.defines.get(macro_name.?) == null) {
                                        if_kind.set(if_level, until_endif);
                                        if (pp.verbose) {
                                            pp.verboseLog(directive, "entering then branch of #elifndef", .{});
                                        }
                                    } else {
                                        if_kind.set(if_level, until_else);
                                        try pp.skip(&tokenizer, .until_else);
                                        if (pp.verbose) {
                                            pp.verboseLog(directive, "entering else branch of #elifndef", .{});
                                        }
                                    }
                                }
                            },
                            until_endif => try pp.skip(&tokenizer, .until_endif),
                            until_endif_seen_else => {
                                try pp.err(directive, .elifndef_after_else);
                                skipToNl(&tokenizer);
                            },
                            else => unreachable,
                        }
                    },
                    .keyword_else => {
                        try pp.expectNl(&tokenizer);
                        if (if_level == 0) {
                            try pp.err(directive, .else_without_if);
                            continue;
                        } else if (if_level == 1) {
                            guard_name = null;
                        }
                        switch (if_kind.get(if_level)) {
                            until_else => {
                                if_kind.set(if_level, until_endif_seen_else);
                                if (pp.verbose) {
                                    pp.verboseLog(directive, "#else branch here", .{});
                                }
                            },
                            until_endif => try pp.skip(&tokenizer, .until_endif_seen_else),
                            until_endif_seen_else => {
                                try pp.err(directive, .else_after_else);
                                skipToNl(&tokenizer);
                            },
                            else => unreachable,
                        }
                    },
                    .keyword_endif => {
                        try pp.expectNl(&tokenizer);
                        if (if_level == 0) {
                            guard_name = null;
                            try pp.err(directive, .endif_without_if);
                            continue;
                        } else if (if_level == 1) {
                            const saved_tokenizer = tokenizer;
                            defer tokenizer = saved_tokenizer;

                            var next = tokenizer.nextNoWS();
                            while (next.id == .nl) : (next = tokenizer.nextNoWS()) {}
                            if (next.id != .eof) guard_name = null;
                        }
                        if_level -= 1;
                    },
                    .keyword_define => try pp.define(&tokenizer),
                    .keyword_undef => {
                        const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;

                        _ = pp.defines.remove(macro_name);
                        try pp.expectNl(&tokenizer);
                    },
                    .keyword_include => {
                        try pp.include(&tokenizer, .first);
                        continue;
                    },
                    .keyword_include_next => {
                        try pp.comp.diag.add(.{
                            .tag = .include_next,
                            .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line },
                        }, &.{});
                        if (pp.include_depth == 0) {
                            try pp.comp.diag.add(.{
                                .tag = .include_next_outside_header,
                                .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line },
                            }, &.{});
                            try pp.include(&tokenizer, .first);
                        } else {
                            try pp.include(&tokenizer, .next);
                        }
                    },
                    .keyword_embed => try pp.embed(&tokenizer),
                    .keyword_pragma => {
                        try pp.pragma(&tokenizer, directive, null, &.{});
                        continue;
                    },
                    .keyword_line => {
                        // #line number "file"
                        const digits = tokenizer.nextNoWS();
                        if (digits.id != .pp_num) try pp.err(digits, .line_simple_digit);
                        // TODO: validate that the pp_num token is solely digits
                        if (digits.id == .eof or digits.id == .nl) continue;
                        const name = tokenizer.nextNoWS();
                        if (name.id == .eof or name.id == .nl) continue;
                        if (name.id != .string_literal) try pp.err(name, .line_invalid_filename);
                        try pp.expectNl(&tokenizer);
                    },
                    .pp_num => {
                        // # number "file" flags
                        // TODO: validate that the pp_num token is solely digits
                        // if not, emit `GNU line marker directive requires a simple digit sequence`
                        const name = tokenizer.nextNoWS();
                        if (name.id == .eof or name.id == .nl) continue;
                        if (name.id != .string_literal) try pp.err(name, .line_invalid_filename);

                        const flag_1 = tokenizer.nextNoWS();
                        if (flag_1.id == .eof or flag_1.id == .nl) continue;
                        const flag_2 = tokenizer.nextNoWS();
                        if (flag_2.id == .eof or flag_2.id == .nl) continue;
                        const flag_3 = tokenizer.nextNoWS();
                        if (flag_3.id == .eof or flag_3.id == .nl) continue;
                        const flag_4 = tokenizer.nextNoWS();
                        if (flag_4.id == .eof or flag_4.id == .nl) continue;
                        try pp.expectNl(&tokenizer);
                    },
                    .nl => {},
                    .eof => {
                        if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive);
                        return tokFromRaw(directive);
                    },
                    else => {
                        try pp.err(tok, .invalid_preprocessing_directive);
                        skipToNl(&tokenizer);
                    },
                }
                if (pp.preserve_whitespace) {
                    tok.id = .nl;
                    try pp.tokens.append(pp.gpa, tokFromRaw(tok));
                }
            },
            .whitespace => if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)),
            .nl => {
                start_of_line = true;
                if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok));
            },
            .eof => {
                if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive);
                // The following check needs to occur here and not at the top of the function
                // because a pragma may change the level during preprocessing
                if (source.buf.len > 0 and source.buf[source.buf.len - 1] != '\n') {
                    try pp.err(tok, .newline_eof);
                }
                if (guard_name) |name| {
                    if (try pp.include_guards.fetchPut(pp.gpa, source.id, name)) |prev| {
                        assert(mem.eql(u8, name, prev.value));
                    }
                }
                return tokFromRaw(tok);
            },
            else => {
                if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) {
                    try pp.err(tok, .poisoned_identifier);
                }
                // Add the token to the buffer doing any necessary expansions.
                start_of_line = false;
                try pp.expandMacro(&tokenizer, tok);
            },
        }
    }
}
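// The conditional-inclusion bookkeeping above packs one 2-bit state per
// nesting level into `if_kind`. Sketch of the transitions for a single level:
//
//     #if X      // X true: until_endif; X false: until_else (skip to #else/#elif)
//     #elif Y    // only evaluated while the level is in the until_else state
//     #else      // moves the level to until_endif_seen_else
//     #endif     // pops the level (if_level -= 1)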
/// Get raw token source string.
/// Returned slice is invalidated when comp.generated_buf is updated.
pub fn tokSlice(pp: *Preprocessor, token: RawToken) []const u8 {
    if (token.id.lexeme()) |some| return some;
    const source = pp.comp.getSource(token.source);
    return source.buf[token.start..token.end];
}

/// Convert a token from the Tokenizer into a token used by the parser.
fn tokFromRaw(raw: RawToken) Token {
    return .{
        .id = raw.id,
        .loc = .{
            .id = raw.source,
            .byte_offset = raw.start,
            .line = raw.line,
        },
    };
}

fn err(pp: *Preprocessor, raw: RawToken, tag: Diagnostics.Tag) !void {
    try pp.comp.diag.add(.{
        .tag = tag,
        .loc = .{
            .id = raw.source,
            .byte_offset = raw.start,
            .line = raw.line,
        },
    }, &.{});
}

fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) Compilation.Error {
    const source = pp.comp.getSource(raw.source);
    const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start });
    return pp.comp.diag.fatal(source.path, line_col.line, raw.line, line_col.col, fmt, args);
}

fn verboseLog(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) void {
    const source = pp.comp.getSource(raw.source);
    const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start });

    const stderr = std.io.getStdErr().writer();
    var buf_writer = std.io.bufferedWriter(stderr);
    const writer = buf_writer.writer();
    defer buf_writer.flush() catch {};
    writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return;
    writer.print(fmt, args) catch return;
    writer.writeByte('\n') catch return;
    writer.writeAll(line_col.line) catch return;
    writer.writeByte('\n') catch return;
}

/// Consume next token, error if it is not an identifier.
fn expectMacroName(pp: *Preprocessor, tokenizer: *Tokenizer) Error!?[]const u8 {
    const macro_name = tokenizer.nextNoWS();
    if (!macro_name.id.isMacroIdentifier()) {
        try pp.err(macro_name, .macro_name_missing);
        skipToNl(tokenizer);
        return null;
    }
    return pp.tokSlice(macro_name);
}

/// Skip until after a newline, error if extra tokens before it.
fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
    var sent_err = false;
    while (true) {
        const tok = tokenizer.next();
        if (tok.id == .nl or tok.id == .eof) return;
        if (tok.id == .whitespace) continue;
        if (!sent_err) {
            sent_err = true;
            try pp.err(tok, .extra_tokens_directive_end);
        }
    }
}
/// Consume all tokens until a newline and parse the result into a boolean.
fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
    const start = pp.tokens.len;
    defer {
        for (pp.top_expansion_buf.items) |tok| Token.free(tok.expansion_locs, pp.gpa);
        pp.tokens.len = start;
    }

    pp.top_expansion_buf.items.len = 0;
    const eof = while (true) {
        var tok = tokenizer.next();
        switch (tok.id) {
            .nl, .eof => break tok,
            .whitespace => if (pp.top_expansion_buf.items.len == 0) continue,
            else => {},
        }
        try pp.top_expansion_buf.append(tokFromRaw(tok));
    } else unreachable;
    if (pp.top_expansion_buf.items.len != 0) {
        pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc;
        try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr);
    }
    for (pp.top_expansion_buf.items) |tok| {
        if (tok.id == .macro_ws) continue;
        if (!tok.id.validPreprocessorExprStart()) {
            try pp.comp.diag.add(.{
                .tag = .invalid_preproc_expr_start,
                .loc = tok.loc,
            }, tok.expansionSlice());
            return false;
        }
        break;
    } else {
        try pp.err(eof, .expected_value_in_expr);
        return false;
    }

    // validate the tokens in the expression
    try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len);
    var i: usize = 0;
    const items = pp.top_expansion_buf.items;
    while (i < items.len) : (i += 1) {
        var tok = items[i];
        switch (tok.id) {
            .string_literal,
            .string_literal_utf_16,
            .string_literal_utf_8,
            .string_literal_utf_32,
            .string_literal_wide,
            => {
                try pp.comp.diag.add(.{
                    .tag = .string_literal_in_pp_expr,
                    .loc = tok.loc,
                }, tok.expansionSlice());
                return false;
            },
            .plus_plus,
            .minus_minus,
            .plus_equal,
            .minus_equal,
            .asterisk_equal,
            .slash_equal,
            .percent_equal,
            .angle_bracket_angle_bracket_left_equal,
            .angle_bracket_angle_bracket_right_equal,
            .ampersand_equal,
            .caret_equal,
            .pipe_equal,
            .l_bracket,
            .r_bracket,
            .l_brace,
            .r_brace,
            .ellipsis,
            .semicolon,
            .hash,
            .hash_hash,
            .equal,
            .arrow,
            .period,
            => {
                try pp.comp.diag.add(.{
                    .tag = .invalid_preproc_operator,
                    .loc = tok.loc,
                }, tok.expansionSlice());
                return false;
            },
            .macro_ws, .whitespace => continue,
            .keyword_false => tok.id = .zero,
            .keyword_true => tok.id = .one,
            else => if (tok.id.isMacroIdentifier()) {
                if (tok.id == .keyword_defined) {
                    const tokens_consumed = try pp.handleKeywordDefined(&tok, items[i + 1 ..], eof);
                    i += tokens_consumed;
                } else {
                    try pp.comp.diag.add(.{
                        .tag = .undefined_macro,
                        .loc = tok.loc,
                        .extra = .{ .str = pp.expandedSlice(tok) },
                    }, tok.expansionSlice());

                    if (i + 1 < pp.top_expansion_buf.items.len and pp.top_expansion_buf.items[i + 1].id == .l_paren) {
                        try pp.comp.diag.add(.{
                            .tag = .fn_macro_undefined,
                            .loc = tok.loc,
                            .extra = .{ .str = pp.expandedSlice(tok) },
                        }, tok.expansionSlice());
                        return false;
                    }

                    tok.id = .zero; // undefined macro
                }
            },
        }
        pp.tokens.appendAssumeCapacity(tok);
    }
    try pp.tokens.append(pp.gpa, .{
        .id = .eof,
        .loc = tokFromRaw(eof).loc,
    });

    // Actually parse it.
    var parser = Parser{
        .pp = pp,
        .comp = pp.comp,
        .gpa = pp.gpa,
        .tok_ids = pp.tokens.items(.id),
        .tok_i = @intCast(start),
        .arena = pp.arena.allocator(),
        .in_macro = true,
        .data = undefined,
        .strings = undefined,
        .retained_strings = undefined,
        .value_map = undefined,
        .labels = undefined,
        .decl_buf = undefined,
        .list_buf = undefined,
        .param_buf = undefined,
        .enum_buf = undefined,
        .record_buf = undefined,
        .attr_buf = undefined,
        .field_attr_buf = undefined,
        .string_ids = undefined,
    };
    return parser.macroExpr();
}
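// Sketch of what expr() accepts: for a directive like
//
//     #if FOO + 1 > 2
//
// the rest of the line is macro-expanded exhaustively in .expr context, any
// remaining undefined identifiers are replaced with 0, and the result is
// handed to Parser.macroExpr() as a constant expression.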
/// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined
/// Returns the number of tokens consumed
fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *Token, tokens: []const Token, eof: RawToken) !usize {
    std.debug.assert(macro_tok.id == .keyword_defined);
    var it = TokenIterator.init(tokens);
    const first = it.nextNoWS() orelse {
        try pp.err(eof, .macro_name_missing);
        return it.i;
    };
    switch (first.id) {
        .l_paren => {},
        else => {
            if (!first.id.isMacroIdentifier()) {
                try pp.comp.diag.add(.{
                    .tag = .macro_name_must_be_identifier,
                    .loc = first.loc,
                    .extra = .{ .str = pp.expandedSlice(first) },
                }, first.expansionSlice());
            }
            macro_tok.id = if (pp.defines.contains(pp.expandedSlice(first))) .one else .zero;
            return it.i;
        },
    }
    const second = it.nextNoWS() orelse {
        try pp.err(eof, .macro_name_missing);
        return it.i;
    };
    if (!second.id.isMacroIdentifier()) {
        try pp.comp.diag.add(.{
            .tag = .macro_name_must_be_identifier,
            .loc = second.loc,
        }, second.expansionSlice());
        return it.i;
    }
    macro_tok.id = if (pp.defines.contains(pp.expandedSlice(second))) .one else .zero;

    const last = it.nextNoWS();
    if (last == null or last.?.id != .r_paren) {
        const tok = last orelse tokFromRaw(eof);
        try pp.comp.diag.add(.{
            .tag = .closing_paren,
            .loc = tok.loc,
        }, tok.expansionSlice());
        try pp.comp.diag.add(.{
            .tag = .to_match_paren,
            .loc = first.loc,
        }, first.expansionSlice());
    }

    return it.i;
}
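// Both operator forms handled above, as a sketch:
//
//     #if defined FOO     // bare form: consumes one identifier token
//     #if defined(FOO)    // parenthesized form: identifier plus both parens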
/// Skip until #else #elif #endif, return last directive token id.
/// Also skips nested #if ... #endifs.
fn skip(
    pp: *Preprocessor,
    tokenizer: *Tokenizer,
    cont: enum { until_else, until_endif, until_endif_seen_else },
) Error!void {
    var ifs_seen: u32 = 0;
    var line_start = true;
    while (tokenizer.index < tokenizer.buf.len) {
        if (line_start) {
            const saved_tokenizer = tokenizer.*;
            const hash = tokenizer.nextNoWS();
            if (hash.id == .nl) continue;
            line_start = false;
            if (hash.id != .hash) continue;
            const directive = tokenizer.nextNoWS();
            switch (directive.id) {
                .keyword_else => {
                    if (ifs_seen != 0) continue;
                    if (cont == .until_endif_seen_else) {
                        try pp.err(directive, .else_after_else);
                        continue;
                    }
                    tokenizer.* = saved_tokenizer;
                    return;
                },
                .keyword_elif => {
                    if (ifs_seen != 0 or cont == .until_endif) continue;
                    if (cont == .until_endif_seen_else) {
                        try pp.err(directive, .elif_after_else);
                        continue;
                    }
                    tokenizer.* = saved_tokenizer;
                    return;
                },
                .keyword_elifdef => {
                    if (ifs_seen != 0 or cont == .until_endif) continue;
                    if (cont == .until_endif_seen_else) {
                        try pp.err(directive, .elifdef_after_else);
                        continue;
                    }
                    tokenizer.* = saved_tokenizer;
                    return;
                },
                .keyword_elifndef => {
                    if (ifs_seen != 0 or cont == .until_endif) continue;
                    if (cont == .until_endif_seen_else) {
                        try pp.err(directive, .elifndef_after_else);
                        continue;
                    }
                    tokenizer.* = saved_tokenizer;
                    return;
                },
                .keyword_endif => {
                    if (ifs_seen == 0) {
                        tokenizer.* = saved_tokenizer;
                        return;
                    }
                    ifs_seen -= 1;
                },
                .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1,
                else => {},
            }
        } else if (tokenizer.buf[tokenizer.index] == '\n') {
            line_start = true;
            tokenizer.index += 1;
            tokenizer.line += 1;
            if (pp.preserve_whitespace) {
                try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{
                    .id = tokenizer.source,
                    .line = tokenizer.line,
                } });
            }
        } else {
            line_start = false;
            tokenizer.index += 1;
        }
    } else {
        const eof = tokenizer.next();
        return pp.err(eof, .unterminated_conditional_directive);
    }
}

// Skip until newline, ignore other tokens.
fn skipToNl(tokenizer: *Tokenizer) void {
    while (true) {
        const tok = tokenizer.next();
        if (tok.id == .nl or tok.id == .eof) return;
    }
}

const ExpandBuf = std.ArrayList(Token);

fn removePlacemarkers(buf: *ExpandBuf) void {
    var i: usize = buf.items.len -% 1;
    while (i < buf.items.len) : (i -%= 1) {
        if (buf.items[i].id == .placemarker) {
            const placemarker = buf.orderedRemove(i);
            Token.free(placemarker.expansion_locs, buf.allocator);
        }
    }
}

const MacroArguments = std.ArrayList([]const Token);

fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void {
    for (args.items) |item| {
        for (item) |tok| Token.free(tok.expansion_locs, allocator);
        allocator.free(item);
    }
    args.deinit();
}
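// Object-macro concatenation sketch for the ## handling below:
//
//     #define GLUE a ## b
//
// Expanding GLUE pastes the two body tokens into the single token `ab`
// via pasteTokens().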
fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf {
    var buf = ExpandBuf.init(pp.gpa);
    errdefer buf.deinit();
    try buf.ensureTotalCapacity(simple_macro.tokens.len);

    // Add all of the simple_macro's tokens to the new buffer, handling any concats.
    var i: usize = 0;
    while (i < simple_macro.tokens.len) : (i += 1) {
        const raw = simple_macro.tokens[i];
        const tok = tokFromRaw(raw);
        switch (raw.id) {
            .hash_hash => {
                var rhs = tokFromRaw(simple_macro.tokens[i + 1]);
                i += 1;
                while (true) {
                    if (rhs.id == .whitespace) {
                        rhs = tokFromRaw(simple_macro.tokens[i + 1]);
                        i += 1;
                    } else if (rhs.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
                        rhs = tokFromRaw(simple_macro.tokens[i + 1]);
                        i += 1;
                    } else break;
                }
                try pp.pasteTokens(&buf, &.{rhs});
            },
            .whitespace => if (pp.preserve_whitespace) buf.appendAssumeCapacity(tok),
            .macro_file => {
                const start = pp.comp.generated_buf.items.len;
                const source = pp.comp.getSource(pp.expansion_source_loc.id);
                try pp.comp.generated_buf.writer().print("\"{s}\"\n", .{source.path});

                buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
            },
            .macro_line => {
                const start = pp.comp.generated_buf.items.len;
                const source = pp.comp.getSource(pp.expansion_source_loc.id);
                try pp.comp.generated_buf.writer().print("{d}\n", .{source.physicalLine(pp.expansion_source_loc)});

                buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
            },
            .macro_counter => {
                defer pp.counter += 1;
                const start = pp.comp.generated_buf.items.len;
                try pp.comp.generated_buf.writer().print("{d}\n", .{pp.counter});

                buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
            },
            else => buf.appendAssumeCapacity(tok),
        }
    }

    return buf;
}

/// Join a possibly-parenthesized series of string literal tokens into a single string without
/// leading or trailing quotes. The returned slice is invalidated if pp.char_buf changes.
/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal
/// is encountered, or if no string literals are encountered
/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"')
fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const Token) ![]const u8 {
    const char_top = pp.char_buf.items.len;
    defer pp.char_buf.items.len = char_top;
    var unwrapped = toks;
    if (toks.len >= 2 and toks[0].id == .l_paren and toks[toks.len - 1].id == .r_paren) {
        unwrapped = toks[1 .. toks.len - 1];
    }
    if (unwrapped.len == 0) return error.ExpectedStringLiteral;

    for (unwrapped) |tok| {
        if (tok.id == .macro_ws) continue;
        if (tok.id != .string_literal) return error.ExpectedStringLiteral;
        const str = pp.expandedSlice(tok);
        try pp.char_buf.appendSlice(str[1 .. str.len - 1]);
    }
    return pp.char_buf.items[char_top..];
}
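// _Pragma rewriting sketch: the operator form
//
//     _Pragma("GCC diagnostic push")
//
// is destringified into `#pragma GCC diagnostic push`, appended to the
// generated buffer, and re-tokenized by the function below.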
/// Handle the _Pragma operator (implemented as a builtin macro)
fn pragmaOperator(pp: *Preprocessor, arg_tok: Token, operator_loc: Source.Location) !void {
    const arg_slice = pp.expandedSlice(arg_tok);
    const content = arg_slice[1 .. arg_slice.len - 1];
    const directive = "#pragma ";

    pp.char_buf.clearRetainingCapacity();
    const total_len = directive.len + content.len + 1; // destringify can never grow the string, + 1 for newline
    try pp.char_buf.ensureUnusedCapacity(total_len);
    pp.char_buf.appendSliceAssumeCapacity(directive);
    pp.destringify(content);
    pp.char_buf.appendAssumeCapacity('\n');

    const start = pp.comp.generated_buf.items.len;
    try pp.comp.generated_buf.appendSlice(pp.char_buf.items);
    var tmp_tokenizer = Tokenizer{
        .buf = pp.comp.generated_buf.items,
        .comp = pp.comp,
        .index = @intCast(start),
        .source = .generated,
        .line = pp.generated_line,
    };
    pp.generated_line += 1;
    const hash_tok = tmp_tokenizer.next();
    assert(hash_tok.id == .hash);
    const pragma_tok = tmp_tokenizer.next();
    assert(pragma_tok.id == .keyword_pragma);
    try pp.pragma(&tmp_tokenizer, pragma_tok, operator_loc, arg_tok.expansionSlice());
}

/// Inverts the output of the preprocessor stringify (#) operation
/// (except all whitespace is condensed to a single space)
/// writes output to pp.char_buf; assumes capacity is sufficient
/// backslash backslash -> backslash
/// backslash doublequote -> doublequote
/// All other characters remain the same
fn destringify(pp: *Preprocessor, str: []const u8) void {
    var state: enum { start, backslash_seen } = .start;
    for (str) |c| {
        switch (c) {
            '\\' => {
                if (state == .backslash_seen) pp.char_buf.appendAssumeCapacity(c);
                state = if (state == .start) .backslash_seen else .start;
            },
            else => {
                if (state == .backslash_seen and c != '"') pp.char_buf.appendAssumeCapacity('\\');
                pp.char_buf.appendAssumeCapacity(c);
                state = .start;
            },
        }
    }
}

/// Stringify `tokens` into pp.char_buf.
/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing
fn stringify(pp: *Preprocessor, tokens: []const Token) !void {
    try pp.char_buf.append('"');
    var ws_state: enum { start, need, not_needed } = .start;
    for (tokens) |tok| {
        if (tok.id == .macro_ws) {
            if (ws_state == .start) continue;
            ws_state = .need;
            continue;
        }
        if (ws_state == .need) try pp.char_buf.append(' ');
        ws_state = .not_needed;

        // backslashes not inside strings are not escaped
        const is_str = switch (tok.id) {
            .string_literal,
            .string_literal_utf_16,
            .string_literal_utf_8,
            .string_literal_utf_32,
            .string_literal_wide,
            .char_literal,
            .char_literal_utf_16,
            .char_literal_utf_32,
            .char_literal_wide,
            => true,
            else => false,
        };

        for (pp.expandedSlice(tok)) |c| {
            if (c == '"')
                try pp.char_buf.appendSlice("\\\"")
            else if (c == '\\' and is_str)
                try pp.char_buf.appendSlice("\\\\")
            else
                try pp.char_buf.append(c);
        }
    }
    if (pp.char_buf.items[pp.char_buf.items.len - 1] == '\\') {
        const tok = tokens[tokens.len - 1];
        try pp.comp.diag.add(.{
            .tag = .invalid_pp_stringify_escape,
            .loc = tok.loc,
        }, tok.expansionSlice());
        pp.char_buf.items.len -= 1;
    }
    try pp.char_buf.appendSlice("\"\n");
}
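// Stringify sketch: with `#define STR(x) #x`, the call STR(a   "b\n")
// produces the token "a \"b\\n\"": interior whitespace collapses to one
// space, quotes are always escaped, and backslashes are escaped only when
// they appear inside string or character literals.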
fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]const u8 {
    const char_top = pp.char_buf.items.len;
    defer pp.char_buf.items.len = char_top;

    // Trim leading/trailing whitespace
    var begin: usize = 0;
    var end: usize = param_toks.len;
    while (begin < end and param_toks[begin].id == .macro_ws) : (begin += 1) {}
    while (end > begin and param_toks[end - 1].id == .macro_ws) : (end -= 1) {}
    const params = param_toks[begin..end];

    if (params.len == 0) {
        try pp.comp.diag.add(.{
            .tag = .expected_filename,
            .loc = param_toks[0].loc,
        }, param_toks[0].expansionSlice());
        return null;
    }
    // no string pasting
    if (params[0].id == .string_literal and params.len > 1) {
        try pp.comp.diag.add(.{
            .tag = .closing_paren,
            .loc = params[1].loc,
        }, params[1].expansionSlice());
        return null;
    }

    for (params) |tok| {
        const str = pp.expandedSliceExtra(tok, .preserve_macro_ws, false);
        try pp.char_buf.appendSlice(str);
    }

    const include_str = pp.char_buf.items[char_top..];
    if (include_str.len < 3) {
        try pp.comp.diag.add(.{
            .tag = .empty_filename,
            .loc = params[0].loc,
        }, params[0].expansionSlice());
        return null;
    }

    switch (include_str[0]) {
        '<' => {
            if (include_str[include_str.len - 1] != '>') {
                // Ugly hack to find out where the '>' should go, since we don't have the closing ')' location
                const start = params[0].loc;
                try pp.comp.diag.add(.{
                    .tag = .header_str_closing,
                    .loc = .{ .id = start.id, .byte_offset = start.byte_offset + @as(u32, @intCast(include_str.len)) + 1, .line = start.line },
                }, params[0].expansionSlice());
                try pp.comp.diag.add(.{
                    .tag = .header_str_match,
                    .loc = params[0].loc,
                }, params[0].expansionSlice());
                return null;
            }
            return include_str;
        },
        '"' => return include_str,
        else => {
            try pp.comp.diag.add(.{
                .tag = .expected_filename,
                .loc = params[0].loc,
            }, params[0].expansionSlice());
            return null;
        },
    }
}
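// Both header-name forms reconstructed above, as a sketch:
//
//     #if __has_include(<stdarg.h>)
//     #if __has_include("config.h")
//
// The tokens between the parentheses arrive here as macro arguments and are
// re-joined into a single `<...>` or "..." spelling.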
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const Token, src_loc: Source.Location) Error!bool {
    switch (builtin) {
        .macro_param_has_attribute,
        .macro_param_has_declspec_attribute,
        .macro_param_has_feature,
        .macro_param_has_extension,
        .macro_param_has_builtin,
        => {
            var invalid: ?Token = null;
            var identifier: ?Token = null;
            for (param_toks) |tok| {
                if (tok.id == .macro_ws) continue;
                if (tok.id == .comment) continue;
                if (!tok.id.isMacroIdentifier()) {
                    invalid = tok;
                    break;
                }
                if (identifier) |_| invalid = tok else identifier = tok;
            }
            if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
            if (invalid) |some| {
                try pp.comp.diag.add(
                    .{ .tag = .feature_check_requires_identifier, .loc = some.loc },
                    some.expansionSlice(),
                );
                return false;
            }

            const ident_str = pp.expandedSlice(identifier.?);
            return switch (builtin) {
                .macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null,
                .macro_param_has_declspec_attribute => {
                    return if (pp.comp.langopts.declspec_attrs)
                        Attribute.fromString(.declspec, null, ident_str) != null
                    else
                        false;
                },
                .macro_param_has_feature => features.hasFeature(pp.comp, ident_str),
                .macro_param_has_extension => features.hasExtension(pp.comp, ident_str),
                .macro_param_has_builtin => pp.comp.hasBuiltin(ident_str),
                else => unreachable,
            };
        },
        .macro_param_has_warning => {
            const actual_param = pp.pasteStringsUnsafe(param_toks) catch |er| switch (er) {
                error.ExpectedStringLiteral => {
                    try pp.comp.diag.add(.{
                        .tag = .expected_str_literal_in,
                        .loc = param_toks[0].loc,
                        .extra = .{ .str = "__has_warning" },
                    }, param_toks[0].expansionSlice());
                    return false;
                },
                else => |e| return e,
            };
            if (!mem.startsWith(u8, actual_param, "-W")) {
                try pp.comp.diag.add(.{
                    .tag = .malformed_warning_check,
                    .loc = param_toks[0].loc,
                    .extra = .{ .str = "__has_warning" },
                }, param_toks[0].expansionSlice());
                return false;
            }
            const warning_name = actual_param[2..];
            return Diagnostics.warningExists(warning_name);
        },
        .macro_param_is_identifier => {
            var invalid: ?Token = null;
            var identifier: ?Token = null;
            for (param_toks) |tok| switch (tok.id) {
                .macro_ws => continue,
                .comment => continue,
                else => {
                    if (identifier) |_| invalid = tok else identifier = tok;
                },
            };
            if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
            if (invalid) |some| {
                try pp.comp.diag.add(.{
                    .tag = .missing_tok_builtin,
                    .loc = some.loc,
                    .extra = .{ .tok_id_expected = .r_paren },
                }, some.expansionSlice());
                return false;
            }

            const id = identifier.?.id;
            return id == .identifier or id == .extended_identifier;
        },
        .macro_param_has_include, .macro_param_has_include_next => {
            const include_str = (try pp.reconstructIncludeString(param_toks)) orelse return false;
            const include_type: Compilation.IncludeType = switch (include_str[0]) {
                '"' => .quotes,
                '<' => .angle_brackets,
                else => unreachable,
            };
            const filename = include_str[1 .. include_str.len - 1];
            if (builtin == .macro_param_has_include or pp.include_depth == 0) {
                if (builtin == .macro_param_has_include_next) {
                    try pp.comp.diag.add(.{
                        .tag = .include_next_outside_header,
                        .loc = src_loc,
                    }, &.{});
                }
                return pp.comp.hasInclude(filename, src_loc.id, include_type, .first);
            }
            return pp.comp.hasInclude(filename, src_loc.id, include_type, .next);
        },
        else => unreachable,
    }
}
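// Feature-check argument shapes accepted above, as a sketch:
//
//     __has_attribute(aligned)      // exactly one identifier
//     __has_warning("-Wundef")      // string literal beginning with "-W"
//     __is_identifier(foo)          // true unless `foo` is a keyword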
fn expandFuncMacro(
    pp: *Preprocessor,
    loc: Source.Location,
    func_macro: *const Macro,
    args: *const MacroArguments,
    expanded_args: *const MacroArguments,
) MacroError!ExpandBuf {
    var buf = ExpandBuf.init(pp.gpa);
    try buf.ensureTotalCapacity(func_macro.tokens.len);
    errdefer buf.deinit();

    var expanded_variable_arguments = ExpandBuf.init(pp.gpa);
    defer expanded_variable_arguments.deinit();
    var variable_arguments = ExpandBuf.init(pp.gpa);
    defer variable_arguments.deinit();

    if (func_macro.var_args) {
        var i: usize = func_macro.params.len;
        while (i < expanded_args.items.len) : (i += 1) {
            try variable_arguments.appendSlice(args.items[i]);
            try expanded_variable_arguments.appendSlice(expanded_args.items[i]);
            if (i != expanded_args.items.len - 1) {
                const comma = Token{ .id = .comma, .loc = .{ .id = .generated } };
                try variable_arguments.append(comma);
                try expanded_variable_arguments.append(comma);
            }
        }
    }

    // token concatenation and expansion phase
    var tok_i: usize = 0;
    while (tok_i < func_macro.tokens.len) : (tok_i += 1) {
        const raw = func_macro.tokens[tok_i];
        switch (raw.id) {
            .hash_hash => while (tok_i + 1 < func_macro.tokens.len) {
                const raw_next = func_macro.tokens[tok_i + 1];
                tok_i += 1;
                const next = switch (raw_next.id) {
                    .macro_ws => continue,
                    .hash_hash => continue,
                    .comment => if (!pp.comp.langopts.preserve_comments_in_macros)
                        continue
                    else
                        &[1]Token{tokFromRaw(raw_next)},
                    .macro_param, .macro_param_no_expand => if (args.items[raw_next.end].len > 0)
                        args.items[raw_next.end]
                    else
                        &[1]Token{tokFromRaw(.{ .id = .placemarker, .source = .generated })},
                    .keyword_va_args => variable_arguments.items,
                    else => &[1]Token{tokFromRaw(raw_next)},
                };
                try pp.pasteTokens(&buf, next);
                if (next.len != 0) break;
            },
            .macro_param_no_expand => {
                const slice = if (args.items[raw.end].len > 0)
                    args.items[raw.end]
                else
                    &[1]Token{tokFromRaw(.{ .id = .placemarker, .source = .generated })};
                const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
                try bufCopyTokens(&buf, slice, &.{raw_loc});
            },
            .macro_param => {
                const arg = expanded_args.items[raw.end];
                const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
                try bufCopyTokens(&buf, arg, &.{raw_loc});
            },
            .keyword_va_args => {
                const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
                try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
            },
            .stringify_param, .stringify_va_args => {
                const arg = if (raw.id == .stringify_va_args)
                    variable_arguments.items
                else
                    args.items[raw.end];
                pp.char_buf.clearRetainingCapacity();
                try pp.stringify(arg);

                const start = pp.comp.generated_buf.items.len;
                try pp.comp.generated_buf.appendSlice(pp.char_buf.items);

                try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw)));
            },
            .macro_param_has_attribute,
            .macro_param_has_declspec_attribute,
            .macro_param_has_warning,
            .macro_param_has_feature,
            .macro_param_has_extension,
            .macro_param_has_builtin,
            .macro_param_has_include,
            .macro_param_has_include_next,
            .macro_param_is_identifier,
            => {
                const arg = expanded_args.items[0];
                const result = if (arg.len == 0) blk: {
                    const extra = Diagnostics.Message.Extra{ .arguments = .{ .expected = 1, .actual = 0 } };
                    try pp.comp.diag.add(.{ .tag = .expected_arguments, .loc = loc, .extra = extra }, &.{});
                    break :blk false;
                } else try pp.handleBuiltinMacro(raw.id, arg, loc);
                const start = pp.comp.generated_buf.items.len;
                try pp.comp.generated_buf.writer().print("{}\n", .{@intFromBool(result)});
                try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
            },
            .macro_param_pragma_operator => {
                const param_toks = expanded_args.items[0];
                // Clang and GCC require exactly one token (so, no parentheses or string pasting)
                // even though their error messages indicate otherwise. Ours is slightly more
                // descriptive.
                var invalid: ?Token = null;
                var string: ?Token = null;
                for (param_toks) |tok| switch (tok.id) {
                    .string_literal => {
                        if (string) |_| invalid = tok else string = tok;
                    },
                    .macro_ws => continue,
                    .comment => continue,
                    else => {
                        invalid = tok;
                        break;
                    },
                };
                if (string == null and invalid == null) invalid = .{ .loc = loc, .id = .eof };
                if (invalid) |some| try pp.comp.diag.add(
                    .{ .tag = .pragma_operator_string_literal, .loc = some.loc },
                    some.expansionSlice(),
                ) else try pp.pragmaOperator(string.?, loc);
            },
            .comma => {
                if (tok_i + 2 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
                    const hash_hash = func_macro.tokens[tok_i + 1];
                    var maybe_va_args = func_macro.tokens[tok_i + 2];
                    var consumed: usize = 2;
                    if (maybe_va_args.id == .macro_ws and tok_i + 3 < func_macro.tokens.len) {
                        consumed = 3;
                        maybe_va_args = func_macro.tokens[tok_i + 3];
                    }
                    if (maybe_va_args.id == .keyword_va_args) {
                        // GNU extension: `, ##__VA_ARGS__` deletes the comma if __VA_ARGS__ is empty
                        tok_i += consumed;
                        if (func_macro.params.len == expanded_args.items.len) {
                            // Empty __VA_ARGS__, drop the comma
                            try pp.err(hash_hash, .comma_deletion_va_args);
                        } else if (func_macro.params.len == 0 and expanded_args.items.len == 1 and expanded_args.items[0].len == 0) {
                            // Ambiguous whether this is "empty __VA_ARGS__" or "__VA_ARGS__ omitted"
                            if (pp.comp.langopts.standard.isGNU()) {
                                // GNU standard, drop the comma
                                try pp.err(hash_hash, .comma_deletion_va_args);
                            } else {
                                // C standard, retain the comma
                                try buf.append(tokFromRaw(raw));
                            }
                        } else {
                            try buf.append(tokFromRaw(raw));
                            if (expanded_variable_arguments.items.len > 0 or variable_arguments.items.len == func_macro.params.len) {
                                try pp.err(hash_hash, .comma_deletion_va_args);
                            }
                            const raw_loc = Source.Location{
                                .id = maybe_va_args.source,
                                .byte_offset = maybe_va_args.start,
                                .line = maybe_va_args.line,
                            };
                            try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
                        }
                        continue;
                    }
                }
                // Regular comma, no token pasting with __VA_ARGS__
                try buf.append(tokFromRaw(raw));
            },
            else => try buf.append(tokFromRaw(raw)),
        }
    }
    removePlacemarkers(&buf);

    return buf;
}
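// GNU comma-deletion sketch for the `, ##__VA_ARGS__` case above:
//
//     #define LOG(fmt, ...) printf(fmt, ##__VA_ARGS__)
//     LOG("hi")       // no variadic args: the comma is dropped
//     LOG("%d", 1)    // variadic args present: the comma is kept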
fn shouldExpand(tok: Token, macro: *Macro) bool {
    // macro.loc.line contains the macro's end index
    if (tok.loc.id == macro.loc.id and
        tok.loc.byte_offset >= macro.loc.byte_offset and
        tok.loc.byte_offset <= macro.loc.line)
        return false;
    for (tok.expansionSlice()) |loc| {
        if (loc.id == macro.loc.id and
            loc.byte_offset >= macro.loc.byte_offset and
            loc.byte_offset <= macro.loc.line)
            return false;
    }
    if (tok.flags.expansion_disabled) return false;

    return true;
}

fn bufCopyTokens(buf: *ExpandBuf, tokens: []const Token, src: []const Source.Location) !void {
    try buf.ensureUnusedCapacity(tokens.len);
    for (tokens) |tok| {
        var copy = try tok.dupe(buf.allocator);
        errdefer Token.free(copy.expansion_locs, buf.allocator);
        try copy.addExpansionLocation(buf.allocator, src);
        buf.appendAssumeCapacity(copy);
    }
}

fn nextBufToken(
    pp: *Preprocessor,
    tokenizer: *Tokenizer,
    buf: *ExpandBuf,
    start_idx: *usize,
    end_idx: *usize,
    extend_buf: bool,
) Error!Token {
    start_idx.* += 1;
    if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) {
        if (extend_buf) {
            const raw_tok = tokenizer.next();
            if (raw_tok.id.isMacroIdentifier() and
                pp.poisoned_identifiers.get(pp.tokSlice(raw_tok)) != null)
                try pp.err(raw_tok, .poisoned_identifier);

            if (raw_tok.id == .nl) pp.add_expansion_nl += 1;

            const new_tok = tokFromRaw(raw_tok);
            end_idx.* += 1;
            try buf.append(new_tok);
            return new_tok;
        } else {
            return Token{ .id = .eof, .loc = .{ .id = .generated } };
        }
    } else {
        return buf.items[start_idx.*];
    }
}
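// Argument-collection sketch for the function below: parentheses nest, so
//
//     M(a, (b, c))
//
// yields exactly two arguments, `a` and `(b, c)`; the inner comma does not
// split an argument because `parens` is nonzero there.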
fn collectMacroFuncArguments(
    pp: *Preprocessor,
    tokenizer: *Tokenizer,
    buf: *ExpandBuf,
    start_idx: *usize,
    end_idx: *usize,
    extend_buf: bool,
    is_builtin: bool,
) !MacroArguments {
    const name_tok = buf.items[start_idx.*];
    const saved_tokenizer = tokenizer.*;
    const old_end = end_idx.*;

    while (true) {
        const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
        switch (tok.id) {
            .nl, .whitespace, .macro_ws => {},
            .l_paren => break,
            else => {
                if (is_builtin) {
                    try pp.comp.diag.add(.{
                        .tag = .missing_lparen_after_builtin,
                        .loc = name_tok.loc,
                        .extra = .{ .str = pp.expandedSlice(name_tok) },
                    }, tok.expansionSlice());
                }
                // Not a macro function call, go over normal identifier, rewind
                tokenizer.* = saved_tokenizer;
                end_idx.* = old_end;
                return error.MissingLParen;
            },
        }
    }

    // collect the arguments.
    var parens: u32 = 0;
    var args = MacroArguments.init(pp.gpa);
    errdefer deinitMacroArguments(pp.gpa, &args);
    var curArgument = std.ArrayList(Token).init(pp.gpa);
    defer curArgument.deinit();
    while (true) {
        var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
        tok.flags.is_macro_arg = true;
        switch (tok.id) {
            .comma => {
                if (parens == 0) {
                    const owned = try curArgument.toOwnedSlice();
                    errdefer pp.gpa.free(owned);
                    try args.append(owned);
                } else {
                    const duped = try tok.dupe(pp.gpa);
                    errdefer Token.free(duped.expansion_locs, pp.gpa);
                    try curArgument.append(duped);
                }
            },
            .l_paren => {
                const duped = try tok.dupe(pp.gpa);
                errdefer Token.free(duped.expansion_locs, pp.gpa);
                try curArgument.append(duped);
                parens += 1;
            },
            .r_paren => {
                if (parens == 0) {
                    const owned = try curArgument.toOwnedSlice();
                    errdefer pp.gpa.free(owned);
                    try args.append(owned);
                    break;
                } else {
                    const duped = try tok.dupe(pp.gpa);
                    errdefer Token.free(duped.expansion_locs, pp.gpa);
                    try curArgument.append(duped);
                    parens -= 1;
                }
            },
            .eof => {
                {
                    const owned = try curArgument.toOwnedSlice();
                    errdefer pp.gpa.free(owned);
                    try args.append(owned);
                }
                tokenizer.* = saved_tokenizer;
                try pp.comp.diag.add(
                    .{ .tag = .unterminated_macro_arg_list, .loc = name_tok.loc },
                    name_tok.expansionSlice(),
                );
                return error.Unterminated;
            },
            .nl, .whitespace => {
                try curArgument.append(.{ .id = .macro_ws, .loc = tok.loc });
            },
            else => {
                const duped = try tok.dupe(pp.gpa);
                errdefer Token.free(duped.expansion_locs, pp.gpa);
                try curArgument.append(duped);
            },
        }
    }

    return args;
}

fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void {
    for (buf.items[start .. start + len]) |tok| Token.free(tok.expansion_locs, pp.gpa);
    try buf.replaceRange(start, len, &.{});
    moving_end_idx.* -|= len;
}
/// The behavior of `defined` depends on whether we are in a preprocessor
/// expression context (#if or #elif) or not.
/// In a non-expression context it's just an identifier. Within a preprocessor
/// expression it is a unary operator or one-argument function.
const EvalContext = enum {
    expr,
    non_expr,
};

/// Helper for safely iterating over a slice of tokens while skipping whitespace
const TokenIterator = struct {
    toks: []const Token,
    i: usize,

    fn init(toks: []const Token) TokenIterator {
        return .{ .toks = toks, .i = 0 };
    }

    fn nextNoWS(self: *TokenIterator) ?Token {
        while (self.i < self.toks.len) : (self.i += 1) {
            const tok = self.toks[self.i];
            if (tok.id == .whitespace or tok.id == .macro_ws) continue;

            self.i += 1;
            return tok;
        }
        return null;
    }
};

fn expandMacroExhaustive(
    pp: *Preprocessor,
    tokenizer: *Tokenizer,
    buf: *ExpandBuf,
    start_idx: usize,
    end_idx: usize,
    extend_buf: bool,
    eval_ctx: EvalContext,
) MacroError!void {
    var moving_end_idx = end_idx;
    var advance_index: usize = 0;
    // rescan loop
    var do_rescan = true;
    while (do_rescan) {
        do_rescan = false;
        // expansion loop
        var idx: usize = start_idx + advance_index;
        while (idx < moving_end_idx) {
            const macro_tok = buf.items[idx];
            if (macro_tok.id == .keyword_defined and eval_ctx == .expr) {
                idx += 1;
                var it = TokenIterator.init(buf.items[idx..moving_end_idx]);
                if (it.nextNoWS()) |tok| {
                    switch (tok.id) {
                        .l_paren => {
                            _ = it.nextNoWS(); // eat (what should be) identifier
                            _ = it.nextNoWS(); // eat (what should be) r paren
                        },
                        .identifier, .extended_identifier => {},
                        else => {},
                    }
                }
                idx += it.i;
                continue;
            }
            const macro_entry = pp.defines.getPtr(pp.expandedSlice(macro_tok));
            if (macro_entry == null or !shouldExpand(buf.items[idx], macro_entry.?)) {
                idx += 1;
                continue;
            }
            if (macro_entry) |macro| macro_handler: {
                if (macro.is_func) {
                    var macro_scan_idx = idx; // to be saved in case this doesn't turn out to be a call
                    const args = pp.collectMacroFuncArguments(
                        tokenizer,
                        buf,
                        &macro_scan_idx,
                        &moving_end_idx,
                        extend_buf,
                        macro.is_builtin,
                    ) catch |er| switch (er) {
                        error.MissingLParen => {
                            if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true;
                            idx += 1;
                            break :macro_handler;
                        },
                        error.Unterminated => {
                            if (pp.comp.langopts.emulate == .gcc) idx += 1;
                            try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx, &moving_end_idx);
                            break :macro_handler;
                        },
                        else => |e| return e,
                    };
                    defer {
                        for (args.items) |item| {
                            pp.gpa.free(item);
                        }
                        args.deinit();
                    }

                    var args_count: u32 = @intCast(args.items.len);
                    // if the macro has zero arguments g() args_count is still 1
                    // an empty token list g() and a whitespace-only token list g( )
                    // counts as zero arguments for the purposes of argument-count validation
                    if (args_count == 1 and macro.params.len == 0) {
                        for (args.items[0]) |tok| {
                            if (tok.id != .macro_ws) break;
                        } else {
                            args_count = 0;
                        }
                    }

                    // Validate argument count.
                    const extra = Diagnostics.Message.Extra{
                        .arguments = .{ .expected = @intCast(macro.params.len), .actual = args_count },
                    };
                    if (macro.var_args and args_count < macro.params.len) {
                        try pp.comp.diag.add(
                            .{ .tag = .expected_at_least_arguments, .loc = buf.items[idx].loc, .extra = extra },
                            buf.items[idx].expansionSlice(),
                        );
                        idx += 1;
                        try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
                        continue;
                    }
                    if (!macro.var_args and args_count != macro.params.len) {
                        try pp.comp.diag.add(
                            .{ .tag = .expected_arguments, .loc = buf.items[idx].loc, .extra = extra },
                            buf.items[idx].expansionSlice(),
                        );
                        idx += 1;
                        try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
                        continue;
                    }
                    var expanded_args = MacroArguments.init(pp.gpa);
                    defer deinitMacroArguments(pp.gpa, &expanded_args);
                    try expanded_args.ensureTotalCapacity(args.items.len);
                    for (args.items) |arg| {
                        var expand_buf = ExpandBuf.init(pp.gpa);
                        errdefer expand_buf.deinit();
                        try expand_buf.appendSlice(arg);

                        try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx);

                        expanded_args.appendAssumeCapacity(try expand_buf.toOwnedSlice());
                    }

                    var res = try pp.expandFuncMacro(macro_tok.loc, macro, &args, &expanded_args);
                    defer res.deinit();
                    const tokens_added = res.items.len;

                    const macro_expansion_locs = macro_tok.expansionSlice();
                    for (res.items) |*tok| {
                        try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
                        try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
                    }

                    const tokens_removed = macro_scan_idx - idx + 1;
                    for (buf.items[idx .. idx + tokens_removed]) |tok| Token.free(tok.expansion_locs, pp.gpa);
                    try buf.replaceRange(idx, tokens_removed, res.items);

                    moving_end_idx += tokens_added;
                    // Overflow here means that we encountered an unterminated argument list
                    // while expanding the body of this macro.
                    moving_end_idx -|= tokens_removed;
                    idx += tokens_added;
                    do_rescan = true;
                } else {
                    const res = try pp.expandObjMacro(macro);
                    defer res.deinit();

                    const macro_expansion_locs = macro_tok.expansionSlice();
                    var increment_idx_by = res.items.len;
                    for (res.items, 0..) |*tok, i| {
                        tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg;
                        try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
                        try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
                        if (tok.id == .keyword_defined and eval_ctx == .expr) {
                            try pp.comp.diag.add(.{
                                .tag = .expansion_to_defined,
                                .loc = tok.loc,
                            }, tok.expansionSlice());
                        }

                        if (i < increment_idx_by and (tok.id == .keyword_defined or pp.defines.contains(pp.expandedSlice(tok.*)))) {
                            increment_idx_by = i;
                        }
                    }

                    Token.free(buf.items[idx].expansion_locs, pp.gpa);

                    try buf.replaceRange(idx, 1, res.items);
                    idx += increment_idx_by;
                    moving_end_idx = moving_end_idx + res.items.len - 1;
                    do_rescan = true;
                }
            }
            if (idx - start_idx == advance_index + 1 and !do_rescan) {
                advance_index += 1;
            }
        } // end of replacement phase
    } // end of scanning phase

    // trim excess buffer
    for (buf.items[moving_end_idx..]) |item| {
        Token.free(item.expansion_locs, pp.gpa);
    }
    buf.items.len = moving_end_idx;
}

/// Try to expand a macro after a possible candidate has been read from the `tokenizer`
/// into the `raw` token passed as argument
fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroError!void {
    var source_tok = tokFromRaw(raw);
    if (!raw.id.isMacroIdentifier()) {
        source_tok.id.simplifyMacroKeyword();
        return pp.tokens.append(pp.gpa, source_tok);
    }
    pp.top_expansion_buf.items.len = 0;
    try pp.top_expansion_buf.append(source_tok);
    pp.expansion_source_loc = source_tok.loc;

    try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
    try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len);
    for (pp.top_expansion_buf.items) |*tok| {
        if (tok.id == .macro_ws and !pp.preserve_whitespace) {
            Token.free(tok.expansion_locs, pp.gpa);
            continue;
        }
        if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
            Token.free(tok.expansion_locs, pp.gpa);
            continue;
        }
        tok.id.simplifyMacroKeywordExtra(true);
        pp.tokens.appendAssumeCapacity(tok.*);
    }
    if (pp.preserve_whitespace) {
        try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.add_expansion_nl);
        while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) {
            pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{
                .id = tokenizer.source,
                .line = tokenizer.line,
            } });
        }
    }
}

fn expandedSliceExtra(
    pp: *const Preprocessor,
    tok: Token,
    macro_ws_handling: enum { single_macro_ws, preserve_macro_ws },
    path_escapes: bool,
) []const u8 {
    if (tok.id.lexeme()) |some| {
        if (!tok.id.allowsDigraphs(pp.comp) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
    }
    var tmp_tokenizer = Tokenizer{
        .buf = pp.comp.getSource(tok.loc.id).buf,
        .comp = pp.comp,
        .index = tok.loc.byte_offset,
        .source = .generated,
        .path_escapes = path_escapes,
    };
    if (tok.id == .macro_string) {
        while (true) : (tmp_tokenizer.index += 1) {
            if (tmp_tokenizer.buf[tmp_tokenizer.index] == '>') break;
        }
        return tmp_tokenizer.buf[tok.loc.byte_offset .. tmp_tokenizer.index + 1];
    }
    const res = tmp_tokenizer.next();
    return tmp_tokenizer.buf[res.start..res.end];
}
pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 { return pp.expandedSliceExtra(tok, .single_macro_ws, false); } /// Concatenate two tokens and add the result to pp.comp.generated_buf. fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) Error!void { const lhs = while (lhs_toks.popOrNull()) |lhs| { if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or (lhs.id != .macro_ws and lhs.id != .comment)) break lhs; Token.free(lhs.expansion_locs, pp.gpa); } else { return bufCopyTokens(lhs_toks, rhs_toks, &.{}); }; var rhs_rest: u32 = 1; const rhs = for (rhs_toks) |rhs| { if ((pp.comp.langopts.preserve_comments_in_macros and rhs.id == .comment) or (rhs.id != .macro_ws and rhs.id != .comment)) break rhs; rhs_rest += 1; } else { return lhs_toks.appendAssumeCapacity(lhs); }; defer Token.free(lhs.expansion_locs, pp.gpa); const start = pp.comp.generated_buf.items.len; const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len; try pp.comp.generated_buf.ensureTotalCapacity(end + 1); // +1 for a newline // We cannot use the same slices here since they might be invalidated by `ensureTotalCapacity` pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(lhs)); pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(rhs)); pp.comp.generated_buf.appendAssumeCapacity('\n'); // Try to tokenize the result. var tmp_tokenizer = Tokenizer{ .buf = pp.comp.generated_buf.items, .comp = pp.comp, .index = @intCast(start), .source = .generated, }; const pasted_token = tmp_tokenizer.nextNoWSComments(); const next = tmp_tokenizer.nextNoWSComments(); const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker) .placemarker else pasted_token.id; try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs)); if (next.id != .nl and next.id != .eof) { try pp.comp.diag.add(.{ .tag = .pasting_formed_invalid, .loc = lhs.loc, .extra = .{ .str = try pp.comp.diag.arena.allocator().dupe( u8, pp.comp.generated_buf.items[start..end], ) }, }, lhs.expansionSlice()); try lhs_toks.append(tokFromRaw(next)); } try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{}); } fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Token) !Token { var pasted_token = Token{ .id = id, .loc = .{ .id = .generated, .byte_offset = @intCast(start), .line = pp.generated_line, } }; pp.generated_line += 1; try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc}); try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); return pasted_token; } /// Defines a new macro and warns if it is a duplicate fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void { const name_str = pp.tokSlice(name_tok); const gop = try pp.defines.getOrPut(name_str); if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { try pp.comp.diag.add(.{ .tag = if (gop.value_ptr.is_builtin) .builtin_macro_redefined else .macro_redefined, .loc = .{ .id = name_tok.source, .byte_offset = name_tok.start, .line = name_tok.line }, .extra = .{ .str = name_str }, }, &.{}); // TODO add a previous definition note } if (pp.verbose) { pp.verboseLog(name_tok, "macro {s} defined", .{name_str}); } gop.value_ptr.* = macro; } /// Handle a #define directive. fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { // Get macro name and validate it.
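// `defined` can never be used as a macro name; defining a keyword that is a
// valid macro identifier is diagnosed below, but the definition still proceeds.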
const macro_name = tokenizer.nextNoWS(); if (macro_name.id == .keyword_defined) { try pp.err(macro_name, .defined_as_macro_name); return skipToNl(tokenizer); } if (!macro_name.id.isMacroIdentifier()) { try pp.err(macro_name, .macro_name_must_be_identifier); return skipToNl(tokenizer); } var macro_name_token_id = macro_name.id; macro_name_token_id.simplifyMacroKeyword(); switch (macro_name_token_id) { .identifier, .extended_identifier => {}, else => if (macro_name_token_id.isMacroIdentifier()) { try pp.err(macro_name, .keyword_macro); }, } // Check for function macros and empty defines. var first = tokenizer.next(); switch (first.id) { .nl, .eof => return pp.defineMacro(macro_name, .{ .params = undefined, .tokens = undefined, .var_args = false, .loc = undefined, .is_func = false, }), .whitespace => first = tokenizer.next(), .l_paren => return pp.defineFn(tokenizer, macro_name, first), else => try pp.err(first, .whitespace_after_macro_name), } if (first.id == .hash_hash) { try pp.err(first, .hash_hash_at_start); return skipToNl(tokenizer); } first.id.simplifyMacroKeyword(); pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. var need_ws = false; // Collect the token body and validate any ## found. var tok = first; const end_index = while (true) { tok.id.simplifyMacroKeyword(); switch (tok.id) { .hash_hash => { const next = tokenizer.nextNoWSComments(); switch (next.id) { .nl, .eof => { try pp.err(tok, .hash_hash_at_end); return; }, .hash_hash => { try pp.err(next, .hash_hash_at_end); return; }, else => {}, } try pp.token_buf.append(tok); try pp.token_buf.append(next); }, .nl, .eof => break tok.start, .comment => if (pp.comp.langopts.preserve_comments_in_macros) { if (need_ws) { need_ws = false; try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); } try pp.token_buf.append(tok); }, .whitespace => need_ws = true, else => { if (tok.id != .whitespace and need_ws) { need_ws = false; try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); } try pp.token_buf.append(tok); }, } tok = tokenizer.next(); } else unreachable; const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); try pp.defineMacro(macro_name, .{ .loc = .{ .id = macro_name.source, .byte_offset = first.start, .line = end_index, }, .tokens = list, .params = undefined, .is_func = false, .var_args = false, }); } /// Handle a function like #define directive. fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_paren: RawToken) Error!void { assert(macro_name.id.isMacroIdentifier()); var params = std.ArrayList([]const u8).init(pp.gpa); defer params.deinit(); // Parse the parameter list. 
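// The list may be empty, a fixed set of identifiers, and/or end in `...` for a
// C99 variadic macro; a GNU named variadic parameter is also accepted and
// diagnosed as an extension, e.g.:
//   #define F(a, b)       two fixed parameters
//   #define F(a, ...)     C99 varargs, referenced as __VA_ARGS__
//   #define F(a, args...) GNU named varargs, referenced as `args`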
var gnu_var_args: []const u8 = ""; var var_args = false; const start_index = while (true) { var tok = tokenizer.nextNoWS(); if (tok.id == .r_paren) break tok.end; if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list); if (tok.id == .ellipsis) { var_args = true; const r_paren = tokenizer.nextNoWS(); if (r_paren.id != .r_paren) { try pp.err(r_paren, .missing_paren_param_list); try pp.err(l_paren, .to_match_paren); return skipToNl(tokenizer); } break r_paren.end; } if (!tok.id.isMacroIdentifier()) { try pp.err(tok, .invalid_token_param_list); return skipToNl(tokenizer); } try params.append(pp.tokSlice(tok)); tok = tokenizer.nextNoWS(); if (tok.id == .ellipsis) { try pp.err(tok, .gnu_va_macro); gnu_var_args = params.pop(); const r_paren = tokenizer.nextNoWS(); if (r_paren.id != .r_paren) { try pp.err(r_paren, .missing_paren_param_list); try pp.err(l_paren, .to_match_paren); return skipToNl(tokenizer); } break r_paren.end; } else if (tok.id == .r_paren) { break tok.end; } else if (tok.id != .comma) { try pp.err(tok, .expected_comma_param_list); return skipToNl(tokenizer); } } else unreachable; var need_ws = false; // Collect the body tokens and validate # and ##'s found. pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. const end_index = tok_loop: while (true) { var tok = tokenizer.next(); switch (tok.id) { .nl, .eof => break tok.start, .whitespace => need_ws = pp.token_buf.items.len != 0, .comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else { if (need_ws) { need_ws = false; try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); } try pp.token_buf.append(tok); }, .hash => { if (tok.id != .whitespace and need_ws) { need_ws = false; try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); } const param = tokenizer.nextNoWS(); blk: { if (var_args and param.id == .keyword_va_args) { tok.id = .stringify_va_args; try pp.token_buf.append(tok); continue :tok_loop; } if (!param.id.isMacroIdentifier()) break :blk; const s = pp.tokSlice(param); if (mem.eql(u8, s, gnu_var_args)) { tok.id = .stringify_va_args; try pp.token_buf.append(tok); continue :tok_loop; } for (params.items, 0..) |p, i| { if (mem.eql(u8, p, s)) { tok.id = .stringify_param; tok.end = @intCast(i); try pp.token_buf.append(tok); continue :tok_loop; } } } try pp.err(param, .hash_not_followed_param); return skipToNl(tokenizer); }, .hash_hash => { need_ws = false; // if ## appears at the beginning, the token buf is still empty // in this case, error out if (pp.token_buf.items.len == 0) { try pp.err(tok, .hash_hash_at_start); return skipToNl(tokenizer); } const saved_tokenizer = tokenizer.*; const next = tokenizer.nextNoWSComments(); if (next.id == .nl or next.id == .eof) { try pp.err(tok, .hash_hash_at_end); return; } tokenizer.* = saved_tokenizer; // convert the previous token to .macro_param_no_expand if it was .macro_param if (pp.token_buf.items[pp.token_buf.items.len - 1].id == .macro_param) { pp.token_buf.items[pp.token_buf.items.len - 1].id = .macro_param_no_expand; } try pp.token_buf.append(tok); }, else => { if (tok.id != .whitespace and need_ws) { need_ws = false; try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); } if (var_args and tok.id == .keyword_va_args) { // do nothing } else if (tok.id.isMacroIdentifier()) { tok.id.simplifyMacroKeyword(); const s = pp.tokSlice(tok); if (mem.eql(u8, gnu_var_args, s)) { tok.id = .keyword_va_args; } else for (params.items, 0..) 
|param, i| { if (mem.eql(u8, param, s)) { // NOTE: it is fine to assign .macro_param here even if a ## was the previous // token, because ## processing will consume this token with the same semantics // as .macro_param_no_expand tok.id = .macro_param; tok.end = @intCast(i); break; } } } try pp.token_buf.append(tok); }, } } else unreachable; const param_list = try pp.arena.allocator().dupe([]const u8, params.items); const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); try pp.defineMacro(macro_name, .{ .is_func = true, .params = param_list, .var_args = var_args or gnu_var_args.len != 0, .tokens = token_list, .loc = .{ .id = macro_name.source, .byte_offset = start_index, .line = end_index, }, }); } /// Handle an #embed directive fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { tokenizer.path_escapes = true; defer tokenizer.path_escapes = false; const first = tokenizer.nextNoWS(); const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof) catch |er| switch (er) { error.InvalidInclude => return, else => |e| return e, }; // Check for empty filename. const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true); if (tok_slice.len < 3) { try pp.err(first, .empty_filename); return; } const filename = tok_slice[1 .. tok_slice.len - 1]; const include_type: Compilation.IncludeType = switch (filename_tok.id) { .string_literal => .quotes, .macro_string => .angle_brackets, else => unreachable, }; const embed_bytes = (try pp.comp.findEmbed(filename, first.source, include_type)) orelse return pp.fatal(first, "'{s}' not found", .{filename}); defer pp.comp.gpa.free(embed_bytes); if (embed_bytes.len == 0) return; try pp.tokens.ensureUnusedCapacity(pp.comp.gpa, 2 * embed_bytes.len - 1); // N bytes and N-1 commas // TODO: We currently only support systems with CHAR_BIT == 8 // If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes // and correctly account for the target's endianness const writer = pp.comp.generated_buf.writer(); { const byte = embed_bytes[0]; const start = pp.comp.generated_buf.items.len; try writer.print("{d}", .{byte}); pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok)); } for (embed_bytes[1..]) |byte| { const start = pp.comp.generated_buf.items.len; try writer.print(",{d}", .{byte}); pp.tokens.appendAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok)); } try pp.comp.generated_buf.append('\n'); } /// Handle a #include directive.
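/// Includes are limited to `max_include_depth` nested levels, and a file whose
/// `#ifndef` include guard macro is already defined is skipped without being
/// re-preprocessed.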
fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void { tokenizer.path_escapes = true; defer tokenizer.path_escapes = false; const first = tokenizer.nextNoWS(); const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) { error.InvalidInclude => return, else => |e| return e, }; // Prevent stack overflow pp.include_depth += 1; defer pp.include_depth -= 1; if (pp.include_depth > max_include_depth) { try pp.comp.diag.add(.{ .tag = .too_many_includes, .loc = .{ .id = first.source, .byte_offset = first.start, .line = first.line }, }, &.{}); return error.StopPreprocessing; } if (pp.include_guards.get(new_source.id)) |guard| { if (pp.defines.contains(guard)) return; } if (pp.verbose) { pp.verboseLog(first, "include file {s}", .{new_source.path}); } const tokens_start = pp.tokens.len; try pp.addIncludeStart(new_source); const eof = pp.preprocessExtra(new_source) catch |er| switch (er) { error.StopPreprocessing => { for (pp.tokens.items(.expansion_locs)[tokens_start..]) |loc| Token.free(loc, pp.gpa); pp.tokens.len = tokens_start; return; }, else => |e| return e, }; try eof.checkMsEof(new_source, pp.comp); if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) { try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{ .id = tokenizer.source, .line = tokenizer.line, } }); } if (pp.linemarkers == .none) return; var next = first; while (true) { var tmp = tokenizer.*; next = tmp.nextNoWS(); if (next.id != .nl) break; tokenizer.* = tmp; } try pp.addIncludeResume(next.source, next.end, next.line); } /// tokens that are part of a pragma directive can happen in 3 ways: /// 1. directly in the text via `#pragma ...` /// 2. Via a string literal argument to `_Pragma` /// 3. Via a stringified macro argument which is used as an argument to `_Pragma` /// operator_loc: Location of `_Pragma`; null if this is from #pragma /// arg_locs: expansion locations of the argument to _Pragma. 
empty if #pragma or a raw string literal was used fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !Token { var tok = tokFromRaw(raw); if (operator_loc) |loc| { try tok.addExpansionLocation(pp.gpa, &.{loc}); } try tok.addExpansionLocation(pp.gpa, arg_locs); return tok; } /// Handle a pragma directive fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void { const name_tok = tokenizer.nextNoWS(); if (name_tok.id == .nl or name_tok.id == .eof) return; const name = pp.tokSlice(name_tok); try pp.tokens.append(pp.gpa, try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs)); const pragma_start: u32 = @intCast(pp.tokens.len); const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs); try pp.tokens.append(pp.gpa, pragma_name_tok); while (true) { const next_tok = tokenizer.next(); if (next_tok.id == .whitespace) continue; if (next_tok.id == .eof) { try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{ .id = .generated }, }); break; } try pp.tokens.append(pp.gpa, try pp.makePragmaToken(next_tok, operator_loc, arg_locs)); if (next_tok.id == .nl) break; } if (pp.comp.getPragma(name)) |prag| unknown: { return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) { error.UnknownPragma => break :unknown, else => |e| return e, }; } return pp.comp.diag.add(.{ .tag = .unknown_pragma, .loc = pragma_name_tok.loc, }, pragma_name_tok.expansionSlice()); } fn findIncludeFilenameToken( pp: *Preprocessor, first_token: RawToken, tokenizer: *Tokenizer, trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof }, ) !Token { const start = pp.tokens.len; defer pp.tokens.len = start; var first = first_token; if (first.id == .angle_bracket_left) to_end: { // The tokenizer does not handle include strings so do it here. while (tokenizer.index < tokenizer.buf.len) : (tokenizer.index += 1) { switch (tokenizer.buf[tokenizer.index]) { '>' => { tokenizer.index += 1; first.end = tokenizer.index; first.id = .macro_string; break :to_end; }, '\n' => break, else => {}, } } try pp.comp.diag.add(.{ .tag = .header_str_closing, .loc = .{ .id = first.source, .byte_offset = tokenizer.index, .line = first.line }, }, &.{}); try pp.err(first, .header_str_match); } // Try to expand if the argument is a macro. try pp.expandMacro(tokenizer, first); // Check that we actually got a string. const filename_tok = pp.tokens.get(start); switch (filename_tok.id) { .string_literal, .macro_string => {}, else => { try pp.err(first, .expected_filename); try pp.expectNl(tokenizer); return error.InvalidInclude; }, } switch (trailing_token_behavior) { .expect_nl_eof => { // Error on extra tokens. const nl = tokenizer.nextNoWS(); if ((nl.id != .nl and nl.id != .eof) or pp.tokens.len > start + 1) { skipToNl(tokenizer); try pp.err(first, .extra_tokens_directive_end); } }, .ignore_trailing_tokens => {}, } return filename_tok; } fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source { const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof); // Check for empty filename. const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true); if (tok_slice.len < 3) { try pp.err(first, .empty_filename); return error.InvalidInclude; } // Find the file. const filename = tok_slice[1 .. 
tok_slice.len - 1]; const include_type: Compilation.IncludeType = switch (filename_tok.id) { .string_literal => .quotes, .macro_string => .angle_brackets, else => unreachable, }; return (try pp.comp.findInclude(filename, first, include_type, which)) orelse pp.fatal(first, "'{s}' not found", .{filename}); } fn printLinemarker( pp: *Preprocessor, w: anytype, line_no: u32, source: Source, start_resume: enum(u8) { start, @"resume", none }, ) !void { try w.writeByte('#'); if (pp.linemarkers == .line_directives) try w.writeAll("line"); // line_no is 0-indexed try w.print(" {d} \"{s}\"", .{ line_no + 1, source.path }); if (pp.linemarkers == .numeric_directives) { switch (start_resume) { .none => {}, .start => try w.writeAll(" 1"), .@"resume" => try w.writeAll(" 2"), } switch (source.kind) { .user => {}, .system => try w.writeAll(" 3"), .extern_c_system => try w.writeAll(" 3 4"), } } try w.writeByte('\n'); } // How many consecutive empty lines are needed before they are replaced with linemarkers. const collapse_newlines = 8; /// Pretty print tokens and try to preserve whitespace. pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { const tok_ids = pp.tokens.items(.id); var i: u32 = 0; var last_nl = true; outer: while (true) : (i += 1) { var cur: Token = pp.tokens.get(i); switch (cur.id) { .eof => { if (!last_nl) try w.writeByte('\n'); return; }, .nl => { var newlines: u32 = 0; for (tok_ids[i..], i..) |id, j| { if (id == .nl) { newlines += 1; } else if (id == .eof) { if (!last_nl) try w.writeByte('\n'); return; } else if (id != .whitespace) { if (pp.linemarkers == .none) { if (newlines < 2) break; } else if (newlines < collapse_newlines) { break; } i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace)); if (!last_nl) try w.writeAll("\n"); if (pp.linemarkers != .none) { const next = pp.tokens.get(i); const source = pp.comp.getSource(next.loc.id); const line_col = source.lineCol(next.loc); try pp.printLinemarker(w, line_col.line_no, source, .none); last_nl = true; } continue :outer; } } last_nl = true; try w.writeAll("\n"); }, .keyword_pragma => { const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1)); const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1; const pragma_len = @as(u32, @intCast(end_idx)) - i; if (pp.comp.getPragma(pragma_name)) |prag| { if (!prag.shouldPreserveTokens(pp, i + 1)) { try w.writeByte('\n'); i += pragma_len; cur = pp.tokens.get(i); continue; } } try w.writeAll("#pragma"); i += 1; while (true) : (i += 1) { cur = pp.tokens.get(i); if (cur.id == .nl) { try w.writeByte('\n'); last_nl = true; break; } try w.writeByte(' '); const slice = pp.expandedSlice(cur); try w.writeAll(slice); } }, .whitespace => { var slice = pp.expandedSlice(cur); while (mem.indexOfScalar(u8, slice, '\n')) |some| { if (pp.linemarkers != .none) try w.writeByte('\n'); slice = slice[some + 1 ..]; } for (slice) |_| try w.writeByte(' '); last_nl = false; }, .include_start => { const source = pp.comp.getSource(cur.loc.id); try pp.printLinemarker(w, 0, source, .start); last_nl = true; }, .include_resume => { const source = pp.comp.getSource(cur.loc.id); const line_col = source.lineCol(cur.loc); if (!last_nl) try w.writeAll("\n"); try pp.printLinemarker(w, line_col.line_no, source, .@"resume"); last_nl = true; }, else => { const slice = pp.expandedSlice(cur); try w.writeAll(slice); last_nl = false; }, } } } test "Preserve pragma tokens sometimes" { const allocator = std.testing.allocator; const Test = struct { fn runPreprocessor(source_text: []const u8) ![]const u8 { var buf =
std.ArrayList(u8).init(allocator); defer buf.deinit(); var comp = Compilation.init(allocator); defer comp.deinit(); try comp.addDefaultPragmaHandlers(); var pp = Preprocessor.init(&comp); defer pp.deinit(); pp.preserve_whitespace = true; assert(pp.linemarkers == .none); const test_runner_macros = try comp.addSourceFromBuffer("", source_text); const eof = try pp.preprocess(test_runner_macros); try pp.tokens.append(pp.gpa, eof); try pp.prettyPrintTokens(buf.writer()); return allocator.dupe(u8, buf.items); } fn check(source_text: []const u8, expected: []const u8) !void { const output = try runPreprocessor(source_text); defer allocator.free(output); try std.testing.expectEqualStrings(expected, output); } }; const preserve_gcc_diagnostic = \\#pragma GCC diagnostic error "-Wnewline-eof" \\#pragma GCC warning error "-Wnewline-eof" \\int x; \\#pragma GCC ignored error "-Wnewline-eof" \\ ; try Test.check(preserve_gcc_diagnostic, preserve_gcc_diagnostic); const omit_once = \\#pragma once \\int x; \\#pragma once \\ ; // TODO should only be one newline afterwards when emulating clang try Test.check(omit_once, "\nint x;\n\n"); const omit_poison = \\#pragma GCC poison foobar \\ ; try Test.check(omit_poison, "\n"); } test "destringify" { const allocator = std.testing.allocator; const Test = struct { fn testDestringify(pp: *Preprocessor, stringified: []const u8, destringified: []const u8) !void { pp.char_buf.clearRetainingCapacity(); try pp.char_buf.ensureUnusedCapacity(stringified.len); pp.destringify(stringified); try std.testing.expectEqualStrings(destringified, pp.char_buf.items); } }; var comp = Compilation.init(allocator); defer comp.deinit(); var pp = Preprocessor.init(&comp); defer pp.deinit(); try Test.testDestringify(&pp, "hello\tworld\n", "hello\tworld\n"); try Test.testDestringify(&pp, \\ \"FOO BAR BAZ\" , \\ "FOO BAR BAZ" ); try Test.testDestringify(&pp, \\ \\t\\n \\ , \\ \t\n \\ ); } test "Include guards" { const Test = struct { /// This is here so that when #elifdef / #elifndef are added we don't forget /// to test that they don't accidentally break include guard detection fn pairsWithIfndef(tok_id: RawToken.Id) bool { return switch (tok_id) { .keyword_elif, .keyword_elifdef, .keyword_elifndef, .keyword_else, => true, .keyword_include, .keyword_include_next, .keyword_embed, .keyword_define, .keyword_defined, .keyword_undef, .keyword_ifdef, .keyword_ifndef, .keyword_error, .keyword_warning, .keyword_pragma, .keyword_line, .keyword_endif, => false, else => unreachable, }; } fn skippable(tok_id: RawToken.Id) bool { return switch (tok_id) { .keyword_defined, .keyword_va_args, .keyword_endif => true, else => false, }; } fn testIncludeGuard(allocator: std.mem.Allocator, comptime template: []const u8, tok_id: RawToken.Id, expected_guards: u32) !void { var comp = Compilation.init(allocator); defer comp.deinit(); var pp = Preprocessor.init(&comp); defer pp.deinit(); const path = try std.fs.path.join(allocator, &.{ ".", "bar.h" }); defer allocator.free(path); _ = try comp.addSourceFromBuffer(path, "int bar = 5;\n"); var buf = std.ArrayList(u8).init(allocator); defer buf.deinit(); var writer = buf.writer(); switch (tok_id) { .keyword_include, .keyword_include_next => try writer.print(template, .{ tok_id.lexeme().?, " \"bar.h\"" }), .keyword_define, .keyword_undef => try writer.print(template, .{ tok_id.lexeme().?, " BAR" }), .keyword_ifndef, .keyword_ifdef, .keyword_elifdef, .keyword_elifndef, => try writer.print(template, .{ tok_id.lexeme().?, " BAR\n#endif" }), else => try writer.print(template, .{ 
tok_id.lexeme().?, "" }), } const source = try comp.addSourceFromBuffer("test.h", buf.items); _ = try pp.preprocess(source); try std.testing.expectEqual(expected_guards, pp.include_guards.count()); } }; const tags = std.meta.tags(RawToken.Id); for (tags) |tag| { if (Test.skippable(tag)) continue; var copy = tag; copy.simplifyMacroKeyword(); if (copy != tag or tag == .keyword_else) { const inside_ifndef_template = \\//Leading comment (should be ignored) \\ \\#ifndef FOO \\#{s}{s} \\#endif ; const expected_guards: u32 = if (Test.pairsWithIfndef(tag)) 0 else 1; try Test.testIncludeGuard(std.testing.allocator, inside_ifndef_template, tag, expected_guards); const outside_ifndef_template = \\#ifndef FOO \\#endif \\#{s}{s} ; try Test.testIncludeGuard(std.testing.allocator, outside_ifndef_template, tag, 0); } } }
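// A minimal sketch of an additional test exercising `define`/`defineMacro`
// through `#define`; the source text and the expectations below are
// illustrative, modeled on the tests above.
test "Define directive records macros" {
    const allocator = std.testing.allocator;
    var comp = Compilation.init(allocator);
    defer comp.deinit();
    var pp = Preprocessor.init(&comp);
    defer pp.deinit();
    const source = try comp.addSourceFromBuffer("test.h",
        \\#define FOO 1
        \\#define BAR(x) x
        \\
    );
    _ = try pp.preprocess(source);
    // Object-like and function-like macros both end up in pp.defines.
    try std.testing.expect(pp.defines.contains("FOO"));
    const bar = pp.defines.get("BAR") orelse return error.TestUnexpectedResult;
    try std.testing.expect(bar.is_func);
    try std.testing.expect(!bar.var_args);
}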