From 3b23929be565a66a94441d107448860079c2847d Mon Sep 17 00:00:00 2001 From: Vexu Date: Sun, 2 Feb 2020 01:40:46 +0200 Subject: [PATCH] use std.c.tokenizer in translate-c --- src-self-hosted/c_tokenizer.zig | 977 -------------------------------- src-self-hosted/translate_c.zig | 373 +++++++++--- 2 files changed, 281 insertions(+), 1069 deletions(-) delete mode 100644 src-self-hosted/c_tokenizer.zig diff --git a/src-self-hosted/c_tokenizer.zig b/src-self-hosted/c_tokenizer.zig deleted file mode 100644 index 7e085bcf78..0000000000 --- a/src-self-hosted/c_tokenizer.zig +++ /dev/null @@ -1,977 +0,0 @@ -const std = @import("std"); -const expect = std.testing.expect; -const ZigClangSourceLocation = @import("clang.zig").ZigClangSourceLocation; -const Context = @import("translate_c.zig").Context; -const failDecl = @import("translate_c.zig").failDecl; - -pub const TokenList = std.SegmentedList(CToken, 32); - -pub const CToken = struct { - id: Id, - bytes: []const u8 = "", - num_lit_suffix: NumLitSuffix = .None, - - pub const Id = enum { - CharLit, - StrLit, - NumLitInt, - NumLitFloat, - Identifier, - Plus, - Minus, - Slash, - LParen, - RParen, - Eof, - Dot, - Asterisk, // * - Ampersand, // & - And, // && - Assign, // = - Or, // || - Bang, // ! - Tilde, // ~ - Shl, // << - Shr, // >> - Lt, // < - Lte, // <= - Gt, // > - Gte, // >= - Eq, // == - Ne, // != - Increment, // ++ - Decrement, // -- - Comma, - Fn, - Arrow, // -> - LBrace, - RBrace, - Pipe, - QuestionMark, - Colon, - }; - - pub const NumLitSuffix = enum { - None, - F, - L, - U, - LU, - LL, - LLU, - }; -}; - -pub fn tokenizeCMacro(ctx: *Context, loc: ZigClangSourceLocation, name: []const u8, tl: *TokenList, chars: [*:0]const u8) !void { - var index: usize = 0; - var first = true; - while (true) { - const tok = try next(ctx, loc, name, chars, &index); - if (tok.id == .StrLit or tok.id == .CharLit) - try tl.push(try zigifyEscapeSequences(ctx, loc, name, tl.allocator, tok)) - else - try tl.push(tok); - if (tok.id == .Eof) - return; - if (first) { - // distinguish NAME (EXPR) from NAME(ARGS) - first = false; - if (chars[index] == '(') { - try tl.push(.{ - .id = .Fn, - .bytes = "", - }); - } - } - } -} - -fn zigifyEscapeSequences(ctx: *Context, loc: ZigClangSourceLocation, name: []const u8, allocator: *std.mem.Allocator, tok: CToken) !CToken { - for (tok.bytes) |c| { - if (c == '\\') { - break; - } - } else return tok; - var bytes = try allocator.alloc(u8, tok.bytes.len * 2); - var state: enum { - Start, - Escape, - Hex, - Octal, - } = .Start; - var i: usize = 0; - var count: u8 = 0; - var num: u8 = 0; - for (tok.bytes) |c| { - switch (state) { - .Escape => { - switch (c) { - 'n', 'r', 't', '\\', '\'', '\"' => { - bytes[i] = c; - }, - '0'...'7' => { - count += 1; - num += c - '0'; - state = .Octal; - bytes[i] = 'x'; - }, - 'x' => { - state = .Hex; - bytes[i] = 'x'; - }, - 'a' => { - bytes[i] = 'x'; - i += 1; - bytes[i] = '0'; - i += 1; - bytes[i] = '7'; - }, - 'b' => { - bytes[i] = 'x'; - i += 1; - bytes[i] = '0'; - i += 1; - bytes[i] = '8'; - }, - 'f' => { - bytes[i] = 'x'; - i += 1; - bytes[i] = '0'; - i += 1; - bytes[i] = 'C'; - }, - 'v' => { - bytes[i] = 'x'; - i += 1; - bytes[i] = '0'; - i += 1; - bytes[i] = 'B'; - }, - '?' => { - i -= 1; - bytes[i] = '?'; - }, - 'u', 'U' => { - try failDecl(ctx, loc, name, "macro tokenizing failed: TODO unicode escape sequences", .{}); - return error.TokenizingFailed; - }, - else => { - try failDecl(ctx, loc, name, "macro tokenizing failed: unknown escape sequence", .{}); - return error.TokenizingFailed; - }, - } - i += 1; - if (state == .Escape) - state = .Start; - }, - .Start => { - if (c == '\\') { - state = .Escape; - } - bytes[i] = c; - i += 1; - }, - .Hex => { - switch (c) { - '0'...'9' => { - num = std.math.mul(u8, num, 16) catch { - try failDecl(ctx, loc, name, "macro tokenizing failed: hex literal overflowed", .{}); - return error.TokenizingFailed; - }; - num += c - '0'; - }, - 'a'...'f' => { - num = std.math.mul(u8, num, 16) catch { - try failDecl(ctx, loc, name, "macro tokenizing failed: hex literal overflowed", .{}); - return error.TokenizingFailed; - }; - num += c - 'a' + 10; - }, - 'A'...'F' => { - num = std.math.mul(u8, num, 16) catch { - try failDecl(ctx, loc, name, "macro tokenizing failed: hex literal overflowed", .{}); - return error.TokenizingFailed; - }; - num += c - 'A' + 10; - }, - else => { - i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 }); - num = 0; - if (c == '\\') - state = .Escape - else - state = .Start; - bytes[i] = c; - i += 1; - }, - } - }, - .Octal => { - const accept_digit = switch (c) { - // The maximum length of a octal literal is 3 digits - '0'...'7' => count < 3, - else => false, - }; - - if (accept_digit) { - count += 1; - num = std.math.mul(u8, num, 8) catch { - try failDecl(ctx, loc, name, "macro tokenizing failed: octal literal overflowed", .{}); - return error.TokenizingFailed; - }; - num += c - '0'; - } else { - i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 }); - num = 0; - count = 0; - if (c == '\\') - state = .Escape - else - state = .Start; - bytes[i] = c; - i += 1; - } - }, - } - } - if (state == .Hex or state == .Octal) - i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 }); - return CToken{ - .id = tok.id, - .bytes = bytes[0..i], - }; -} - -fn next(ctx: *Context, loc: ZigClangSourceLocation, name: []const u8, chars: [*:0]const u8, i: *usize) !CToken { - var state: enum { - Start, - SawLt, - SawGt, - SawPlus, - SawMinus, - SawAmpersand, - SawPipe, - SawBang, - SawEq, - CharLit, - OpenComment, - Comment, - CommentStar, - Backslash, - String, - Identifier, - Decimal, - Octal, - SawZero, - Hex, - Bin, - Float, - ExpSign, - FloatExp, - FloatExpFirst, - NumLitIntSuffixU, - NumLitIntSuffixL, - NumLitIntSuffixLL, - NumLitIntSuffixUL, - Done, - } = .Start; - - var result = CToken{ - .bytes = "", - .id = .Eof, - }; - var begin_index: usize = 0; - var digits: u8 = 0; - var pre_escape = state; - - while (true) { - const c = chars[i.*]; - if (c == 0) { - switch (state) { - .Identifier, - .Decimal, - .Hex, - .Bin, - .Octal, - .SawZero, - .Float, - .FloatExp, - => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - .Start, - .SawMinus, - .Done, - .NumLitIntSuffixU, - .NumLitIntSuffixL, - .NumLitIntSuffixUL, - .NumLitIntSuffixLL, - .SawLt, - .SawGt, - .SawPlus, - .SawAmpersand, - .SawPipe, - .SawBang, - .SawEq, - => { - return result; - }, - .CharLit, - .OpenComment, - .Comment, - .CommentStar, - .Backslash, - .String, - .ExpSign, - .FloatExpFirst, - => { - try failDecl(ctx, loc, name, "macro tokenizing failed: unexpected EOF", .{}); - return error.TokenizingFailed; - }, - } - } - switch (state) { - .Start => { - switch (c) { - ' ', '\t', '\x0B', '\x0C' => {}, - '\'' => { - state = .CharLit; - result.id = .CharLit; - begin_index = i.*; - }, - '\"' => { - state = .String; - result.id = .StrLit; - begin_index = i.*; - }, - '/' => { - state = .OpenComment; - }, - '\\' => { - state = .Backslash; - }, - '\n', '\r' => { - return result; - }, - 'a'...'z', 'A'...'Z', '_' => { - state = .Identifier; - result.id = .Identifier; - begin_index = i.*; - }, - '1'...'9' => { - state = .Decimal; - result.id = .NumLitInt; - begin_index = i.*; - }, - '0' => { - state = .SawZero; - result.id = .NumLitInt; - begin_index = i.*; - }, - '.' => { - result.id = .Dot; - state = .Done; - }, - '<' => { - result.id = .Lt; - state = .SawLt; - }, - '>' => { - result.id = .Gt; - state = .SawGt; - }, - '(' => { - result.id = .LParen; - state = .Done; - }, - ')' => { - result.id = .RParen; - state = .Done; - }, - '*' => { - result.id = .Asterisk; - state = .Done; - }, - '+' => { - result.id = .Plus; - state = .SawPlus; - }, - '-' => { - result.id = .Minus; - state = .SawMinus; - }, - '!' => { - result.id = .Bang; - state = .SawBang; - }, - '~' => { - result.id = .Tilde; - state = .Done; - }, - '=' => { - result.id = .Assign; - state = .SawEq; - }, - ',' => { - result.id = .Comma; - state = .Done; - }, - '[' => { - result.id = .LBrace; - state = .Done; - }, - ']' => { - result.id = .RBrace; - state = .Done; - }, - '|' => { - result.id = .Pipe; - state = .SawPipe; - }, - '&' => { - result.id = .Ampersand; - state = .SawAmpersand; - }, - '?' => { - result.id = .QuestionMark; - state = .Done; - }, - ':' => { - result.id = .Colon; - state = .Done; - }, - else => { - try failDecl(ctx, loc, name, "macro tokenizing failed: unexpected character '{c}'", .{c}); - return error.TokenizingFailed; - }, - } - }, - .Done => return result, - .SawMinus => { - switch (c) { - '>' => { - result.id = .Arrow; - state = .Done; - }, - '-' => { - result.id = .Decrement; - state = .Done; - }, - else => return result, - } - }, - .SawPlus => { - switch (c) { - '+' => { - result.id = .Increment; - state = .Done; - }, - else => return result, - } - }, - .SawLt => { - switch (c) { - '<' => { - result.id = .Shl; - state = .Done; - }, - '=' => { - result.id = .Lte; - state = .Done; - }, - else => return result, - } - }, - .SawGt => { - switch (c) { - '>' => { - result.id = .Shr; - state = .Done; - }, - '=' => { - result.id = .Gte; - state = .Done; - }, - else => return result, - } - }, - .SawPipe => { - switch (c) { - '|' => { - result.id = .Or; - state = .Done; - }, - else => return result, - } - }, - .SawAmpersand => { - switch (c) { - '&' => { - result.id = .And; - state = .Done; - }, - else => return result, - } - }, - .SawBang => { - switch (c) { - '=' => { - result.id = .Ne; - state = .Done; - }, - else => return result, - } - }, - .SawEq => { - switch (c) { - '=' => { - result.id = .Eq; - state = .Done; - }, - else => return result, - } - }, - .Float => { - switch (c) { - '.', '0'...'9' => {}, - 'e', 'E' => { - state = .ExpSign; - }, - 'f', - 'F', - => { - result.num_lit_suffix = .F; - result.bytes = chars[begin_index..i.*]; - state = .Done; - }, - 'l', 'L' => { - result.num_lit_suffix = .L; - result.bytes = chars[begin_index..i.*]; - state = .Done; - }, - else => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - } - }, - .ExpSign => { - switch (c) { - '+', '-' => { - state = .FloatExpFirst; - }, - '0'...'9' => { - state = .FloatExp; - }, - else => { - try failDecl(ctx, loc, name, "macro tokenizing failed: expected a digit or '+' or '-'", .{}); - return error.TokenizingFailed; - }, - } - }, - .FloatExpFirst => { - switch (c) { - '0'...'9' => { - state = .FloatExp; - }, - else => { - try failDecl(ctx, loc, name, "macro tokenizing failed: expected a digit", .{}); - return error.TokenizingFailed; - }, - } - }, - .FloatExp => { - switch (c) { - '0'...'9' => {}, - 'f', 'F' => { - result.num_lit_suffix = .F; - result.bytes = chars[begin_index..i.*]; - state = .Done; - }, - 'l', 'L' => { - result.num_lit_suffix = .L; - result.bytes = chars[begin_index..i.*]; - state = .Done; - }, - else => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - } - }, - .Decimal => { - switch (c) { - '0'...'9' => {}, - '\'' => {}, - 'u', 'U' => { - state = .NumLitIntSuffixU; - result.num_lit_suffix = .U; - result.bytes = chars[begin_index..i.*]; - }, - 'l', 'L' => { - state = .NumLitIntSuffixL; - result.num_lit_suffix = .L; - result.bytes = chars[begin_index..i.*]; - }, - '.' => { - result.id = .NumLitFloat; - state = .Float; - }, - else => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - } - }, - .SawZero => { - switch (c) { - 'x', 'X' => { - state = .Hex; - }, - 'b', 'B' => { - state = .Bin; - }, - '.' => { - state = .Float; - result.id = .NumLitFloat; - }, - 'u', 'U' => { - state = .NumLitIntSuffixU; - result.num_lit_suffix = .U; - result.bytes = chars[begin_index..i.*]; - }, - 'l', 'L' => { - state = .NumLitIntSuffixL; - result.num_lit_suffix = .L; - result.bytes = chars[begin_index..i.*]; - }, - else => { - i.* -= 1; - state = .Octal; - }, - } - }, - .Octal => { - switch (c) { - '0'...'7' => {}, - '8', '9' => { - try failDecl(ctx, loc, name, "macro tokenizing failed: invalid digit '{c}' in octal number", .{c}); - return error.TokenizingFailed; - }, - 'u', 'U' => { - state = .NumLitIntSuffixU; - result.num_lit_suffix = .U; - result.bytes = chars[begin_index..i.*]; - }, - 'l', 'L' => { - state = .NumLitIntSuffixL; - result.num_lit_suffix = .L; - result.bytes = chars[begin_index..i.*]; - }, - else => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - } - }, - .Hex => { - switch (c) { - '0'...'9', 'a'...'f', 'A'...'F' => {}, - 'u', 'U' => { - // marks the number literal as unsigned - state = .NumLitIntSuffixU; - result.num_lit_suffix = .U; - result.bytes = chars[begin_index..i.*]; - }, - 'l', 'L' => { - // marks the number literal as long - state = .NumLitIntSuffixL; - result.num_lit_suffix = .L; - result.bytes = chars[begin_index..i.*]; - }, - else => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - } - }, - .Bin => { - switch (c) { - '0'...'1' => {}, - '2'...'9' => { - try failDecl(ctx, loc, name, "macro tokenizing failed: invalid digit '{c}' in binary number", .{c}); - return error.TokenizingFailed; - }, - 'u', 'U' => { - // marks the number literal as unsigned - state = .NumLitIntSuffixU; - result.num_lit_suffix = .U; - result.bytes = chars[begin_index..i.*]; - }, - 'l', 'L' => { - // marks the number literal as long - state = .NumLitIntSuffixL; - result.num_lit_suffix = .L; - result.bytes = chars[begin_index..i.*]; - }, - else => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - } - }, - .NumLitIntSuffixU => { - switch (c) { - 'l', 'L' => { - result.num_lit_suffix = .LU; - state = .NumLitIntSuffixUL; - }, - else => { - return result; - }, - } - }, - .NumLitIntSuffixL => { - switch (c) { - 'l', 'L' => { - result.num_lit_suffix = .LL; - state = .NumLitIntSuffixLL; - }, - 'u', 'U' => { - result.num_lit_suffix = .LU; - state = .Done; - }, - else => { - return result; - }, - } - }, - .NumLitIntSuffixLL => { - switch (c) { - 'u', 'U' => { - result.num_lit_suffix = .LLU; - state = .Done; - }, - else => { - return result; - }, - } - }, - .NumLitIntSuffixUL => { - switch (c) { - 'l', 'L' => { - result.num_lit_suffix = .LLU; - state = .Done; - }, - else => { - return result; - }, - } - }, - .Identifier => { - switch (c) { - '_', 'a'...'z', 'A'...'Z', '0'...'9' => {}, - else => { - result.bytes = chars[begin_index..i.*]; - return result; - }, - } - }, - .String => { - switch (c) { - '\"' => { - result.bytes = chars[begin_index .. i.* + 1]; - state = .Done; - }, - else => {}, - } - }, - .CharLit => { - switch (c) { - '\'' => { - result.bytes = chars[begin_index .. i.* + 1]; - state = .Done; - }, - else => {}, - } - }, - .OpenComment => { - switch (c) { - '/' => { - return result; - }, - '*' => { - state = .Comment; - }, - else => { - result.id = .Slash; - state = .Done; - }, - } - }, - .Comment => { - switch (c) { - '*' => { - state = .CommentStar; - }, - else => {}, - } - }, - .CommentStar => { - switch (c) { - '/' => { - state = .Start; - }, - else => { - state = .Comment; - }, - } - }, - .Backslash => { - switch (c) { - ' ', '\t', '\x0B', '\x0C' => {}, - '\n', '\r' => { - state = .Start; - }, - else => { - try failDecl(ctx, loc, name, "macro tokenizing failed: expected whitespace", .{}); - return error.TokenizingFailed; - }, - } - }, - } - i.* += 1; - } - unreachable; -} - -fn expectTokens(tl: *TokenList, src: [*:0]const u8, expected: []CToken) void { - // these can be undefined since they are only used for error reporting - tokenizeCMacro(undefined, undefined, undefined, tl, src) catch unreachable; - var it = tl.iterator(0); - for (expected) |t| { - var tok = it.next().?; - std.testing.expectEqual(t.id, tok.id); - if (t.bytes.len > 0) { - //std.debug.warn(" {} = {}\n", .{tok.bytes, t.bytes}); - std.testing.expectEqualSlices(u8, tok.bytes, t.bytes); - } - if (t.num_lit_suffix != .None) { - std.testing.expectEqual(t.num_lit_suffix, tok.num_lit_suffix); - } - } - std.testing.expect(it.next() == null); - tl.shrink(0); -} - -test "tokenize macro" { - var tl = TokenList.init(std.testing.allocator); - defer tl.deinit(); - - expectTokens(&tl, "TEST(0\n", &[_]CToken{ - .{ .id = .Identifier, .bytes = "TEST" }, - .{ .id = .Fn }, - .{ .id = .LParen }, - .{ .id = .NumLitInt, .bytes = "0" }, - .{ .id = .Eof }, - }); - - expectTokens(&tl, "__FLT_MIN_10_EXP__ -37\n", &[_]CToken{ - .{ .id = .Identifier, .bytes = "__FLT_MIN_10_EXP__" }, - .{ .id = .Minus }, - .{ .id = .NumLitInt, .bytes = "37" }, - .{ .id = .Eof }, - }); - - expectTokens(&tl, "__llvm__ 1\n#define", &[_]CToken{ - .{ .id = .Identifier, .bytes = "__llvm__" }, - .{ .id = .NumLitInt, .bytes = "1" }, - .{ .id = .Eof }, - }); - - expectTokens(&tl, "TEST 2", &[_]CToken{ - .{ .id = .Identifier, .bytes = "TEST" }, - .{ .id = .NumLitInt, .bytes = "2" }, - .{ .id = .Eof }, - }); - - expectTokens(&tl, "FOO 0ull", &[_]CToken{ - .{ .id = .Identifier, .bytes = "FOO" }, - .{ .id = .NumLitInt, .bytes = "0", .num_lit_suffix = .LLU }, - .{ .id = .Eof }, - }); -} - -test "tokenize macro ops" { - var tl = TokenList.init(std.testing.allocator); - defer tl.deinit(); - - expectTokens(&tl, "ADD A + B", &[_]CToken{ - .{ .id = .Identifier, .bytes = "ADD" }, - .{ .id = .Identifier, .bytes = "A" }, - .{ .id = .Plus }, - .{ .id = .Identifier, .bytes = "B" }, - .{ .id = .Eof }, - }); - - expectTokens(&tl, "ADD (A) + B", &[_]CToken{ - .{ .id = .Identifier, .bytes = "ADD" }, - .{ .id = .LParen }, - .{ .id = .Identifier, .bytes = "A" }, - .{ .id = .RParen }, - .{ .id = .Plus }, - .{ .id = .Identifier, .bytes = "B" }, - .{ .id = .Eof }, - }); - - expectTokens(&tl, "ADD (A) + B", &[_]CToken{ - .{ .id = .Identifier, .bytes = "ADD" }, - .{ .id = .LParen }, - .{ .id = .Identifier, .bytes = "A" }, - .{ .id = .RParen }, - .{ .id = .Plus }, - .{ .id = .Identifier, .bytes = "B" }, - .{ .id = .Eof }, - }); -} - -test "escape sequences" { - var buf: [1024]u8 = undefined; - var alloc = std.heap.FixedBufferAllocator.init(buf[0..]); - const a = &alloc.allocator; - // these can be undefined since they are only used for error reporting - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .StrLit, - .bytes = "\\x0077", - })).bytes, "\\x77")); - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .StrLit, - .bytes = "\\24500", - })).bytes, "\\xa500")); - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .StrLit, - .bytes = "\\x0077 abc", - })).bytes, "\\x77 abc")); - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .StrLit, - .bytes = "\\045abc", - })).bytes, "\\x25abc")); - - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .CharLit, - .bytes = "\\0", - })).bytes, "\\x00")); - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .CharLit, - .bytes = "\\00", - })).bytes, "\\x00")); - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .CharLit, - .bytes = "\\000\\001", - })).bytes, "\\x00\\x01")); - expect(std.mem.eql(u8, (try zigifyEscapeSequences(undefined, undefined, undefined, a, .{ - .id = .CharLit, - .bytes = "\\000abc", - })).bytes, "\\x00abc")); -} diff --git a/src-self-hosted/translate_c.zig b/src-self-hosted/translate_c.zig index ec89072ca4..df83b697a7 100644 --- a/src-self-hosted/translate_c.zig +++ b/src-self-hosted/translate_c.zig @@ -6,8 +6,9 @@ const assert = std.debug.assert; const ast = std.zig.ast; const Token = std.zig.Token; usingnamespace @import("clang.zig"); -const ctok = @import("c_tokenizer.zig"); -const CToken = ctok.CToken; +const ctok = std.c.tokenizer; +const CToken = std.c.Token; +const CTokenList = std.c.tokenizer.Source.TokenList; const mem = std.mem; const math = std.math; @@ -4818,7 +4819,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void { // TODO if we see #undef, delete it from the table var it = ZigClangASTUnit_getLocalPreprocessingEntities_begin(unit); const it_end = ZigClangASTUnit_getLocalPreprocessingEntities_end(unit); - var tok_list = ctok.TokenList.init(c.a()); + var tok_list = CTokenList.init(c.a()); const scope = c.global_scope; while (it.I != it_end.I) : (it.I += 1) { @@ -4829,6 +4830,7 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void { const macro = @ptrCast(*ZigClangMacroDefinitionRecord, entity); const raw_name = ZigClangMacroDefinitionRecord_getName_getNameStart(macro); const begin_loc = ZigClangMacroDefinitionRecord_getSourceRange_getBegin(macro); + // const end_loc = ZigClangMacroDefinitionRecord_getSourceRange_getEnd(macro); const name = try c.str(raw_name); // TODO https://github.com/ziglang/zig/issues/3756 @@ -4839,42 +4841,61 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void { } const begin_c = ZigClangSourceManager_getCharacterData(c.source_manager, begin_loc); - ctok.tokenizeCMacro(c, begin_loc, mangled_name, &tok_list, begin_c) catch |err| switch (err) { - error.OutOfMemory => |e| return e, - else => { - continue; + // const end_c = ZigClangSourceManager_getCharacterData(c.source_manager, end_loc); + // const slice = begin_c[0 .. @ptrToInt(end_c) - @ptrToInt(begin_c)]; + const slice = begin_c[0..mem.len(u8, begin_c)]; + + tok_list.shrink(0); + var tokenizer = std.c.Tokenizer{ + .source = &std.c.tokenizer.Source{ + .buffer = slice, + .file_name = undefined, + .tokens = undefined, }, }; + while (true) { + const tok = tokenizer.next(); + switch (tok.id) { + .Nl, .Eof => { + try tok_list.push(tok); + break; + }, + .LineComment, .MultiLineComment => continue, + else => {}, + } + try tok_list.push(tok); + } var tok_it = tok_list.iterator(0); const first_tok = tok_it.next().?; - assert(first_tok.id == .Identifier and mem.eql(u8, first_tok.bytes, name)); + assert(first_tok.id == .Identifier and mem.eql(u8, slice[first_tok.start..first_tok.end], name)); + + var macro_fn = false; const next = tok_it.peek().?; switch (next.id) { .Identifier => { // if it equals itself, ignore. for example, from stdio.h: // #define stdin stdin - if (mem.eql(u8, name, next.bytes)) { + if (mem.eql(u8, name, slice[next.start..next.end])) { continue; } }, - .Eof => { + .Nl, .Eof => { // this means it is a macro without a value // we don't care about such things continue; }, + .LParen => { + // if the name is immediately followed by a '(' then it is a function + macro_fn = first_tok.end == next.start; + }, else => {}, } - const macro_fn = if (tok_it.peek().?.id == .Fn) blk: { - _ = tok_it.next(); - break :blk true; - } else false; - (if (macro_fn) - transMacroFnDefine(c, &tok_it, mangled_name, begin_loc) + transMacroFnDefine(c, &tok_it, slice, mangled_name, begin_loc) else - transMacroDefine(c, &tok_it, mangled_name, begin_loc)) catch |err| switch (err) { + transMacroDefine(c, &tok_it, slice, mangled_name, begin_loc)) catch |err| switch (err) { error.ParseError => continue, error.OutOfMemory => |e| return e, }; @@ -4884,15 +4905,15 @@ fn transPreprocessorEntities(c: *Context, unit: *ZigClangASTUnit) Error!void { } } -fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void { +fn transMacroDefine(c: *Context, it: *CTokenList.Iterator, source: []const u8, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void { const scope = &c.global_scope.base; const node = try transCreateNodeVarDecl(c, true, true, name); node.eq_token = try appendToken(c, .Equal, "="); - node.init_node = try parseCExpr(c, it, source_loc, scope); + node.init_node = try parseCExpr(c, it, source, source_loc, scope); const last = it.next().?; - if (last.id != .Eof) + if (last.id != .Eof and last.id != .Nl) return failDecl( c, source_loc, @@ -4905,7 +4926,7 @@ fn transMacroDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, _ = try c.global_scope.macro_table.put(name, &node.base); } -fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void { +fn transMacroFnDefine(c: *Context, it: *CTokenList.Iterator, source: []const u8, name: []const u8, source_loc: ZigClangSourceLocation) ParseError!void { const block_scope = try Scope.Block.init(c, &c.global_scope.base, null); const scope = &block_scope.base; @@ -4937,7 +4958,7 @@ fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u ); } - const mangled_name = try block_scope.makeMangledName(c, param_tok.bytes); + const mangled_name = try block_scope.makeMangledName(c, source[param_tok.start..param_tok.end]); const param_name_tok = try appendIdentifier(c, mangled_name); _ = try appendToken(c, .Colon, ":"); @@ -5000,7 +5021,7 @@ fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u const block = try transCreateNodeBlock(c, null); const return_expr = try transCreateNodeReturnExpr(c); - const expr = try parseCExpr(c, it, source_loc, scope); + const expr = try parseCExpr(c, it, source, source_loc, scope); const last = it.next().?; if (last.id != .Eof) return failDecl( @@ -5022,27 +5043,28 @@ fn transMacroFnDefine(c: *Context, it: *ctok.TokenList.Iterator, name: []const u const ParseError = Error || error{ParseError}; -fn parseCExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { - const node = try parseCPrefixOpExpr(c, it, source_loc, scope); +fn parseCExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { + const node = try parseCPrefixOpExpr(c, it, source, source_loc, scope); switch (it.next().?.id) { .QuestionMark => { // must come immediately after expr _ = try appendToken(c, .RParen, ")"); const if_node = try transCreateNodeIf(c); if_node.condition = node; - if_node.body = try parseCPrimaryExpr(c, it, source_loc, scope); + if_node.body = try parseCPrimaryExpr(c, it, source, source_loc, scope); if (it.next().?.id != .Colon) { + const first_tok = it.list.at(0); try failDecl( c, source_loc, - it.list.at(0).*.bytes, + source[first_tok.start..first_tok.end], "unable to translate C expr: expected ':'", .{}, ); return error.ParseError; } if_node.@"else" = try transCreateNodeElse(c); - if_node.@"else".?.body = try parseCPrimaryExpr(c, it, source_loc, scope); + if_node.@"else".?.body = try parseCPrimaryExpr(c, it, source, source_loc, scope); return &if_node.base; }, else => { @@ -5052,30 +5074,30 @@ fn parseCExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSou } } -fn parseCNumLit(c: *Context, tok: *CToken, source_loc: ZigClangSourceLocation) ParseError!*ast.Node { - if (tok.id == .NumLitInt) { - var lit_bytes = tok.bytes; +fn parseCNumLit(c: *Context, tok: *CToken, source: []const u8, source_loc: ZigClangSourceLocation) ParseError!*ast.Node { + var lit_bytes = source[tok.start..tok.end]; - if (tok.bytes.len > 2 and tok.bytes[0] == '0') { - switch (tok.bytes[1]) { + if (tok.id == .IntegerLiteral) { + if (lit_bytes.len > 2 and lit_bytes[0] == '0') { + switch (lit_bytes[1]) { '0'...'7' => { // Octal - lit_bytes = try std.fmt.allocPrint(c.a(), "0o{}", .{tok.bytes}); + lit_bytes = try std.fmt.allocPrint(c.a(), "0o{}", .{lit_bytes}); }, 'X' => { // Hexadecimal with capital X, valid in C but not in Zig - lit_bytes = try std.fmt.allocPrint(c.a(), "0x{}", .{tok.bytes[2..]}); + lit_bytes = try std.fmt.allocPrint(c.a(), "0x{}", .{lit_bytes[2..]}); }, else => {}, } } - if (tok.num_lit_suffix == .None) { + if (tok.id.IntegerLiteral == .None) { return transCreateNodeInt(c, lit_bytes); } const cast_node = try transCreateNodeBuiltinFnCall(c, "@as"); - try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.num_lit_suffix) { + try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.id.IntegerLiteral) { .U => "c_uint", .L => "c_long", .LU => "c_ulong", @@ -5083,55 +5105,216 @@ fn parseCNumLit(c: *Context, tok: *CToken, source_loc: ZigClangSourceLocation) P .LLU => "c_ulonglong", else => unreachable, })); + lit_bytes = lit_bytes[0 .. lit_bytes.len - switch (tok.id.IntegerLiteral) { + .U, .L => @as(u8, 1), + .LU, .LL => 2, + .LLU => 3, + else => unreachable, + }]; _ = try appendToken(c, .Comma, ","); try cast_node.params.push(try transCreateNodeInt(c, lit_bytes)); cast_node.rparen_token = try appendToken(c, .RParen, ")"); return &cast_node.base; - } else if (tok.id == .NumLitFloat) { - if (tok.num_lit_suffix == .None) { - return transCreateNodeFloat(c, tok.bytes); + } else if (tok.id == .FloatLiteral) { + if (tok.id.FloatLiteral == .None) { + return transCreateNodeFloat(c, lit_bytes); } const cast_node = try transCreateNodeBuiltinFnCall(c, "@as"); - try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.num_lit_suffix) { + try cast_node.params.push(try transCreateNodeIdentifier(c, switch (tok.id.FloatLiteral) { .F => "f32", .L => "f64", else => unreachable, })); _ = try appendToken(c, .Comma, ","); - try cast_node.params.push(try transCreateNodeFloat(c, tok.bytes)); + try cast_node.params.push(try transCreateNodeFloat(c, lit_bytes[0 .. lit_bytes.len - 1])); cast_node.rparen_token = try appendToken(c, .RParen, ")"); return &cast_node.base; } else unreachable; } -fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { +fn zigifyEscapeSequences(ctx: *Context, source: []const u8, name: []const u8, source_loc: ZigClangSourceLocation) ![]const u8 { + for (source) |c| { + if (c == '\\') { + break; + } + } else return source; + var bytes = try ctx.a().alloc(u8, source.len * 2); + var state: enum { + Start, + Escape, + Hex, + Octal, + } = .Start; + var i: usize = 0; + var count: u8 = 0; + var num: u8 = 0; + for (source) |c| { + switch (state) { + .Escape => { + switch (c) { + 'n', 'r', 't', '\\', '\'', '\"' => { + bytes[i] = c; + }, + '0'...'7' => { + count += 1; + num += c - '0'; + state = .Octal; + bytes[i] = 'x'; + }, + 'x' => { + state = .Hex; + bytes[i] = 'x'; + }, + 'a' => { + bytes[i] = 'x'; + i += 1; + bytes[i] = '0'; + i += 1; + bytes[i] = '7'; + }, + 'b' => { + bytes[i] = 'x'; + i += 1; + bytes[i] = '0'; + i += 1; + bytes[i] = '8'; + }, + 'f' => { + bytes[i] = 'x'; + i += 1; + bytes[i] = '0'; + i += 1; + bytes[i] = 'C'; + }, + 'v' => { + bytes[i] = 'x'; + i += 1; + bytes[i] = '0'; + i += 1; + bytes[i] = 'B'; + }, + '?' => { + i -= 1; + bytes[i] = '?'; + }, + 'u', 'U' => { + try failDecl(ctx, source_loc, name, "macro tokenizing failed: TODO unicode escape sequences", .{}); + return error.ParseError; + }, + else => { + try failDecl(ctx, source_loc, name, "macro tokenizing failed: unknown escape sequence", .{}); + return error.ParseError; + }, + } + i += 1; + if (state == .Escape) + state = .Start; + }, + .Start => { + if (c == '\\') { + state = .Escape; + } + bytes[i] = c; + i += 1; + }, + .Hex => { + switch (c) { + '0'...'9' => { + num = std.math.mul(u8, num, 16) catch { + try failDecl(ctx, source_loc, name, "macro tokenizing failed: hex literal overflowed", .{}); + return error.ParseError; + }; + num += c - '0'; + }, + 'a'...'f' => { + num = std.math.mul(u8, num, 16) catch { + try failDecl(ctx, source_loc, name, "macro tokenizing failed: hex literal overflowed", .{}); + return error.ParseError; + }; + num += c - 'a' + 10; + }, + 'A'...'F' => { + num = std.math.mul(u8, num, 16) catch { + try failDecl(ctx, source_loc, name, "macro tokenizing failed: hex literal overflowed", .{}); + return error.ParseError; + }; + num += c - 'A' + 10; + }, + else => { + i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 }); + num = 0; + if (c == '\\') + state = .Escape + else + state = .Start; + bytes[i] = c; + i += 1; + }, + } + }, + .Octal => { + const accept_digit = switch (c) { + // The maximum length of a octal literal is 3 digits + '0'...'7' => count < 3, + else => false, + }; + + if (accept_digit) { + count += 1; + num = std.math.mul(u8, num, 8) catch { + try failDecl(ctx, source_loc, name, "macro tokenizing failed: octal literal overflowed", .{}); + return error.ParseError; + }; + num += c - '0'; + } else { + i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 }); + num = 0; + count = 0; + if (c == '\\') + state = .Escape + else + state = .Start; + bytes[i] = c; + i += 1; + } + }, + } + } + if (state == .Hex or state == .Octal) + i += std.fmt.formatIntBuf(bytes[i..], num, 16, false, std.fmt.FormatOptions{ .fill = '0', .width = 2 }); + return bytes[0..i]; +} + +fn parseCPrimaryExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { const tok = it.next().?; switch (tok.id) { - .CharLit => { - const token = try appendToken(c, .CharLiteral, tok.bytes); + .CharLiteral => { + const first_tok = it.list.at(0); + const token = try appendToken(c, .CharLiteral, try zigifyEscapeSequences(c, source[tok.start..tok.end], source[first_tok.start..first_tok.end], source_loc)); const node = try c.a().create(ast.Node.CharLiteral); node.* = ast.Node.CharLiteral{ .token = token, }; return &node.base; }, - .StrLit => { - const token = try appendToken(c, .StringLiteral, tok.bytes); + .StringLiteral => { + const first_tok = it.list.at(0); + const token = try appendToken(c, .StringLiteral, try zigifyEscapeSequences(c, source[tok.start..tok.end], source[first_tok.start..first_tok.end], source_loc)); const node = try c.a().create(ast.Node.StringLiteral); node.* = ast.Node.StringLiteral{ .token = token, }; return &node.base; }, - .NumLitInt, .NumLitFloat => { - return parseCNumLit(c, tok, source_loc); + .IntegerLiteral, .FloatLiteral => { + return parseCNumLit(c, tok, source, source_loc); }, .Identifier => { - const mangled_name = scope.getAlias(tok.bytes); + const mangled_name = scope.getAlias(source[tok.start..tok.end]); return transCreateNodeIdentifier(c, mangled_name); }, .LParen => { - const inner_node = try parseCExpr(c, it, source_loc, scope); + const inner_node = try parseCExpr(c, it, source, source_loc, scope); if (it.peek().?.id == .RParen) { _ = it.next(); @@ -5144,13 +5327,14 @@ fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigC // hack to get zig fmt to render a comma in builtin calls _ = try appendToken(c, .Comma, ","); - const node_to_cast = try parseCExpr(c, it, source_loc, scope); + const node_to_cast = try parseCExpr(c, it, source, source_loc, scope); if (it.next().?.id != .RParen) { + const first_tok = it.list.at(0); try failDecl( c, source_loc, - it.list.at(0).*.bytes, + source[first_tok.start..first_tok.end], "unable to translate C expr: expected ')''", .{}, ); @@ -5228,10 +5412,11 @@ fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigC return &if_1.base; }, else => { + const first_tok = it.list.at(0); try failDecl( c, source_loc, - it.list.at(0).*.bytes, + source[first_tok.start..first_tok.end], "unable to translate C expr: unexpected token {}", .{tok.id}, ); @@ -5240,33 +5425,35 @@ fn parseCPrimaryExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigC } } -fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { - var node = try parseCPrimaryExpr(c, it, source_loc, scope); +fn parseCSuffixOpExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { + var node = try parseCPrimaryExpr(c, it, source, source_loc, scope); while (true) { const tok = it.next().?; switch (tok.id) { - .Dot => { + .Period => { const name_tok = it.next().?; if (name_tok.id != .Identifier) { + const first_tok = it.list.at(0); try failDecl( c, source_loc, - it.list.at(0).*.bytes, + source[first_tok.start..first_tok.end], "unable to translate C expr: expected identifier", .{}, ); return error.ParseError; } - node = try transCreateNodeFieldAccess(c, node, name_tok.bytes); + node = try transCreateNodeFieldAccess(c, node, source[name_tok.start..name_tok.end]); }, .Arrow => { const name_tok = it.next().?; if (name_tok.id != .Identifier) { + const first_tok = it.list.at(0); try failDecl( c, source_loc, - it.list.at(0).*.bytes, + source[first_tok.start..first_tok.end], "unable to translate C expr: expected identifier", .{}, ); @@ -5274,7 +5461,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig } const deref = try transCreateNodePtrDeref(c, node); - node = try transCreateNodeFieldAccess(c, deref, name_tok.bytes); + node = try transCreateNodeFieldAccess(c, deref, source[name_tok.start..name_tok.end]); }, .Asterisk => { if (it.peek().?.id == .RParen) { @@ -5289,7 +5476,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig } else { // expr * expr const op_token = try appendToken(c, .Asterisk, "*"); - const rhs = try parseCPrimaryExpr(c, it, source_loc, scope); + const rhs = try parseCPrimaryExpr(c, it, source, source_loc, scope); const mul_node = try c.a().create(ast.Node.InfixOp); mul_node.* = .{ .op_token = op_token, @@ -5300,9 +5487,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig node = &mul_node.base; } }, - .Shl => { + .AngleBracketAngleBracketLeft => { const op_token = try appendToken(c, .AngleBracketAngleBracketLeft, "<<"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const bitshift_node = try c.a().create(ast.Node.InfixOp); bitshift_node.* = .{ .op_token = op_token, @@ -5312,9 +5499,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }; node = &bitshift_node.base; }, - .Shr => { + .AngleBracketAngleBracketRight => { const op_token = try appendToken(c, .AngleBracketAngleBracketRight, ">>"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const bitshift_node = try c.a().create(ast.Node.InfixOp); bitshift_node.* = .{ .op_token = op_token, @@ -5326,7 +5513,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }, .Pipe => { const op_token = try appendToken(c, .Pipe, "|"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const or_node = try c.a().create(ast.Node.InfixOp); or_node.* = .{ .op_token = op_token, @@ -5338,7 +5525,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }, .Ampersand => { const op_token = try appendToken(c, .Ampersand, "&"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const bitand_node = try c.a().create(ast.Node.InfixOp); bitand_node.* = .{ .op_token = op_token, @@ -5350,7 +5537,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }, .Plus => { const op_token = try appendToken(c, .Plus, "+"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const add_node = try c.a().create(ast.Node.InfixOp); add_node.* = .{ .op_token = op_token, @@ -5362,7 +5549,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }, .Minus => { const op_token = try appendToken(c, .Minus, "-"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const sub_node = try c.a().create(ast.Node.InfixOp); sub_node.* = .{ .op_token = op_token, @@ -5372,9 +5559,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }; node = &sub_node.base; }, - .And => { + .AmpersandAmpersand => { const op_token = try appendToken(c, .Keyword_and, "and"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const and_node = try c.a().create(ast.Node.InfixOp); and_node.* = .{ .op_token = op_token, @@ -5384,9 +5571,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }; node = &and_node.base; }, - .Or => { + .PipePipe => { const op_token = try appendToken(c, .Keyword_or, "or"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const or_node = try c.a().create(ast.Node.InfixOp); or_node.* = .{ .op_token = op_token, @@ -5396,9 +5583,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }; node = &or_node.base; }, - .Gt => { + .AngleBracketRight => { const op_token = try appendToken(c, .AngleBracketRight, ">"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const and_node = try c.a().create(ast.Node.InfixOp); and_node.* = .{ .op_token = op_token, @@ -5408,9 +5595,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }; node = &and_node.base; }, - .Gte => { + .AngleBracketRightEqual => { const op_token = try appendToken(c, .AngleBracketRightEqual, ">="); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const and_node = try c.a().create(ast.Node.InfixOp); and_node.* = .{ .op_token = op_token, @@ -5420,9 +5607,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }; node = &and_node.base; }, - .Lt => { + .AngleBracketLeft => { const op_token = try appendToken(c, .AngleBracketLeft, "<"); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const and_node = try c.a().create(ast.Node.InfixOp); and_node.* = .{ .op_token = op_token, @@ -5432,9 +5619,9 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }; node = &and_node.base; }, - .Lte => { + .AngleBracketLeftEqual => { const op_token = try appendToken(c, .AngleBracketLeftEqual, "<="); - const rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + const rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); const and_node = try c.a().create(ast.Node.InfixOp); and_node.* = .{ .op_token = op_token, @@ -5446,14 +5633,15 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig }, .LBrace => { const arr_node = try transCreateNodeArrayAccess(c, node); - arr_node.op.ArrayAccess = try parseCPrefixOpExpr(c, it, source_loc, scope); + arr_node.op.ArrayAccess = try parseCPrefixOpExpr(c, it, source, source_loc, scope); arr_node.rtoken = try appendToken(c, .RBrace, "]"); node = &arr_node.base; if (it.next().?.id != .RBrace) { + const first_tok = it.list.at(0); try failDecl( c, source_loc, - it.list.at(0).*.bytes, + source[first_tok.start..first_tok.end], "unable to translate C expr: expected ']'", .{}, ); @@ -5463,7 +5651,7 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig .LParen => { const call_node = try transCreateNodeFnCall(c, node); while (true) { - const arg = try parseCPrefixOpExpr(c, it, source_loc, scope); + const arg = try parseCPrefixOpExpr(c, it, source, source_loc, scope); try call_node.op.Call.params.push(arg); const next = it.next().?; if (next.id == .Comma) @@ -5471,10 +5659,11 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig else if (next.id == .RParen) break else { + const first_tok = it.list.at(0); try failDecl( c, source_loc, - it.list.at(0).*.bytes, + source[first_tok.start..first_tok.end], "unable to translate C expr: expected ',' or ')'", .{}, ); @@ -5492,32 +5681,32 @@ fn parseCSuffixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: Zig } } -fn parseCPrefixOpExpr(c: *Context, it: *ctok.TokenList.Iterator, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { +fn parseCPrefixOpExpr(c: *Context, it: *CTokenList.Iterator, source: []const u8, source_loc: ZigClangSourceLocation, scope: *Scope) ParseError!*ast.Node { const op_tok = it.next().?; switch (op_tok.id) { .Bang => { const node = try transCreateNodePrefixOp(c, .BoolNot, .Bang, "!"); - node.rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + node.rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); return &node.base; }, .Minus => { const node = try transCreateNodePrefixOp(c, .Negation, .Minus, "-"); - node.rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + node.rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); return &node.base; }, .Tilde => { const node = try transCreateNodePrefixOp(c, .BitNot, .Tilde, "~"); - node.rhs = try parseCPrefixOpExpr(c, it, source_loc, scope); + node.rhs = try parseCPrefixOpExpr(c, it, source, source_loc, scope); return &node.base; }, .Asterisk => { - const prefix_op_expr = try parseCPrefixOpExpr(c, it, source_loc, scope); + const prefix_op_expr = try parseCPrefixOpExpr(c, it, source, source_loc, scope); return try transCreateNodePtrDeref(c, prefix_op_expr); }, else => { _ = it.prev(); - return try parseCSuffixOpExpr(c, it, source_loc, scope); + return try parseCSuffixOpExpr(c, it, source, source_loc, scope); }, } }