diff --git a/lib/std/zig/ast.zig b/lib/std/zig/ast.zig index 512e4ec265..8eefe4fb22 100644 --- a/lib/std/zig/ast.zig +++ b/lib/std/zig/ast.zig @@ -20,7 +20,7 @@ pub const NodeList = std.MultiArrayList(Node); pub const Tree = struct { /// Reference to externally-owned data. - source: []const u8, + source: [:0]const u8, tokens: TokenList.Slice, /// The root AST node is assumed to be index 0. Since there can be no diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 5bafcce5b1..f7e782851f 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -17,7 +17,7 @@ pub const Error = error{ParseError} || Allocator.Error; /// Result should be freed with tree.deinit() when there are /// no more references to any of the tokens or nodes. -pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!Tree { +pub fn parse(gpa: *Allocator, source: [:0]const u8) Allocator.Error!Tree { var tokens = ast.TokenList{}; defer tokens.deinit(gpa); diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 907e06a1a6..26ef344f2c 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -5194,7 +5194,7 @@ const maxInt = std.math.maxInt; var fixed_buffer_mem: [100 * 1024]u8 = undefined; -fn testParse(source: []const u8, allocator: *mem.Allocator, anything_changed: *bool) ![]u8 { +fn testParse(source: [:0]const u8, allocator: *mem.Allocator, anything_changed: *bool) ![]u8 { const stderr = io.getStdErr().writer(); var tree = try std.zig.parse(allocator, source); @@ -5222,7 +5222,7 @@ fn testParse(source: []const u8, allocator: *mem.Allocator, anything_changed: *b anything_changed.* = !mem.eql(u8, formatted, source); return formatted; } -fn testTransform(source: []const u8, expected_source: []const u8) !void { +fn testTransform(source: [:0]const u8, expected_source: []const u8) !void { const needed_alloc_count = x: { // Try it once with unlimited memory, make sure it works var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]); @@ -5268,13 +5268,13 @@ fn testTransform(source: []const u8, expected_source: []const u8) !void { } } } -fn testCanonical(source: []const u8) !void { +fn testCanonical(source: [:0]const u8) !void { return testTransform(source, source); } const Error = std.zig.ast.Error.Tag; -fn testError(source: []const u8, expected_errors: []const Error) !void { +fn testError(source: [:0]const u8, expected_errors: []const Error) !void { var tree = try std.zig.parse(std.testing.allocator, source); defer tree.deinit(std.testing.allocator); diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index 94a20d958b..35926ac730 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -326,7 +326,7 @@ pub const Token = struct { }; pub const Tokenizer = struct { - buffer: []const u8, + buffer: [:0]const u8, index: usize, pending_invalid_token: ?Token, @@ -335,7 +335,7 @@ pub const Tokenizer = struct { std.debug.warn("{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.start..token.end] }); } - pub fn init(buffer: []const u8) Tokenizer { + pub fn init(buffer: [:0]const u8) Tokenizer { // Skip the UTF-8 BOM if present const src_start = if (mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else @as(usize, 0); return Tokenizer{ @@ -373,7 +373,6 @@ pub const Tokenizer = struct { line_comment, doc_comment_start, doc_comment, - container_doc_comment, zero, int_literal_dec, int_literal_dec_no_underscore, @@ -407,10 +406,6 @@ pub const Tokenizer = struct { saw_at_sign, }; - fn isIdentifierChar(char: u8) bool { - return std.ascii.isAlNum(char) or char == '_'; - } - pub fn next(self: *Tokenizer) Token { if (self.pending_invalid_token) |token| { self.pending_invalid_token = null; @@ -426,10 +421,11 @@ pub const Tokenizer = struct { }; var seen_escape_digits: usize = undefined; var remaining_code_units: usize = undefined; - while (self.index < self.buffer.len) : (self.index += 1) { + while (true) : (self.index += 1) { const c = self.buffer[self.index]; switch (state) { .start => switch (c) { + 0 => break, ' ', '\n', '\t', '\r' => { result.loc.start = self.index + 1; }, @@ -705,18 +701,22 @@ pub const Tokenizer = struct { self.index += 1; break; }, - '\n', '\r' => break, // Look for this error later. + 0, '\n', '\r' => break, // Look for this error later. else => self.checkLiteralCharacter(), }, .string_literal_backslash => switch (c) { - '\n', '\r' => break, // Look for this error later. + 0, '\n', '\r' => break, // Look for this error later. else => { state = .string_literal; }, }, .char_literal => switch (c) { + 0 => { + result.tag = .invalid; + break; + }, '\\' => { state = .char_literal_backslash; }, @@ -742,7 +742,7 @@ pub const Tokenizer = struct { }, .char_literal_backslash => switch (c) { - '\n' => { + 0, '\n' => { result.tag = .invalid; break; }, @@ -834,6 +834,7 @@ pub const Tokenizer = struct { }, .multiline_string_literal_line => switch (c) { + 0 => break, '\n' => { self.index += 1; break; @@ -1025,12 +1026,13 @@ pub const Tokenizer = struct { }, }, .line_comment_start => switch (c) { + 0 => break, '/' => { state = .doc_comment_start; }, '!' => { result.tag = .container_doc_comment; - state = .container_doc_comment; + state = .doc_comment; }, '\n' => { state = .start; @@ -1046,7 +1048,7 @@ pub const Tokenizer = struct { '/' => { state = .line_comment; }, - '\n' => { + 0, '\n' => { result.tag = .doc_comment; break; }, @@ -1061,6 +1063,7 @@ pub const Tokenizer = struct { }, }, .line_comment => switch (c) { + 0 => break, '\n' => { state = .start; result.loc.start = self.index + 1; @@ -1068,8 +1071,8 @@ pub const Tokenizer = struct { '\t', '\r' => {}, else => self.checkLiteralCharacter(), }, - .doc_comment, .container_doc_comment => switch (c) { - '\n' => break, + .doc_comment => switch (c) { + 0, '\n' => break, '\t', '\r' => {}, else => self.checkLiteralCharacter(), }, @@ -1088,12 +1091,11 @@ pub const Tokenizer = struct { self.index -= 1; state = .int_literal_dec; }, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + 'a', 'c', 'd', 'f'...'n', 'p'...'w', 'y', 'z', 'A'...'D', 'F'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .int_literal_bin_no_underscore => switch (c) { '0'...'1' => { @@ -1109,12 +1111,11 @@ pub const Tokenizer = struct { state = .int_literal_bin_no_underscore; }, '0'...'1' => {}, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + '2'...'9', 'a'...'z', 'A'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .int_literal_oct_no_underscore => switch (c) { '0'...'7' => { @@ -1130,12 +1131,11 @@ pub const Tokenizer = struct { state = .int_literal_oct_no_underscore; }, '0'...'7' => {}, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + '8', '9', 'a'...'z', 'A'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .int_literal_dec_no_underscore => switch (c) { '0'...'9' => { @@ -1159,12 +1159,11 @@ pub const Tokenizer = struct { result.tag = .float_literal; }, '0'...'9' => {}, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + 'a'...'d', 'f'...'z', 'A'...'D', 'F'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .int_literal_hex_no_underscore => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => { @@ -1188,12 +1187,11 @@ pub const Tokenizer = struct { result.tag = .float_literal; }, '0'...'9', 'a'...'f', 'A'...'F' => {}, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + 'g'...'o', 'q'...'z', 'G'...'O', 'Q'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .num_dot_dec => switch (c) { '.' => { @@ -1206,12 +1204,11 @@ pub const Tokenizer = struct { result.tag = .float_literal; state = .float_fraction_dec; }, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + '_', 'a'...'z', 'A'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .num_dot_hex => switch (c) { '.' => { @@ -1224,12 +1221,11 @@ pub const Tokenizer = struct { result.tag = .float_literal; state = .float_fraction_hex; }, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + '_', 'g'...'z', 'G'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .float_fraction_dec_no_underscore => switch (c) { '0'...'9' => { @@ -1248,12 +1244,11 @@ pub const Tokenizer = struct { state = .float_exponent_unsigned; }, '0'...'9' => {}, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + 'a'...'d', 'f'...'z', 'A'...'D', 'F'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .float_fraction_hex_no_underscore => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => { @@ -1272,12 +1267,11 @@ pub const Tokenizer = struct { state = .float_exponent_unsigned; }, '0'...'9', 'a'...'f', 'A'...'F' => {}, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + 'g'...'o', 'q'...'z', 'G'...'O', 'Q'...'Z' => { + result.tag = .invalid; break; }, + else => break, }, .float_exponent_unsigned => switch (c) { '+', '-' => { @@ -1303,130 +1297,11 @@ pub const Tokenizer = struct { state = .float_exponent_num_no_underscore; }, '0'...'9' => {}, - else => { - if (isIdentifierChar(c)) { - result.tag = .invalid; - } + 'a'...'z', 'A'...'Z' => { + result.tag = .invalid; break; }, - }, - } - } else if (self.index == self.buffer.len) { - switch (state) { - .start, - .int_literal_dec, - .int_literal_bin, - .int_literal_oct, - .int_literal_hex, - .num_dot_dec, - .num_dot_hex, - .float_fraction_dec, - .float_fraction_hex, - .float_exponent_num, - .string_literal, // find this error later - .multiline_string_literal_line, - .builtin, - .line_comment, - .line_comment_start, - => {}, - - .identifier => { - if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |tag| { - result.tag = tag; - } - }, - .doc_comment, .doc_comment_start => { - result.tag = .doc_comment; - }, - .container_doc_comment => { - result.tag = .container_doc_comment; - }, - - .int_literal_dec_no_underscore, - .int_literal_bin_no_underscore, - .int_literal_oct_no_underscore, - .int_literal_hex_no_underscore, - .float_fraction_dec_no_underscore, - .float_fraction_hex_no_underscore, - .float_exponent_num_no_underscore, - .float_exponent_unsigned, - .saw_at_sign, - .backslash, - .char_literal, - .char_literal_backslash, - .char_literal_hex_escape, - .char_literal_unicode_escape_saw_u, - .char_literal_unicode_escape, - .char_literal_unicode_invalid, - .char_literal_end, - .char_literal_unicode, - .string_literal_backslash, - => { - result.tag = .invalid; - }, - - .equal => { - result.tag = .equal; - }, - .bang => { - result.tag = .bang; - }, - .minus => { - result.tag = .minus; - }, - .slash => { - result.tag = .slash; - }, - .zero => { - result.tag = .integer_literal; - }, - .ampersand => { - result.tag = .ampersand; - }, - .period => { - result.tag = .period; - }, - .period_2 => { - result.tag = .ellipsis2; - }, - .period_asterisk => { - result.tag = .period_asterisk; - }, - .pipe => { - result.tag = .pipe; - }, - .angle_bracket_angle_bracket_right => { - result.tag = .angle_bracket_angle_bracket_right; - }, - .angle_bracket_right => { - result.tag = .angle_bracket_right; - }, - .angle_bracket_angle_bracket_left => { - result.tag = .angle_bracket_angle_bracket_left; - }, - .angle_bracket_left => { - result.tag = .angle_bracket_left; - }, - .plus_percent => { - result.tag = .plus_percent; - }, - .plus => { - result.tag = .plus; - }, - .percent => { - result.tag = .percent; - }, - .caret => { - result.tag = .caret; - }, - .asterisk_percent => { - result.tag = .asterisk_percent; - }, - .asterisk => { - result.tag = .asterisk; - }, - .minus_percent => { - result.tag = .minus_percent; + else => break, }, } } diff --git a/src/Compilation.zig b/src/Compilation.zig index c1cca84f62..68a008e0e1 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1541,7 +1541,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { } } - if (comp.bin_file.options.use_stage1) { + if (comp.bin_file.options.use_stage1 and comp.bin_file.options.module != null) { try comp.work_queue.writeItem(.{ .stage1_module = {} }); } diff --git a/src/translate_c/ast.zig b/src/translate_c/ast.zig index 70c8c6559f..cdf8d778b2 100644 --- a/src/translate_c/ast.zig +++ b/src/translate_c/ast.zig @@ -754,7 +754,7 @@ pub fn render(gpa: *Allocator, nodes: []const Node) !std.zig.ast.Tree { }); return std.zig.ast.Tree{ - .source = ctx.buf.toOwnedSlice(), + .source = try ctx.buf.toOwnedSliceSentinel(0), .tokens = ctx.tokens.toOwnedSlice(), .nodes = ctx.nodes.toOwnedSlice(), .extra_data = ctx.extra_data.toOwnedSlice(gpa),