diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig
index cfed6c3db0..2b6e20253d 100644
--- a/src-self-hosted/main.zig
+++ b/src-self-hosted/main.zig
@@ -565,6 +565,15 @@ fn fmtMain(allocator: &mem.Allocator, file_paths: []const []const u8) !void {
         var file = try io.File.openRead(allocator, file_path);
         defer file.close();
 
+        const source_code = io.readFileAlloc(allocator, file_path) catch |err| {
+            warn("unable to open '{}': {}\n", file_path, err);
+            continue;
+        };
+        defer allocator.free(source_code);
+
+        var tokenizer = std.zig.Tokenizer.init(source_code);
+        var parser = std.zig.Parser.init(&tokenizer, allocator, file_path);
+        defer parser.deinit();
         warn("opened {} (todo tokenize and parse and render)\n", file_path);
     }
 }
diff --git a/src-self-hosted/module.zig b/src-self-hosted/module.zig
index bdb268cf2f..43bba22757 100644
--- a/src-self-hosted/module.zig
+++ b/src-self-hosted/module.zig
@@ -213,14 +213,11 @@ pub const Module = struct {
         };
         errdefer self.allocator.free(root_src_real_path);
 
-        const source_code = io.readFileAllocExtra(self.allocator, root_src_real_path, 3) catch |err| {
+        const source_code = io.readFileAlloc(self.allocator, root_src_real_path) catch |err| {
             try printError("unable to open '{}': {}", root_src_real_path, err);
             return err;
         };
         errdefer self.allocator.free(source_code);
-        source_code[source_code.len - 3] = '\n';
-        source_code[source_code.len - 2] = '\n';
-        source_code[source_code.len - 1] = '\n';
 
         warn("====input:====\n");
 
diff --git a/std/io.zig b/std/io.zig
index 9fca6aa6f7..7457416b29 100644
--- a/std/io.zig
+++ b/std/io.zig
@@ -524,16 +524,11 @@ pub fn writeFile(allocator: &mem.Allocator, path: []const u8, data: []const u8)
 
 /// On success, caller owns returned buffer.
 pub fn readFileAlloc(allocator: &mem.Allocator, path: []const u8) ![]u8 {
-    return readFileAllocExtra(allocator, path, 0);
-}
-/// On success, caller owns returned buffer.
-/// Allocates extra_len extra bytes at the end of the file buffer, which are uninitialized.
-pub fn readFileAllocExtra(allocator: &mem.Allocator, path: []const u8, extra_len: usize) ![]u8 {
     var file = try File.openRead(allocator, path);
     defer file.close();
 
     const size = try file.getEndPos();
-    const buf = try allocator.alloc(u8, size + extra_len);
+    const buf = try allocator.alloc(u8, size);
     errdefer allocator.free(buf);
 
     var adapter = FileInStream.init(&file);
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index e9bfb21633..546356caa3 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -175,12 +175,7 @@ pub const Tokenizer = struct {
         std.debug.warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
     }
 
-    /// buffer must end with "\n\n\n". This is so that attempting to decode
-    /// a the 3 trailing bytes of a 4-byte utf8 sequence is never a buffer overflow.
     pub fn init(buffer: []const u8) Tokenizer {
-        std.debug.assert(buffer[buffer.len - 1] == '\n');
-        std.debug.assert(buffer[buffer.len - 2] == '\n');
-        std.debug.assert(buffer[buffer.len - 3] == '\n');
         return Tokenizer {
             .buffer = buffer,
             .index = 0,
@@ -556,8 +551,12 @@ pub const Tokenizer = struct {
         } else {
             // check utf8-encoded character.
             const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1;
-            // the last 3 bytes in the buffer are guaranteed to be '\n',
-            // which means we don't need to do any bounds checking here.
+            // The buffer is no longer padded, so bounds-check before slicing.
+            // Only a sequence that runs past the end is cut short; one that ends
+            // exactly at buffer.len is in bounds and is handled by the switch below.
+            if (self.index + length > self.buffer.len) {
+                return u3(self.buffer.len - self.index);
+            }
             const bytes = self.buffer[self.index..self.index + length];
             switch (length) {
                 2 => {