std zig tokenizer: don't require 3 newlines at the end of the source

This commit is contained in:
Andrew Kelley 2018-02-10 14:52:39 -05:00
parent a2bd9f8912
commit 8c31eaf2a8
4 changed files with 14 additions and 17 deletions

View File

@@ -565,6 +565,15 @@ fn fmtMain(allocator: &mem.Allocator, file_paths: []const []const u8) !void {
var file = try io.File.openRead(allocator, file_path);
defer file.close();
const source_code = io.readFileAlloc(allocator, file_path) catch |err| {
warn("unable to open '{}': {}", file_path, err);
continue;
};
defer allocator.free(source_code);
var tokenizer = std.zig.Tokenizer.init(source_code);
var parser = std.zig.Parser.init(&tokenizer, allocator, file_path);
defer parser.deinit();
warn("opened {} (todo tokenize and parse and render)\n", file_path);
}
}

View File

@@ -213,14 +213,11 @@ pub const Module = struct {
};
errdefer self.allocator.free(root_src_real_path);
const source_code = io.readFileAllocExtra(self.allocator, root_src_real_path, 3) catch |err| {
const source_code = io.readFileAlloc(self.allocator, root_src_real_path) catch |err| {
try printError("unable to open '{}': {}", root_src_real_path, err);
return err;
};
errdefer self.allocator.free(source_code);
source_code[source_code.len - 3] = '\n';
source_code[source_code.len - 2] = '\n';
source_code[source_code.len - 1] = '\n';
warn("====input:====\n");

View File

@@ -524,16 +524,11 @@ pub fn writeFile(allocator: &mem.Allocator, path: []const u8, data: []const u8)
/// On success, caller owns returned buffer.
pub fn readFileAlloc(allocator: &mem.Allocator, path: []const u8) ![]u8 {
return readFileAllocExtra(allocator, path, 0);
}
/// On success, caller owns returned buffer.
/// Allocates extra_len extra bytes at the end of the file buffer, which are uninitialized.
pub fn readFileAllocExtra(allocator: &mem.Allocator, path: []const u8, extra_len: usize) ![]u8 {
var file = try File.openRead(allocator, path);
defer file.close();
const size = try file.getEndPos();
const buf = try allocator.alloc(u8, size + extra_len);
const buf = try allocator.alloc(u8, size);
errdefer allocator.free(buf);
var adapter = FileInStream.init(&file);

View File

@@ -175,12 +175,7 @@ pub const Tokenizer = struct {
std.debug.warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
}
/// buffer must end with "\n\n\n". This is so that attempting to decode
/// a the 3 trailing bytes of a 4-byte utf8 sequence is never a buffer overflow.
pub fn init(buffer: []const u8) Tokenizer {
std.debug.assert(buffer[buffer.len - 1] == '\n');
std.debug.assert(buffer[buffer.len - 2] == '\n');
std.debug.assert(buffer[buffer.len - 3] == '\n');
return Tokenizer {
.buffer = buffer,
.index = 0,
@@ -556,8 +551,9 @@ pub const Tokenizer = struct {
} else {
// check utf8-encoded character.
const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1;
// the last 3 bytes in the buffer are guaranteed to be '\n',
// which means we don't need to do any bounds checking here.
if (self.index + length >= self.buffer.len) {
return u3(self.buffer.len - self.index);
}
const bytes = self.buffer[self.index..self.index + length];
switch (length) {
2 => {