mirror of
https://github.com/ziglang/zig.git
synced 2026-01-01 11:03:11 +00:00
zig fmt: factorize source file reading and decoding
Now reading a source file and decoding it from UTF-16LE to UTF-8 is done in a single function. Error messages are improved, and an error is emitted when the source file has a BOM not supported (UTF-16BE, UTF-32). Please note that the BOM of UTF-32 is composed of the same bytes as the BOM of UTF-16 followed by a null character. Therefore a source file in UTF-16LE starting with a null byte will be interpreted as an UTF-32, and rejeted because of an invalid format. In pratice this is not a problem, as the code would have been rejected later anyway because of the null character.
This commit is contained in:
parent
a354000090
commit
8942243f7a
102
src/main.zig
102
src/main.zig
@ -2637,6 +2637,50 @@ fn argvCmd(allocator: *Allocator, argv: []const []const u8) ![]u8 {
|
||||
return cmd.toOwnedSlice();
|
||||
}
|
||||
|
||||
fn readSourceFileToEndAlloc(allocator: *mem.Allocator, input: *const fs.File, size_hint: ?usize) ![]const u8 {
|
||||
const source_code = input.readToEndAllocOptions(
|
||||
allocator,
|
||||
max_src_size,
|
||||
size_hint,
|
||||
@alignOf(u16),
|
||||
null,
|
||||
) catch |err| switch (err) {
|
||||
error.ConnectionResetByPeer => unreachable,
|
||||
error.ConnectionTimedOut => unreachable,
|
||||
error.NotOpenForReading => unreachable,
|
||||
else => |e| return e,
|
||||
};
|
||||
errdefer allocator.free(source_code);
|
||||
|
||||
// Detect unsupported file types with their Byte Order Mark
|
||||
const unsupported_boms = [_][]const u8{
|
||||
"\xff\xfe\x00\x00", // UTF-32 little endian
|
||||
"\xfe\xff\x00\x00", // UTF-32 big endian
|
||||
"\xfe\xff", // UTF-16 big endian
|
||||
};
|
||||
for (unsupported_boms) |bom| {
|
||||
if (mem.startsWith(u8, source_code, bom)) {
|
||||
return error.UnsupportedEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
// If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
|
||||
if (mem.startsWith(u8, source_code, "\xff\xfe")) {
|
||||
const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
|
||||
const source_code_utf8 = std.unicode.utf16leToUtf8Alloc(allocator, source_code_utf16_le) catch |err| switch (err) {
|
||||
error.DanglingSurrogateHalf => error.UnsupportedEncoding,
|
||||
error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,
|
||||
error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,
|
||||
else => |e| return e,
|
||||
};
|
||||
|
||||
allocator.free(source_code);
|
||||
return source_code_utf8;
|
||||
}
|
||||
|
||||
return source_code;
|
||||
}
|
||||
|
||||
pub const usage_fmt =
|
||||
\\Usage: zig fmt [file]...
|
||||
\\
|
||||
@ -2709,20 +2753,8 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
|
||||
}
|
||||
|
||||
const stdin = io.getStdIn();
|
||||
|
||||
const source_code = blk: {
|
||||
const source_code = try stdin.readToEndAllocOptions(gpa, max_src_size, null, @alignOf(u16), null);
|
||||
errdefer gpa.free(source_code);
|
||||
|
||||
// If the file starts with a UTF-16 BOM, translate it to UTF-8
|
||||
if (mem.startsWith(u8, source_code, "\xff\xfe")) {
|
||||
const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
|
||||
const source_code_utf8 = try std.unicode.utf16leToUtf8Alloc(gpa, source_code_utf16_le);
|
||||
gpa.free(source_code);
|
||||
break :blk source_code_utf8;
|
||||
} else {
|
||||
break :blk source_code;
|
||||
}
|
||||
const source_code = readSourceFileToEndAlloc(gpa, &stdin, null) catch |err| {
|
||||
fatal("unable to read stdin: {s}", .{err});
|
||||
};
|
||||
defer gpa.free(source_code);
|
||||
|
||||
@ -2798,7 +2830,7 @@ const FmtError = error{
|
||||
EndOfStream,
|
||||
Unseekable,
|
||||
NotOpenForWriting,
|
||||
UnknownTextFormat,
|
||||
UnsupportedEncoding,
|
||||
} || fs.File.OpenError;
|
||||
|
||||
fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool, dir: fs.Dir, sub_path: []const u8) FmtError!void {
|
||||
@ -2864,40 +2896,16 @@ fn fmtPathFile(
|
||||
if (stat.kind == .Directory)
|
||||
return error.IsDir;
|
||||
|
||||
const source_code = blk: {
|
||||
const source_code = source_file.readToEndAllocOptions(
|
||||
fmt.gpa,
|
||||
max_src_size,
|
||||
std.math.cast(usize, stat.size) catch return error.FileTooBig,
|
||||
@alignOf(u16),
|
||||
null,
|
||||
) catch |err| switch (err) {
|
||||
error.ConnectionResetByPeer => unreachable,
|
||||
error.ConnectionTimedOut => unreachable,
|
||||
error.NotOpenForReading => unreachable,
|
||||
else => |e| return e,
|
||||
};
|
||||
source_file.close();
|
||||
file_closed = true;
|
||||
errdefer fmt.gpa.free(source_code);
|
||||
|
||||
// If the file starts with a UTF-16 BOM, translate it to UTF-8
|
||||
if (mem.eql(u8, source_code[0..2], "\xff\xfe")) {
|
||||
const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
|
||||
const source_code_utf8 = std.unicode.utf16leToUtf8Alloc(fmt.gpa, source_code_utf16_le) catch |err| return switch (err) {
|
||||
error.DanglingSurrogateHalf => FmtError.UnknownTextFormat,
|
||||
error.ExpectedSecondSurrogateHalf => FmtError.UnknownTextFormat,
|
||||
error.UnexpectedSecondSurrogateHalf => FmtError.UnknownTextFormat,
|
||||
else => |e| e,
|
||||
};
|
||||
fmt.gpa.free(source_code);
|
||||
break :blk source_code_utf8;
|
||||
} else {
|
||||
break :blk source_code;
|
||||
}
|
||||
};
|
||||
const source_code = try readSourceFileToEndAlloc(
|
||||
fmt.gpa,
|
||||
&source_file,
|
||||
std.math.cast(usize, stat.size) catch return error.FileTooBig,
|
||||
);
|
||||
defer fmt.gpa.free(source_code);
|
||||
|
||||
source_file.close();
|
||||
file_closed = true;
|
||||
|
||||
// Add to set after no longer possible to get error.IsDir.
|
||||
if (try fmt.seen.fetchPut(stat.inode, {})) |_| return;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user