mirror of
https://github.com/ziglang/zig.git
synced 2025-12-06 14:23:09 +00:00
Part of #19063. Primarily, this moves Aro from deps/ to lib/compiler/ so that it can be lazily compiled from source. src/aro_translate_c.zig is moved to lib/compiler/aro_translate_c.zig and some of the Zig CLI logic is moved to a main() function there. aro_translate_c.zig becomes the "common" import for clang-based translate-c. Not all of the compiler could be disentangled from Aro, however, so for now it remains compiled with the main compiler sources, because the clang-based translate-c depends on it. Once aro-based translate-c achieves feature parity with the clang-based implementation, the clang-based one can be removed from Zig. Aro made itself unnecessarily difficult to depend on with these .def files and all these Zig module requirements. I looked at the .def files and made these observations: - The canonical source is the llvm .def files. - Therefore there is an update process to sync with llvm that involves regenerating the .def files in Aro. - Therefore you might as well regenerate the .zig files directly and check those into Aro. - Also, with a small amount of tinkering, the on-disk size of these generated .zig files can be made many times smaller without compromising type safety in the usage of the data. This would make things much easier on Zig as a downstream project; in particular, we could remove those pesky stubs when bootstrapping. I have gone ahead with these changes since they unblock me, and I will have a chat with Vexu to see what he thinks.
128 lines
3.8 KiB
Zig
128 lines
3.8 KiB
Zig
const std = @import("std");
|
|
|
|
/// Identifies a source file within a compilation.
/// Backed by a u32 so it stores compactly in `Location`.
pub const Id = enum(u32) {
    /// Sentinel meaning "no source attached"; the default for `Location.id`.
    unused = 0,
    /// Reserved id for compiler-generated source text.
    /// NOTE(review): meaning inferred from the name — confirm against callers.
    generated = 1,
    /// All other values are ordinary source-file ids (non-exhaustive enum).
    _,
};
|
|
|
|
/// Classifies the file for line marker output in -E mode.
/// NOTE(review): appears to mirror the numeric flags GCC/Clang emit on
/// `# line "file" N` markers — confirm the exact mapping where these are emitted.
pub const Kind = enum {
    /// Regular user file; no special line-marker flag.
    user,
    /// Included from a system include directory
    system,
    /// Included from an "implicit extern C" directory
    extern_c_system,
};
|
|
|
|
/// A position inside a source file: which file (`id`), the byte offset into
/// that file's spliced buffer, and the (spliced) line number.
pub const Location = struct {
    id: Id = .unused,
    byte_offset: u32 = 0,
    line: u32 = 0,

    /// Two locations are equal when all three fields match.
    pub fn eql(a: Location, b: Location) bool {
        // Field-wise comparison; equivalent to comparing id, byte_offset
        // and line individually.
        return std.meta.eql(a, b);
    }
};
|
|
|
|
const Source = @This();

/// Path of this source file, used when reporting its location.
/// NOTE(review): ownership of this slice is not visible here — confirm who frees it.
path: []const u8,
/// File contents after backslash-newline splices were removed
/// (see `splice_locs` for where the deletions happened).
buf: []const u8,
/// Id of this source within the compilation; see `Id`.
id: Id,
/// each entry represents a byte position within `buf` where a backslash+newline was deleted
/// from the original raw buffer. The same position can appear multiple times if multiple
/// consecutive splices happened. Guaranteed to be non-decreasing
splice_locs: []const u32,
/// Classification used for line-marker output in -E mode; see `Kind`.
kind: Kind,
|
|
|
|
/// Returns how many backslash-newline splices occurred at or before
/// `byte_offset`, i.e. the number of entries in `splice_locs` that are
/// <= `byte_offset`. Since `splice_locs` is guaranteed non-decreasing,
/// this is computed with an upper-bound binary search (resolves the old
/// TODO about scanning the whole slice).
pub fn numSplicesBefore(source: Source, byte_offset: u32) u32 {
    // Invariant: every entry at index < low is <= byte_offset,
    // every entry at index >= high is > byte_offset.
    var low: usize = 0;
    var high: usize = source.splice_locs.len;
    while (low < high) {
        const mid = low + (high - low) / 2;
        if (source.splice_locs[mid] <= byte_offset) {
            low = mid + 1;
        } else {
            high = mid;
        }
    }
    // `low` is the index of the first entry > byte_offset, which equals
    // the count of entries <= byte_offset — exactly what the linear scan
    // returned. Duplicated positions are naturally counted once each.
    return @intCast(low);
}
|
|
|
|
/// Line number of `loc` as it appears in the original, unspliced file —
/// i.e. what the user actually sees in a text editor. Every backslash-newline
/// splice before `loc.byte_offset` removed one physical newline, so each one
/// shifts the physical line number up by one.
pub fn physicalLine(source: Source, loc: Location) u32 {
    const splices_before = source.numSplicesBefore(loc.byte_offset);
    return loc.line + splices_before;
}
|
|
|
|
/// Result of `lineCol`: the text of the line containing a location plus
/// derived position info.
/// - `line`: slice of `buf` from line start to newline/EOF/splice (exclusive).
/// - `line_no`: spliced line number (`loc.line` plus splices inside this line
///   before the location).
/// - `col`: 1-based column, counted per decode attempt (see TODO below).
/// - `width`: display width of the text before the location, via `codepointWidth`.
/// - `end_with_splice`: true when the line is cut short by a splice rather
///   than a newline or EOF.
const LineCol = struct { line: []const u8, line_no: u32, col: u32, width: u32, end_with_splice: bool };

/// Computes the line text, line number, column and display width for `loc`.
/// Assumes `loc` refers to this source (`loc.id`/`loc.byte_offset` are not
/// validated here).
pub fn lineCol(source: Source, loc: Location) LineCol {
    var start: usize = 0;
    // find the start of the line which is either a newline or a splice
    if (std.mem.lastIndexOfScalar(u8, source.buf[0..loc.byte_offset], '\n')) |some| start = some + 1;
    // Index of the first splice at or after the line start that is not before
    // the location; if a splice falls strictly between line start and the
    // location, the "line" restarts at that splice.
    const splice_index: u32 = for (source.splice_locs, 0..) |splice_offset, i| {
        if (splice_offset > start) {
            if (splice_offset < loc.byte_offset) {
                start = splice_offset;
                break @as(u32, @intCast(i)) + 1;
            }
            break @intCast(i);
        }
    } else @intCast(source.splice_locs.len);
    var i: usize = start;
    var col: u32 = 1;
    var width: u32 = 0;

    // Walk the bytes from line start to the location, accumulating column and
    // display width. Invalid UTF-8 bytes are skipped one at a time but still
    // advance the column.
    while (i < loc.byte_offset) : (col += 1) { // TODO this is still incorrect, but better
        const len = std.unicode.utf8ByteSequenceLength(source.buf[i]) catch {
            i += 1;
            continue;
        };
        const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch {
            i += 1;
            continue;
        };
        width += codepointWidth(cp);
        i += len;
    }

    // find the end of the line which is either a newline, EOF or a splice
    var nl = source.buf.len;
    var end_with_splice = false;
    if (std.mem.indexOfScalar(u8, source.buf[start..], '\n')) |some| nl = some + start;
    // A splice inside (start, nl) ends the displayed line early.
    if (source.splice_locs.len > splice_index and nl > source.splice_locs[splice_index] and source.splice_locs[splice_index] > start) {
        end_with_splice = true;
        nl = source.splice_locs[splice_index];
    }
    return .{
        .line = source.buf[start..nl],
        .line_no = loc.line + splice_index,
        .col = col,
        .width = width,
        .end_with_splice = end_with_splice,
    };
}
|
|
|
|
/// Approximate terminal display width of a codepoint: 2 for the wide
/// East Asian / emoji ranges below, 1 for everything else.
fn codepointWidth(cp: u32) u32 {
    return switch (cp) {
        // Wide ranges, listed in ascending order with adjacent
        // ranges merged; coverage is unchanged.
        0x1100...0x115F, // Hangul Jamo initial consonants
        0x2329...0x232A, // left/right-pointing angle brackets
        0x2E80...0x3247, // CJK radicals, punctuation, Kana, ...
        0x3250...0x4DBF,
        0x4E00...0xA4C6,
        0xA960...0xA97C,
        0xAC00...0xD7A3, // Hangul syllables
        0xF900...0xFAFF, // CJK compatibility ideographs
        0xFE10...0xFE19,
        0xFE30...0xFE6B,
        0xFF01...0xFF60, // fullwidth forms
        0xFFE0...0xFFE6,
        0x1B000...0x1B001,
        0x1F200...0x1F251,
        0x1F300...0x1F5FF, // misc symbols and pictographs
        0x1F900...0x1F9FF,
        0x20000...0x3FFFD, // CJK ideograph extensions
        => 2,
        else => 1,
    };
}
|