Merge pull request #17524 from Vexu/aro-translate-c

Add ability to test Aro based `translate-c`
commit 149f2f8adb
Andrew Kelley, 2023-10-18 00:32:59 -04:00 (committed by GitHub)
42 changed files with 14599 additions and 36267 deletions

deps/aro/Attribute.zig

@@ -977,10 +977,11 @@ fn fromStringC2X(namespace: ?[]const u8, name: []const u8) ?Tag {
}
fn fromStringDeclspec(name: []const u8) ?Tag {
const normalized = normalize(name);
const decls = @typeInfo(attributes).Struct.decls;
inline for (decls, 0..) |decl, i| {
if (@hasDecl(@field(attributes, decl.name), "declspec")) {
if (mem.eql(u8, @field(attributes, decl.name).declspec, name)) {
if (mem.eql(u8, @field(attributes, decl.name).declspec, normalized)) {
return @enumFromInt(i);
}
}
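A note on the one-line fix above: attribute names arrive in several spellings, and the lookup must compare the normalized form rather than the raw token. A minimal standalone sketch of that kind of normalization (illustrative only; aro's real `normalize` helper lives in this file and may differ in detail):

const std = @import("std");

/// Map "__dllexport__"-style spellings to "dllexport" so decorated and
/// plain forms match. (Assumed behavior for illustration.)
fn normalize(name: []const u8) []const u8 {
    if (name.len >= 4 and std.mem.startsWith(u8, name, "__") and std.mem.endsWith(u8, name, "__"))
        return name[2 .. name.len - 2];
    return name;
}

test "normalize attribute names" {
    try std.testing.expectEqualStrings("dllexport", normalize("__dllexport__"));
    try std.testing.expectEqualStrings("dllexport", normalize("dllexport"));
}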

deps/aro/Builtins.zig (35 changes)

@@ -125,6 +125,18 @@ fn createType(desc: TypeDescription, it: *TypeDescription.TypeIterator, comp: *c
std.debug.assert(builder.specifier == .none);
builder.specifier = Type.Builder.fromType(comp.types.ns_constant_string.ty);
},
.G => {
// Todo: id
return .{ .specifier = .invalid };
},
.H => {
// Todo: SEL
return .{ .specifier = .invalid };
},
.M => {
// Todo: struct objc_super
return .{ .specifier = .invalid };
},
.a => {
std.debug.assert(builder.specifier == .none);
std.debug.assert(desc.suffix.len == 0);
@@ -260,8 +272,7 @@ fn createBuiltin(comp: *const Compilation, builtin: BuiltinFunction, type_arena:
/// Asserts that the builtin has already been created
pub fn lookup(b: *const Builtins, name: []const u8) Expanded {
@setEvalBranchQuota(10_000);
const builtin = BuiltinFunction.fromTag(std.meta.stringToEnum(BuiltinFunction.Tag, name).?);
const builtin = BuiltinFunction.fromName(name).?;
const ty = b._name_to_type_map.get(name).?;
return .{
.builtin = builtin,
@@ -271,9 +282,7 @@ pub fn lookup(b: *const Builtins, name: []const u8) Expanded {
pub fn getOrCreate(b: *Builtins, comp: *Compilation, name: []const u8, type_arena: std.mem.Allocator) !?Expanded {
const ty = b._name_to_type_map.get(name) orelse {
@setEvalBranchQuota(10_000);
const tag = std.meta.stringToEnum(BuiltinFunction.Tag, name) orelse return null;
const builtin = BuiltinFunction.fromTag(tag);
const builtin = BuiltinFunction.fromName(name) orelse return null;
if (!comp.hasBuiltinFunction(builtin)) return null;
try b._name_to_type_map.ensureUnusedCapacity(comp.gpa, 1);
@@ -285,7 +294,7 @@ pub fn getOrCreate(b: *Builtins, comp: *Compilation, name: []const u8, type_aren
.ty = ty,
};
};
const builtin = BuiltinFunction.fromTag(std.meta.stringToEnum(BuiltinFunction.Tag, name).?);
const builtin = BuiltinFunction.fromName(name).?;
return .{
.builtin = builtin,
.ty = ty,
@@ -301,9 +310,9 @@ test "All builtins" {
const type_arena = arena.allocator();
for (0..@typeInfo(BuiltinFunction.Tag).Enum.fields.len) |i| {
const tag: BuiltinFunction.Tag = @enumFromInt(i);
const name = @tagName(tag);
var builtin_it = BuiltinFunction.BuiltinsIterator{};
while (builtin_it.next()) |entry| {
const name = try type_arena.dupe(u8, entry.name);
if (try comp.builtins.getOrCreate(&comp, name, type_arena)) |func_ty| {
const get_again = (try comp.builtins.getOrCreate(&comp, name, std.testing.failing_allocator)).?;
const found_by_lookup = comp.builtins.lookup(name);
@@ -325,10 +334,10 @@ test "Allocation failures" {
const type_arena = arena.allocator();
const num_builtins = 40;
for (0..num_builtins) |i| {
const tag: BuiltinFunction.Tag = @enumFromInt(i);
const name = @tagName(tag);
_ = try comp.builtins.getOrCreate(&comp, name, type_arena);
var builtin_it = BuiltinFunction.BuiltinsIterator{};
for (0..num_builtins) |_| {
const entry = builtin_it.next().?;
_ = try comp.builtins.getOrCreate(&comp, entry.name, type_arena);
}
}
};
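The Builtins hunks above all make the same substitution: `std.meta.stringToEnum(BuiltinFunction.Tag, name)` followed by `fromTag` becomes a single `BuiltinFunction.fromName(name)`, and iteration moves to `BuiltinsIterator`. That decouples lookup from the names being a Zig enum, which matters once the builtin list is generated data. A self-contained analogue of the pattern (the table below is illustrative, not aro's):

const std = @import("std");

const BuiltinId = u32;

// A name-keyed comptime table standing in for aro's generated builtin data.
const builtin_table = std.ComptimeStringMap(BuiltinId, .{
    .{ "__builtin_complex", 0 },
    .{ "__builtin_va_start", 1 },
});

fn fromName(name: []const u8) ?BuiltinId {
    return builtin_table.get(name);
}

test "fromName replaces stringToEnum + fromTag" {
    try std.testing.expectEqual(@as(?BuiltinId, 1), fromName("__builtin_va_start"));
    try std.testing.expectEqual(@as(?BuiltinId, null), fromName("not_a_builtin"));
}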

deps/aro/CharLiteral.zig (new file, 298 lines)

@@ -0,0 +1,298 @@
const std = @import("std");
const Compilation = @import("Compilation.zig");
const Type = @import("Type.zig");
const Diagnostics = @import("Diagnostics.zig");
const Tokenizer = @import("Tokenizer.zig");
const mem = std.mem;
pub const Item = union(enum) {
/// decoded escape
value: u32,
/// Char literal in the source text is not utf8 encoded
improperly_encoded: []const u8,
/// 1 or more unescaped bytes
utf8_text: std.unicode.Utf8View,
};
const CharDiagnostic = struct {
tag: Diagnostics.Tag,
extra: Diagnostics.Message.Extra,
};
pub const Kind = enum {
char,
wide,
utf_8,
utf_16,
utf_32,
pub fn classify(id: Tokenizer.Token.Id) Kind {
return switch (id) {
.char_literal,
.string_literal,
=> .char,
.char_literal_utf_8,
.string_literal_utf_8,
=> .utf_8,
.char_literal_wide,
.string_literal_wide,
=> .wide,
.char_literal_utf_16,
.string_literal_utf_16,
=> .utf_16,
.char_literal_utf_32,
.string_literal_utf_32,
=> .utf_32,
else => unreachable,
};
}
/// Largest unicode codepoint that can be represented by this character kind
/// May be smaller than the largest value that can be represented.
/// For example u8 char literals may only specify 0-127 via literals or
/// character escapes, but may specify up to \xFF via hex escapes.
pub fn maxCodepoint(kind: Kind, comp: *const Compilation) u21 {
return @intCast(switch (kind) {
.char => std.math.maxInt(u7),
.wide => @min(0x10FFFF, comp.types.wchar.maxInt(comp)),
.utf_8 => std.math.maxInt(u7),
.utf_16 => std.math.maxInt(u16),
.utf_32 => 0x10FFFF,
});
}
/// Largest integer that can be represented by this character kind
pub fn maxInt(kind: Kind, comp: *const Compilation) u32 {
return @intCast(switch (kind) {
.char, .utf_8 => std.math.maxInt(u8),
.wide => comp.types.wchar.maxInt(comp),
.utf_16 => std.math.maxInt(u16),
.utf_32 => std.math.maxInt(u32),
});
}
pub fn charLiteralType(kind: Kind, comp: *const Compilation) Type {
return switch (kind) {
.char => Type.int,
.wide => comp.types.wchar,
.utf_8 => .{ .specifier = .uchar },
.utf_16 => comp.types.uint_least16_t,
.utf_32 => comp.types.uint_least32_t,
};
}
/// Return the actual contents of the string literal with leading / trailing quotes and
/// specifiers removed
pub fn contentSlice(kind: Kind, delimited: []const u8) []const u8 {
const end = delimited.len - 1; // remove trailing quote
return switch (kind) {
.char => delimited[1..end],
.wide => delimited[2..end],
.utf_8 => delimited[3..end],
.utf_16 => delimited[2..end],
.utf_32 => delimited[2..end],
};
}
};
pub const Parser = struct {
literal: []const u8,
i: usize = 0,
kind: Kind,
/// We only want to issue a max of 1 error per char literal
errored: bool = false,
errors: std.BoundedArray(CharDiagnostic, 4) = .{},
comp: *const Compilation,
pub fn init(literal: []const u8, kind: Kind, comp: *const Compilation) Parser {
return .{
.literal = literal,
.comp = comp,
.kind = kind,
};
}
pub fn err(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void {
if (self.errored) return;
self.errored = true;
self.errors.append(.{ .tag = tag, .extra = extra }) catch {};
}
pub fn warn(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void {
if (self.errored) return;
self.errors.append(.{ .tag = tag, .extra = extra }) catch {};
}
pub fn next(self: *Parser) ?Item {
if (self.i >= self.literal.len) return null;
const start = self.i;
if (self.literal[start] != '\\') {
self.i = mem.indexOfScalarPos(u8, self.literal, start + 1, '\\') orelse self.literal.len;
const unescaped_slice = self.literal[start..self.i];
const view = std.unicode.Utf8View.init(unescaped_slice) catch {
if (self.kind != .char) {
self.err(.illegal_char_encoding_error, .{ .none = {} });
} else {
self.warn(.illegal_char_encoding_warning, .{ .none = {} });
}
return .{ .improperly_encoded = self.literal[start..self.i] };
};
return .{ .utf8_text = view };
}
switch (self.literal[start + 1]) {
'u', 'U' => return self.parseUnicodeEscape(),
else => return self.parseEscapedChar(),
}
}
fn parseUnicodeEscape(self: *Parser) ?Item {
const start = self.i;
std.debug.assert(self.literal[self.i] == '\\');
const kind = self.literal[self.i + 1];
std.debug.assert(kind == 'u' or kind == 'U');
self.i += 2;
if (self.i >= self.literal.len or !std.ascii.isHex(self.literal[self.i])) {
self.err(.missing_hex_escape, .{ .ascii = @intCast(kind) });
return null;
}
const expected_len: usize = if (kind == 'u') 4 else 8;
var overflowed = false;
var count: usize = 0;
var val: u32 = 0;
for (self.literal[self.i..], 0..) |c, i| {
if (i == expected_len) break;
const char = std.fmt.charToDigit(c, 16) catch {
break;
};
val, const overflow = @shlWithOverflow(val, 4);
overflowed = overflowed or overflow != 0;
val |= char;
count += 1;
}
self.i += expected_len;
if (overflowed) {
self.err(.escape_sequence_overflow, .{ .unsigned = start });
return null;
}
if (count != expected_len) {
self.err(.incomplete_universal_character, .{ .none = {} });
return null;
}
if (val > std.math.maxInt(u21) or !std.unicode.utf8ValidCodepoint(@intCast(val))) {
self.err(.invalid_universal_character, .{ .unsigned = start });
return null;
}
if (val > self.kind.maxCodepoint(self.comp)) {
self.err(.char_too_large, .{ .none = {} });
}
if (val < 0xA0 and (val != '$' and val != '@' and val != '`')) {
const is_error = !self.comp.langopts.standard.atLeast(.c2x);
if (val >= 0x20 and val <= 0x7F) {
if (is_error) {
self.err(.ucn_basic_char_error, .{ .ascii = @intCast(val) });
} else {
self.warn(.ucn_basic_char_warning, .{ .ascii = @intCast(val) });
}
} else {
if (is_error) {
self.err(.ucn_control_char_error, .{ .none = {} });
} else {
self.warn(.ucn_control_char_warning, .{ .none = {} });
}
}
}
self.warn(.c89_ucn_in_literal, .{ .none = {} });
return .{ .value = val };
}
fn parseEscapedChar(self: *Parser) Item {
self.i += 1;
const c = self.literal[self.i];
defer if (c != 'x' and (c < '0' or c > '7')) {
self.i += 1;
};
switch (c) {
'\n' => unreachable, // removed by line splicing
'\r' => unreachable, // removed by line splicing
'\'', '\"', '\\', '?' => return .{ .value = c },
'n' => return .{ .value = '\n' },
'r' => return .{ .value = '\r' },
't' => return .{ .value = '\t' },
'a' => return .{ .value = 0x07 },
'b' => return .{ .value = 0x08 },
'e', 'E' => {
self.warn(.non_standard_escape_char, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = 0x1B };
},
'(', '{', '[', '%' => {
self.warn(.non_standard_escape_char, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = c };
},
'f' => return .{ .value = 0x0C },
'v' => return .{ .value = 0x0B },
'x' => return .{ .value = self.parseNumberEscape(.hex) },
'0'...'7' => return .{ .value = self.parseNumberEscape(.octal) },
'u', 'U' => unreachable, // handled by parseUnicodeEscape
else => {
self.warn(.unknown_escape_sequence, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = c };
},
}
}
fn parseNumberEscape(self: *Parser, base: EscapeBase) u32 {
var val: u32 = 0;
var count: usize = 0;
var overflowed = false;
defer self.i += count;
const slice = switch (base) {
.octal => self.literal[self.i..@min(self.literal.len, self.i + 3)], // max 3 chars
.hex => blk: {
self.i += 1;
break :blk self.literal[self.i..]; // skip over 'x'; could have an arbitrary number of chars
},
};
for (slice) |c| {
const char = std.fmt.charToDigit(c, @intFromEnum(base)) catch break;
val, const overflow = @shlWithOverflow(val, base.log2());
if (overflow != 0) overflowed = true;
val += char;
count += 1;
}
if (overflowed or val > self.kind.maxInt(self.comp)) {
self.err(.escape_sequence_overflow, .{ .unsigned = 0 });
}
if (count == 0) {
std.debug.assert(base == .hex);
self.err(.missing_hex_escape, .{ .ascii = 'x' });
}
return val;
}
};
const EscapeBase = enum(u8) {
octal = 8,
hex = 16,
fn log2(base: EscapeBase) u4 {
return switch (base) {
.octal => 3,
.hex => 4,
};
}
};
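To make the escape rules in `parseNumberEscape` concrete: octal escapes consume at most three digits, hex escapes consume digits until the first non-hex character, and each digit shifts the accumulated value left by `base.log2()` bits. A standalone check of the octal case (illustrative, not the code above):

const std = @import("std");

fn decodeOctalEscape(digits: []const u8) u32 {
    var val: u32 = 0;
    for (digits[0..@min(digits.len, 3)]) |c| { // at most 3 octal digits
        const d = std.fmt.charToDigit(c, 8) catch break;
        val = (val << 3) + d; // 3 == EscapeBase.octal.log2()
    }
    return val;
}

test "octal escape decoding" {
    try std.testing.expectEqual(@as(u32, 'A'), decodeOctalEscape("101")); // '\101' is 'A'
    try std.testing.expectEqual(@as(u32, 0o12), decodeOctalEscape("128")); // stops at the '8'
}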

deps/aro/CodeGen.zig

@@ -1162,7 +1162,7 @@ fn genBoolExpr(c: *CodeGen, base: NodeIndex, true_label: Ir.Ref, false_label: Ir
fn genBuiltinCall(c: *CodeGen, builtin: BuiltinFunction, arg_nodes: []const NodeIndex, ty: Type) Error!Ir.Ref {
_ = arg_nodes;
_ = ty;
return c.comp.diag.fatalNoSrc("TODO CodeGen.genBuiltinCall {s}\n", .{@tagName(builtin.tag)});
return c.comp.diag.fatalNoSrc("TODO CodeGen.genBuiltinCall {s}\n", .{BuiltinFunction.nameFromTag(builtin.tag).span()});
}
fn genCall(c: *CodeGen, fn_node: NodeIndex, arg_nodes: []const NodeIndex, ty: Type) Error!Ir.Ref {

deps/aro/Compilation.zig

@@ -100,6 +100,8 @@ generated_buf: std.ArrayList(u8),
builtins: Builtins = .{},
types: struct {
wchar: Type = undefined,
uint_least16_t: Type = undefined,
uint_least32_t: Type = undefined,
ptrdiff: Type = undefined,
size: Type = undefined,
va_list: Type = undefined,
@@ -120,9 +122,8 @@ types: struct {
int16: Type = .{ .specifier = .invalid },
int64: Type = .{ .specifier = .invalid },
} = .{},
/// Mapping from Source.Id to byte offset of first non-utf8 byte
invalid_utf8_locs: std.AutoHashMapUnmanaged(Source.Id, u32) = .{},
string_interner: StringInterner = .{},
ms_cwd_source_id: ?Source.Id = null,
pub fn init(gpa: Allocator) Compilation {
return .{
@@ -153,7 +154,6 @@ pub fn deinit(comp: *Compilation) void {
comp.pragma_handlers.deinit();
comp.generated_buf.deinit();
comp.builtins.deinit(comp.gpa);
comp.invalid_utf8_locs.deinit(comp.gpa);
comp.string_interner.deinit(comp.gpa);
}
@@ -635,11 +635,25 @@ fn generateBuiltinTypes(comp: *Compilation) !void {
.intptr = intptr,
.int16 = int16,
.int64 = int64,
.uint_least16_t = comp.intLeastN(16, .unsigned),
.uint_least32_t = comp.intLeastN(32, .unsigned),
};
try comp.generateNsConstantStringType();
}
/// Smallest integer type with at least N bits
fn intLeastN(comp: *const Compilation, bits: usize, signedness: std.builtin.Signedness) Type {
const candidates = switch (signedness) {
.signed => &[_]Type.Specifier{ .schar, .short, .int, .long, .long_long },
.unsigned => &[_]Type.Specifier{ .uchar, .ushort, .uint, .ulong, .ulong_long },
};
for (candidates) |specifier| {
const ty: Type = .{ .specifier = specifier };
if (ty.sizeof(comp).? * 8 >= bits) return ty;
} else unreachable;
}
fn intSize(comp: *const Compilation, specifier: Type.Specifier) u64 {
const ty = Type{ .specifier = specifier };
return ty.sizeof(comp).?;
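`intLeastN` walks the candidate specifiers in increasing size and returns the first whose width covers the request, so on a typical 64-bit target `uint_least16_t` resolves to `unsigned short` and `uint_least32_t` to `unsigned int`. A standalone analogue with fixed sizes (aro asks `Type.sizeof` for the real target):

const std = @import("std");

const Candidate = struct { name: []const u8, bits: usize };

fn intLeastN(bits: usize) []const u8 {
    // Sizes for a typical 64-bit target; purely illustrative.
    const candidates = [_]Candidate{
        .{ .name = "unsigned char", .bits = 8 },
        .{ .name = "unsigned short", .bits = 16 },
        .{ .name = "unsigned int", .bits = 32 },
        .{ .name = "unsigned long", .bits = 64 },
    };
    for (candidates) |c| {
        if (c.bits >= bits) return c.name;
    } else unreachable;
}

test "uint_least resolution" {
    try std.testing.expectEqualStrings("unsigned short", intLeastN(16));
    try std.testing.expectEqualStrings("unsigned int", intLeastN(32));
}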
@@ -944,21 +958,29 @@ pub fn getSource(comp: *const Compilation, id: Source.Id) Source {
.buf = comp.generated_buf.items,
.id = .generated,
.splice_locs = &.{},
.kind = .user,
};
return comp.sources.values()[@intFromEnum(id) - 2];
}
/// Creates a Source from the contents of `reader` and adds it to the Compilation
/// Performs newline splicing, line-ending normalization to '\n', and UTF-8 validation.
/// caller retains ownership of `path`
/// `expected_size` will be allocated to hold the contents of `reader` and *must* be at least
/// as large as the entire contents of `reader`.
/// To add a pre-existing buffer as a Source, see addSourceFromBuffer
/// To add a file's contents given its path, see addSourceFromPath
pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8, expected_size: u32) !Source {
var contents = try comp.gpa.alloc(u8, expected_size);
pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8, kind: Source.Kind) !Source {
const contents = try reader.readAllAlloc(comp.gpa, std.math.maxInt(u32));
errdefer comp.gpa.free(contents);
return comp.addSourceFromOwnedBuffer(contents, path, kind);
}
/// Creates a Source from `buf` and adds it to the Compilation
/// Performs newline splicing and line-ending normalization to '\n'
/// `buf` will be modified and the allocation will be resized if newline splicing
/// or line-ending changes happen.
/// caller retains ownership of `path`
/// To add the contents of an arbitrary reader as a Source, see addSourceFromReader
/// To add a file's contents given its path, see addSourceFromPath
pub fn addSourceFromOwnedBuffer(comp: *Compilation, buf: []u8, path: []const u8, kind: Source.Kind) !Source {
try comp.sources.ensureUnusedCapacity(1);
var contents = buf;
const duped_path = try comp.gpa.dupe(u8, path);
errdefer comp.gpa.free(duped_path);
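The new doc comments describe in-place buffer transformations: backslash-newline pairs are spliced away and `\r\n`/`\r` line endings become `\n`, which is why the buffer may shrink and the allocation gets resized. A standalone illustration of those two rules (not the aro implementation):

const std = @import("std");

fn spliceAndNormalize(gpa: std.mem.Allocator, input: []const u8) ![]u8 {
    var out = std.ArrayList(u8).init(gpa);
    errdefer out.deinit();
    var i: usize = 0;
    while (i < input.len) : (i += 1) {
        if (input[i] == '\\' and i + 1 < input.len and (input[i + 1] == '\n' or input[i + 1] == '\r')) {
            i += 1; // line splice: drop the backslash-newline pair
            if (input[i] == '\r' and i + 1 < input.len and input[i + 1] == '\n') i += 1;
        } else if (input[i] == '\r') {
            try out.append('\n'); // normalize \r and \r\n to \n
            if (i + 1 < input.len and input[i + 1] == '\n') i += 1;
        } else {
            try out.append(input[i]);
        }
    }
    return out.toOwnedSlice();
}

test "splicing and normalization" {
    const result = try spliceAndNormalize(std.testing.allocator, "a\\\r\nb\rc");
    defer std.testing.allocator.free(result);
    try std.testing.expectEqualStrings("ab\nc", result);
}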
@@ -981,11 +1003,7 @@ pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8
} = .beginning_of_file;
var line: u32 = 1;
while (true) {
const byte = reader.readByte() catch |err| switch (err) {
error.EndOfStream => break,
else => |e| return e,
};
for (contents) |byte| {
contents[i] = byte;
switch (byte) {
@@ -1083,33 +1101,40 @@ pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8
errdefer comp.gpa.free(splice_locs);
if (i != contents.len) contents = try comp.gpa.realloc(contents, i);
errdefer @compileError("errdefers in callers would possibly free the realloced slice using the original len");
var source = Source{
.id = source_id,
.path = duped_path,
.buf = contents,
.splice_locs = splice_locs,
.kind = kind,
};
try comp.sources.put(duped_path, source);
if (source.offsetOfInvalidUtf8()) |offset| {
try comp.invalid_utf8_locs.putNoClobber(comp.gpa, source_id, offset);
}
comp.sources.putAssumeCapacityNoClobber(duped_path, source);
return source;
}
/// Caller retains ownership of `path` and `buf`.
/// Dupes the source buffer; if it is acceptable to modify the source buffer and possibly resize
/// the allocation, please use `addSourceFromOwnedBuffer`
pub fn addSourceFromBuffer(comp: *Compilation, path: []const u8, buf: []const u8) !Source {
if (comp.sources.get(path)) |some| return some;
if (@as(u64, buf.len) > std.math.maxInt(u32)) return error.StreamTooLong;
const size = std.math.cast(u32, buf.len) orelse return error.StreamTooLong;
var buf_reader = std.io.fixedBufferStream(buf);
const contents = try comp.gpa.dupe(u8, buf);
errdefer comp.gpa.free(contents);
return comp.addSourceFromReader(buf_reader.reader(), path, size);
return comp.addSourceFromOwnedBuffer(contents, path, .user);
}
/// Caller retains ownership of `path`
/// Caller retains ownership of `path`.
pub fn addSourceFromPath(comp: *Compilation, path: []const u8) !Source {
return comp.addSourceFromPathExtra(path, .user);
}
/// Caller retains ownership of `path`.
fn addSourceFromPathExtra(comp: *Compilation, path: []const u8, kind: Source.Kind) !Source {
if (comp.sources.get(path)) |some| return some;
if (mem.indexOfScalar(u8, path, 0) != null) {
@@ -1119,10 +1144,13 @@ pub fn addSourceFromPath(comp: *Compilation, path: []const u8) !Source {
const file = try std.fs.cwd().openFile(path, .{});
defer file.close();
const size = std.math.cast(u32, try file.getEndPos()) orelse return error.StreamTooLong;
var buf_reader = std.io.bufferedReader(file.reader());
const contents = file.readToEndAlloc(comp.gpa, std.math.maxInt(u32)) catch |err| switch (err) {
error.FileTooBig => return error.StreamTooLong,
else => |e| return e,
};
errdefer comp.gpa.free(contents);
return comp.addSourceFromReader(buf_reader.reader(), path, size);
return comp.addSourceFromOwnedBuffer(contents, path, kind);
}
pub const IncludeDirIterator = struct {
@@ -1130,28 +1158,46 @@ pub const IncludeDirIterator = struct {
cwd_source_id: ?Source.Id,
include_dirs_idx: usize = 0,
sys_include_dirs_idx: usize = 0,
tried_ms_cwd: bool = false,
fn next(self: *IncludeDirIterator) ?[]const u8 {
const FoundSource = struct {
path: []const u8,
kind: Source.Kind,
};
fn next(self: *IncludeDirIterator) ?FoundSource {
if (self.cwd_source_id) |source_id| {
self.cwd_source_id = null;
const path = self.comp.getSource(source_id).path;
return std.fs.path.dirname(path) orelse ".";
return .{ .path = std.fs.path.dirname(path) orelse ".", .kind = .user };
}
if (self.include_dirs_idx < self.comp.include_dirs.items.len) {
defer self.include_dirs_idx += 1;
return self.comp.include_dirs.items[self.include_dirs_idx];
return .{ .path = self.comp.include_dirs.items[self.include_dirs_idx], .kind = .user };
}
if (self.sys_include_dirs_idx < self.comp.system_include_dirs.items.len) {
defer self.sys_include_dirs_idx += 1;
return self.comp.system_include_dirs.items[self.sys_include_dirs_idx];
return .{ .path = self.comp.system_include_dirs.items[self.sys_include_dirs_idx], .kind = .system };
}
if (self.comp.ms_cwd_source_id) |source_id| {
if (self.tried_ms_cwd) return null;
self.tried_ms_cwd = true;
const path = self.comp.getSource(source_id).path;
return .{ .path = std.fs.path.dirname(path) orelse ".", .kind = .user };
}
return null;
}
/// Returned value must be freed by allocator
fn nextWithFile(self: *IncludeDirIterator, filename: []const u8, allocator: Allocator) !?[]const u8 {
while (self.next()) |dir| {
return try std.fs.path.join(allocator, &.{ dir, filename });
/// Returned value's path field must be freed by allocator
fn nextWithFile(self: *IncludeDirIterator, filename: []const u8, allocator: Allocator) !?FoundSource {
while (self.next()) |found| {
const path = try std.fs.path.join(allocator, &.{ found.path, filename });
if (self.comp.langopts.ms_extensions) {
for (path) |*c| {
if (c.* == '\\') c.* = '/';
}
}
return .{ .path = path, .kind = found.kind };
}
return null;
}
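Two Microsoft-compatibility behaviors land in this iterator: with `-fms-extensions` the directory of the main source file (`ms_cwd_source_id`) becomes a final search fallback, and joined include paths have backslashes rewritten to forward slashes. The rewrite in isolation:

const std = @import("std");

fn normalizeSlashes(path: []u8) void {
    for (path) |*c| {
        if (c.* == '\\') c.* = '/';
    }
}

test "ms-extensions path separators" {
    var buf = "sub\\dir\\foo.h".*;
    normalizeSlashes(&buf);
    try std.testing.expectEqualStrings("sub/dir/foo.h", &buf);
}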
@@ -1161,8 +1207,8 @@ pub const IncludeDirIterator = struct {
fn skipUntilDirMatch(self: *IncludeDirIterator, source: Source.Id) void {
const path = self.comp.getSource(source).path;
const includer_path = std.fs.path.dirname(path) orelse ".";
while (self.next()) |dir| {
if (mem.eql(u8, includer_path, dir)) break;
while (self.next()) |found| {
if (mem.eql(u8, includer_path, found.path)) break;
}
}
};
@@ -1196,9 +1242,9 @@ pub fn hasInclude(
var stack_fallback = std.heap.stackFallback(path_buf_stack_limit, comp.gpa);
while (try it.nextWithFile(filename, stack_fallback.get())) |path| {
defer stack_fallback.get().free(path);
if (!std.meta.isError(cwd.access(path, .{}))) return true;
while (try it.nextWithFile(filename, stack_fallback.get())) |found| {
defer stack_fallback.get().free(found.path);
if (!std.meta.isError(cwd.access(found.path, .{}))) return true;
}
return false;
}
@@ -1247,9 +1293,9 @@ pub fn findEmbed(
var it = IncludeDirIterator{ .comp = comp, .cwd_source_id = cwd_source_id };
var stack_fallback = std.heap.stackFallback(path_buf_stack_limit, comp.gpa);
while (try it.nextWithFile(filename, stack_fallback.get())) |path| {
defer stack_fallback.get().free(path);
if (comp.getFileContents(path)) |some|
while (try it.nextWithFile(filename, stack_fallback.get())) |found| {
defer stack_fallback.get().free(found.path);
if (comp.getFileContents(found.path)) |some|
return some
else |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
@@ -1262,7 +1308,7 @@ pub fn findEmbed(
pub fn findInclude(
comp: *Compilation,
filename: []const u8,
includer_token_source: Source.Id,
includer_token: Token,
/// angle bracket vs quotes
include_type: IncludeType,
/// include vs include_next
@@ -1270,6 +1316,7 @@
) !?Source {
if (std.fs.path.isAbsolute(filename)) {
if (which == .next) return null;
// TODO: classify absolute file as belonging to system includes or not?
return if (comp.addSourceFromPath(filename)) |some|
some
else |err| switch (err) {
@@ -1279,7 +1326,7 @@ }
}
const cwd_source_id = switch (include_type) {
.quotes => switch (which) {
.first => includer_token_source,
.first => includer_token.source,
.next => null,
},
.angle_brackets => null,
@@ -1287,15 +1334,26 @@
var it = IncludeDirIterator{ .comp = comp, .cwd_source_id = cwd_source_id };
if (which == .next) {
it.skipUntilDirMatch(includer_token_source);
it.skipUntilDirMatch(includer_token.source);
}
var stack_fallback = std.heap.stackFallback(path_buf_stack_limit, comp.gpa);
while (try it.nextWithFile(filename, stack_fallback.get())) |path| {
defer stack_fallback.get().free(path);
if (comp.addSourceFromPath(path)) |some|
return some
else |err| switch (err) {
while (try it.nextWithFile(filename, stack_fallback.get())) |found| {
defer stack_fallback.get().free(found.path);
if (comp.addSourceFromPathExtra(found.path, found.kind)) |some| {
if (it.tried_ms_cwd) {
try comp.diag.add(.{
.tag = .ms_search_rule,
.extra = .{ .str = some.path },
.loc = .{
.id = includer_token.source,
.byte_offset = includer_token.start,
.line = includer_token.line,
},
}, &.{});
}
return some;
} else |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
else => {},
}
@@ -1358,9 +1416,7 @@ pub fn hasBuiltin(comp: *const Compilation, name: []const u8) bool {
std.mem.eql(u8, name, "__builtin_offsetof") or
std.mem.eql(u8, name, "__builtin_types_compatible_p")) return true;
@setEvalBranchQuota(10_000);
const tag = std.meta.stringToEnum(BuiltinFunction.Tag, name) orelse return false;
const builtin = BuiltinFunction.fromTag(tag);
const builtin = BuiltinFunction.fromName(name) orelse return false;
return comp.hasBuiltinFunction(builtin);
}
@@ -1383,7 +1439,7 @@ test "addSourceFromReader" {
defer comp.deinit();
var buf_reader = std.io.fixedBufferStream(str);
const source = try comp.addSourceFromReader(buf_reader.reader(), "path", @intCast(str.len));
const source = try comp.addSourceFromReader(buf_reader.reader(), "path", .user);
try std.testing.expectEqualStrings(expected, source.buf);
try std.testing.expectEqual(warning_count, @as(u32, @intCast(comp.diag.list.items.len)));
@@ -1460,32 +1516,26 @@ test "ignore BOM at beginning of file" {
const BOM = "\xEF\xBB\xBF";
const Test = struct {
fn run(buf: []const u8, input_type: enum { valid_utf8, invalid_utf8 }) !void {
fn run(buf: []const u8) !void {
var comp = Compilation.init(std.testing.allocator);
defer comp.deinit();
var buf_reader = std.io.fixedBufferStream(buf);
const source = try comp.addSourceFromReader(buf_reader.reader(), "file.c", @intCast(buf.len));
switch (input_type) {
.valid_utf8 => {
const expected_output = if (mem.startsWith(u8, buf, BOM)) buf[BOM.len..] else buf;
try std.testing.expectEqualStrings(expected_output, source.buf);
try std.testing.expect(!comp.invalid_utf8_locs.contains(source.id));
},
.invalid_utf8 => try std.testing.expect(comp.invalid_utf8_locs.contains(source.id)),
}
const source = try comp.addSourceFromReader(buf_reader.reader(), "file.c", .user);
const expected_output = if (mem.startsWith(u8, buf, BOM)) buf[BOM.len..] else buf;
try std.testing.expectEqualStrings(expected_output, source.buf);
}
};
try Test.run(BOM, .valid_utf8);
try Test.run(BOM ++ "x", .valid_utf8);
try Test.run("x" ++ BOM, .valid_utf8);
try Test.run(BOM ++ " ", .valid_utf8);
try Test.run(BOM ++ "\n", .valid_utf8);
try Test.run(BOM ++ "\\", .valid_utf8);
try Test.run(BOM);
try Test.run(BOM ++ "x");
try Test.run("x" ++ BOM);
try Test.run(BOM ++ " ");
try Test.run(BOM ++ "\n");
try Test.run(BOM ++ "\\");
try Test.run(BOM[0..1] ++ "x", .invalid_utf8);
try Test.run(BOM[0..2] ++ "x", .invalid_utf8);
try Test.run(BOM[1..] ++ "x", .invalid_utf8);
try Test.run(BOM[2..] ++ "x", .invalid_utf8);
try Test.run(BOM[0..1] ++ "x");
try Test.run(BOM[0..2] ++ "x");
try Test.run(BOM[1..] ++ "x");
try Test.run(BOM[2..] ++ "x");
}

deps/aro/Diagnostics.zig

@@ -54,6 +54,10 @@ pub const Message = struct {
builtin: BuiltinFunction.Tag,
header: Header,
},
invalid_escape: struct {
offset: u32,
char: u8,
},
actual_codepoint: u21,
ascii: u7,
unsigned: u64,
@@ -114,6 +118,7 @@ pub const Options = packed struct {
@"c99-compat": Kind = .default,
@"unicode-zero-width": Kind = .default,
@"unicode-homoglyph": Kind = .default,
unicode: Kind = .default,
@"return-type": Kind = .default,
@"dollar-in-identifier-extension": Kind = .default,
@"unknown-pragmas": Kind = .default,
@@ -168,6 +173,11 @@
@"fuse-ld-path": Kind = .default,
@"language-extension-token": Kind = .default,
@"complex-component-init": Kind = .default,
@"microsoft-include": Kind = .default,
@"microsoft-end-of-file": Kind = .default,
@"invalid-source-encoding": Kind = .default,
@"four-char-constants": Kind = .default,
@"unknown-escape-sequence": Kind = .default,
};
const messages = struct {
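For reference, the `opt` strings declared on the message structs below are what these option fields key off, so the new entries above should be reachable through the usual clang-style switches, e.g. `-Wno-microsoft-include` or `-Werror=unknown-escape-sequence` (inferred from aro's existing `-W` handling, which is not part of this diff).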
@@ -372,6 +382,10 @@
const kind = .warning;
const all = true;
};
pub const missing_type_specifier_c2x = struct {
const msg = "a type specifier is required for all declarations";
const kind = .@"error";
};
pub const multiple_storage_class = struct {
const msg = "cannot combine with previous '{s}' declaration specifier";
const extra = .str;
@@ -831,15 +845,20 @@
const msg = "invalid universal character";
const kind = .@"error";
};
pub const multichar_literal = struct {
pub const incomplete_universal_character = struct {
const msg = "incomplete universal character name";
const kind = .@"error";
};
pub const multichar_literal_warning = struct {
const msg = "multi-character character constant";
const opt = "multichar";
const kind = .warning;
const all = true;
};
pub const unicode_multichar_literal = struct {
const msg = "Unicode character literals may not contain multiple characters";
pub const invalid_multichar_literal = struct {
const msg = "{s} character literals may not contain multiple characters";
const kind = .@"error";
const extra = .str;
};
pub const wide_multichar_literal = struct {
const msg = "extraneous characters in character constant ignored";
@@ -1474,6 +1493,16 @@
const opt = "c99-compat";
const kind = .off;
};
pub const unexpected_character = struct {
const msg = "unexpected character <U+{X:0>4}>";
const extra = .actual_codepoint;
const kind = .@"error";
};
pub const invalid_identifier_start_char = struct {
const msg = "character <U+{X:0>4}> not allowed at the start of an identifier";
const extra = .actual_codepoint;
const kind = .@"error";
};
pub const unicode_zero_width = struct {
const msg = "identifier contains Unicode character <U+{X:0>4}> that is invisible in some environments";
const opt = "unicode-homoglyph";
@@ -1797,9 +1826,10 @@
const kind = .warning;
};
pub const non_standard_escape_char = struct {
const msg = "use of non-standard escape character '\\e'";
const msg = "use of non-standard escape character '\\{s}'";
const kind = .off;
const opt = "pedantic";
const extra = .invalid_escape;
};
pub const invalid_pp_stringify_escape = struct {
const msg = "invalid string literal, ignoring final '\\'";
@@ -2399,7 +2429,6 @@
const opt = "pedantic";
const extra = .str;
const kind = .off;
const pedantic = true;
};
pub const not_floating_type = struct {
const msg = "argument type '{s}' is not a real floating point type";
@@ -2411,6 +2440,75 @@
const extra = .str;
const kind = .@"error";
};
pub const ms_search_rule = struct {
const msg = "#include resolved using non-portable Microsoft search rules as: {s}";
const extra = .str;
const opt = "microsoft-include";
const kind = .warning;
};
pub const ctrl_z_eof = struct {
const msg = "treating Ctrl-Z as end-of-file is a Microsoft extension";
const opt = "microsoft-end-of-file";
const kind = .off;
const pedantic = true;
};
pub const illegal_char_encoding_warning = struct {
const msg = "illegal character encoding in character literal";
const opt = "invalid-source-encoding";
const kind = .warning;
};
pub const illegal_char_encoding_error = struct {
const msg = "illegal character encoding in character literal";
const kind = .@"error";
};
pub const ucn_basic_char_error = struct {
const msg = "character '{c}' cannot be specified by a universal character name";
const kind = .@"error";
const extra = .ascii;
};
pub const ucn_basic_char_warning = struct {
const msg = "specifying character '{c}' with a universal character name is incompatible with C standards before C2x";
const kind = .off;
const extra = .ascii;
const suppress_unless_version = .c2x;
const opt = "pre-c2x-compat";
};
pub const ucn_control_char_error = struct {
const msg = "universal character name refers to a control character";
const kind = .@"error";
};
pub const ucn_control_char_warning = struct {
const msg = "universal character name referring to a control character is incompatible with C standards before C2x";
const kind = .off;
const suppress_unless_version = .c2x;
const opt = "pre-c2x-compat";
};
pub const c89_ucn_in_literal = struct {
const msg = "universal character names are only valid in C99 or later";
const suppress_version = .c99;
const kind = .warning;
const opt = "unicode";
};
pub const four_char_char_literal = struct {
const msg = "multi-character character constant";
const opt = "four-char-constants";
const kind = .off;
};
pub const multi_char_char_literal = struct {
const msg = "multi-character character constant";
const kind = .off;
};
pub const missing_hex_escape = struct {
const msg = "\\{c} used with no following hex digits";
const kind = .@"error";
const extra = .ascii;
};
pub const unknown_escape_sequence = struct {
const msg = "unknown escape sequence '\\{s}'";
const kind = .warning;
const opt = "unknown-escape-sequence";
const extra = .invalid_escape;
};
};
list: std.ArrayListUnmanaged(Message) = .{},
@@ -2585,9 +2683,11 @@ pub fn renderMessage(comp: *Compilation, m: anytype, msg: Message) void {
switch (msg.tag) {
.escape_sequence_overflow,
.invalid_universal_character,
.non_standard_escape_char,
// use msg.extra.unsigned for index into string literal
=> loc.byte_offset += @truncate(msg.extra.unsigned),
.non_standard_escape_char,
.unknown_escape_sequence,
=> loc.byte_offset += msg.extra.invalid_escape.offset,
else => {},
}
const source = comp.getSource(loc.id);
@@ -2650,8 +2750,18 @@ pub fn renderMessage(comp: *Compilation, m: anytype, msg: Message) void {
}),
.builtin_with_header => m.print(info.msg, .{
@tagName(msg.extra.builtin_with_header.header),
@tagName(msg.extra.builtin_with_header.builtin),
BuiltinFunction.nameFromTag(msg.extra.builtin_with_header.builtin).span(),
}),
.invalid_escape => {
if (std.ascii.isPrint(msg.extra.invalid_escape.char)) {
const str: [1]u8 = .{msg.extra.invalid_escape.char};
m.print(info.msg, .{&str});
} else {
var buf: [3]u8 = undefined;
_ = std.fmt.bufPrint(&buf, "x{x}", .{std.fmt.fmtSliceHexLower(&.{msg.extra.invalid_escape.char})}) catch unreachable;
m.print(info.msg, .{&buf});
}
},
else => @compileError("invalid extra kind " ++ @tagName(info.extra)),
}
} else {
@@ -2770,6 +2880,7 @@ const MsgWriter = struct {
fn end(m: *MsgWriter, maybe_line: ?[]const u8, col: u32, end_with_splice: bool) void {
const line = maybe_line orelse {
m.write("\n");
m.setColor(.reset);
return;
};
const trailer = if (end_with_splice) "\\ " else "";
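The `.invalid_escape` branch added to `renderMessage` above prints the offending character directly when printable and as a lowercase hex escape otherwise. A standalone version of that formatting:

const std = @import("std");

fn renderEscapeChar(buf: []u8, c: u8) []const u8 {
    if (std.ascii.isPrint(c)) {
        buf[0] = c;
        return buf[0..1];
    }
    return std.fmt.bufPrint(buf, "x{x:0>2}", .{c}) catch unreachable;
}

test "invalid escape rendering" {
    var buf: [4]u8 = undefined;
    try std.testing.expectEqualStrings("e", renderEscapeChar(&buf, 'e'));
    try std.testing.expectEqualStrings("x9c", renderEscapeChar(&buf, 0x9c));
}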

deps/aro/Driver.zig (40 changes)

@@ -28,6 +28,10 @@ link_objects: std.ArrayListUnmanaged([]const u8) = .{},
output_name: ?[]const u8 = null,
sysroot: ?[]const u8 = null,
temp_file_count: u32 = 0,
/// If false, do not emit line directives in -E mode
line_commands: bool = true,
/// If true, use `#line <num>` instead of `# <num>` for line directives
use_line_directives: bool = false,
only_preprocess: bool = false,
only_syntax: bool = false,
only_compile: bool = false,
@@ -111,11 +115,15 @@ pub const usage =
\\ -fsyntax-only Only run the preprocessor, parser, and semantic analysis stages
\\ -funsigned-char "char" is unsigned
\\ -fno-unsigned-char "char" is signed
\\ -fuse-line-directives Use `#line <num>` linemarkers in preprocessed output
\\ -fno-use-line-directives
\\ Use `# <num>` linemarkers in preprocessed output
\\ -I <dir> Add directory to include search path
\\ -isystem Add directory to SYSTEM include search path
\\ --emulate=[clang|gcc|msvc]
\\ Select which C compiler to emulate (default clang)
\\ -o <file> Write output to <file>
\\ -P, --no-line-commands Disable linemarker output in -E mode
\\ -pedantic Warn on language extensions
\\ --rtlib=<arg> Compiler runtime library to use (libgcc or compiler-rt)
\\ -std=<standard> Specify language standard
@@ -169,6 +177,7 @@
off,
unset,
} = .unset;
var comment_arg: []const u8 = "";
while (i < args.len) : (i += 1) {
const arg = args[i];
if (mem.startsWith(u8, arg, "-") and arg.len > 1) {
@@ -213,6 +222,12 @@
d.only_compile = true;
} else if (mem.eql(u8, arg, "-E")) {
d.only_preprocess = true;
} else if (mem.eql(u8, arg, "-P") or mem.eql(u8, arg, "--no-line-commands")) {
d.line_commands = false;
} else if (mem.eql(u8, arg, "-fuse-line-directives")) {
d.use_line_directives = true;
} else if (mem.eql(u8, arg, "-fno-use-line-directives")) {
d.use_line_directives = false;
} else if (mem.eql(u8, arg, "-fchar8_t")) {
d.comp.langopts.has_char8_t_override = true;
} else if (mem.eql(u8, arg, "-fno-char8_t")) {
@@ -358,6 +373,13 @@
d.verbose_ir = true;
} else if (mem.eql(u8, arg, "--verbose-linker-args")) {
d.verbose_linker_args = true;
} else if (mem.eql(u8, arg, "-C") or mem.eql(u8, arg, "--comments")) {
d.comp.langopts.preserve_comments = true;
comment_arg = arg;
} else if (mem.eql(u8, arg, "-CC") or mem.eql(u8, arg, "--comments-in-macros")) {
d.comp.langopts.preserve_comments = true;
d.comp.langopts.preserve_comments_in_macros = true;
comment_arg = arg;
} else if (option(arg, "-fuse-ld=")) |linker_name| {
d.use_linker = linker_name;
} else if (mem.eql(u8, arg, "-fuse-ld=")) {
@@ -419,6 +441,9 @@
.off => false,
.unset => util.fileSupportsColor(std.io.getStdErr()) and !std.process.hasEnvVarConstant("NO_COLOR"),
};
if (d.comp.langopts.preserve_comments and !d.only_preprocess) {
return d.fatal("invalid argument '{s}' only allowed with '-E'", .{comment_arg});
}
return false;
}
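Taken together, the new driver flags combine like this (invocations are illustrative; the binary name depends on how aro is built):

    arocc -E file.c                         preprocess, emit `# <num>` linemarkers
    arocc -E -P file.c                      suppress linemarkers
    arocc -E -fuse-line-directives file.c   emit `#line <num>` instead
    arocc -E -C file.c                      preserve comments
    arocc -E -CC file.c                     also preserve comments inside macros

Note the validation above: `-C`/`-CC` without `-E` is rejected, since preserved comments only exist in preprocessed output.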
@@ -518,12 +543,25 @@ fn processSource(
var pp = Preprocessor.init(d.comp);
defer pp.deinit();
if (d.comp.langopts.ms_extensions) {
d.comp.ms_cwd_source_id = source.id;
}
if (d.verbose_pp) pp.verbose = true;
if (d.only_preprocess) pp.preserve_whitespace = true;
if (d.only_preprocess) {
pp.preserve_whitespace = true;
if (d.line_commands) {
pp.linemarkers = if (d.use_line_directives) .line_directives else .numeric_directives;
}
}
try pp.addBuiltinMacros();
try pp.addIncludeStart(source);
try pp.addIncludeStart(builtin);
_ = try pp.preprocess(builtin);
try pp.addIncludeStart(user_macros);
_ = try pp.preprocess(user_macros);
try pp.addIncludeResume(source.id, 0, 0);
const eof = try pp.preprocess(source);
try pp.tokens.append(pp.comp.gpa, eof);
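The sequencing here matters for `-E` output: the builtin-macro and user-macro sources are preprocessed first, and `addIncludeResume(source.id, 0, 0)` repositions the linemarker stream at the top of the real file before it is preprocessed, so the dump starts at line 1 of the user's file rather than inside the synthetic sources.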

deps/aro/LangOpts.zig (20 changes)

@@ -1,5 +1,6 @@
const std = @import("std");
const DiagnosticTag = @import("Diagnostics.zig").Tag;
const CharInfo = @import("CharInfo.zig");
const LangOpts = @This();
@@ -85,6 +86,20 @@
.c2x, .gnu2x => "202311L",
};
}
pub fn codepointAllowedInIdentifier(standard: Standard, codepoint: u21, is_start: bool) bool {
if (is_start) {
return if (standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint) and !CharInfo.isC11DisallowedInitialIdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint) and !CharInfo.isC99DisallowedInitialIDChar(codepoint);
} else {
return if (standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint);
}
}
};
emulate: Compiler = .clang,
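Concretely (per the identifier ranges in C11 Annex D): a combining mark such as U+0301 is a valid identifier character but an invalid first character, so `codepointAllowedInIdentifier(.c11, 0x0301, true)` is false while the same call with `is_start == false` is true.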
@@ -110,6 +125,11 @@ has_char8_t_override: ?bool = null,
/// Whether to allow GNU-style inline assembly
gnu_asm: bool = true,
/// Preserve comments when preprocessing
preserve_comments: bool = false,
/// Preserve comments in macros when preprocessing
preserve_comments_in_macros: bool = false,
pub fn setStandard(self: *LangOpts, name: []const u8) error{InvalidStandard}!void {
self.standard = Standard.NameMap.get(name) orelse return error.InvalidStandard;
}

deps/aro/Parser.zig (321 changes)

@@ -17,6 +17,7 @@ const NodeList = std.ArrayList(NodeIndex);
const InitList = @import("InitList.zig");
const Attribute = @import("Attribute.zig");
const CharInfo = @import("CharInfo.zig");
const CharLiteral = @import("CharLiteral.zig");
const Value = @import("Value.zig");
const SymbolStack = @import("SymbolStack.zig");
const Symbol = SymbolStack.Symbol;
@@ -186,15 +187,18 @@ string_ids: struct {
ucontext_t: StringId,
},
fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Location) Compilation.Error!bool {
if (codepoint <= 0x7F) return false;
var diagnosed = false;
/// Checks codepoint for various pedantic warnings
/// Returns true if diagnostic issued
fn checkIdentifierCodepointWarnings(comp: *Compilation, codepoint: u21, loc: Source.Location) Compilation.Error!bool {
assert(codepoint >= 0x80);
const err_start = comp.diag.list.items.len;
if (!CharInfo.isC99IdChar(codepoint)) {
try comp.diag.add(.{
.tag = .c99_compat,
.loc = loc,
}, &.{});
diagnosed = true;
}
if (CharInfo.isInvisible(codepoint)) {
try comp.diag.add(.{
@@ -202,7 +206,6 @@ fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Loca
.loc = loc,
.extra = .{ .actual_codepoint = codepoint },
}, &.{});
diagnosed = true;
}
if (CharInfo.homoglyph(codepoint)) |resembles| {
try comp.diag.add(.{
@@ -210,31 +213,78 @@ fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Loca
.loc = loc,
.extra = .{ .codepoints = .{ .actual = codepoint, .resembles = resembles } },
}, &.{});
diagnosed = true;
}
return diagnosed;
return comp.diag.list.items.len != err_start;
}
/// Issues diagnostics for the current extended identifier token
/// Return value indicates whether the token should be considered an identifier
/// true means consider the token to actually be an identifier
/// false means it is not
fn validateExtendedIdentifier(p: *Parser) !bool {
assert(p.tok_ids[p.tok_i] == .extended_identifier);
const slice = p.tokSlice(p.tok_i);
const view = std.unicode.Utf8View.init(slice) catch {
try p.errTok(.invalid_utf8, p.tok_i);
return error.FatalError;
};
var it = view.iterator();
var valid_identifier = true;
var warned = false;
var len: usize = 0;
var invalid_char: u21 = undefined;
var loc = p.pp.tokens.items(.loc)[p.tok_i];
const standard = p.comp.langopts.standard;
while (it.nextCodepoint()) |codepoint| {
defer {
len += 1;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;
}
if (codepoint == '$') {
warned = true;
try p.comp.diag.add(.{
.tag = .dollar_in_identifier_extension,
.loc = loc,
}, &.{});
}
if (codepoint <= 0x7F) continue;
if (!valid_identifier) continue;
const allowed = standard.codepointAllowedInIdentifier(codepoint, len == 0);
if (!allowed) {
invalid_char = codepoint;
valid_identifier = false;
continue;
}
if (!warned) {
warned = try checkIdentifierCodepointWarnings(p.comp, codepoint, loc);
}
}
if (!valid_identifier) {
if (len == 1) {
try p.errExtra(.unexpected_character, p.tok_i, .{ .actual_codepoint = invalid_char });
return false;
} else {
try p.errExtra(.invalid_identifier_start_char, p.tok_i, .{ .actual_codepoint = invalid_char });
}
}
return true;
}
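Examples of what `validateExtendedIdentifier` now diagnoses (illustrative C inputs; diagnostic names from the code above):

    int café;        // accepted: é is a valid C99/C11 identifier character
    int $price;      // accepted, with a pedantic dollar_in_identifier_extension warning
    int <U+0301>x;   // invalid_identifier_start_char: valid char, not allowed first
    int <U+2022>;    // unexpected_character: the whole token is one invalid codepoint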
fn eatIdentifier(p: *Parser) !?TokenIndex {
switch (p.tok_ids[p.tok_i]) {
.identifier => {},
.extended_identifier => {
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];
if (mem.indexOfScalar(u8, slice, '$')) |i| {
loc.byte_offset += @intCast(i);
try p.comp.diag.add(.{
.tag = .dollar_in_identifier_extension,
.loc = loc,
}, &.{});
loc = p.pp.tokens.items(.loc)[p.tok_i];
}
while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
if (!try p.validateExtendedIdentifier()) {
p.tok_i += 1;
return null;
}
},
else => return null,
@@ -566,6 +616,7 @@ fn diagnoseIncompleteDefinitions(p: *Parser) !void {
/// root : (decl | assembly ';' | staticAssert)*
pub fn parse(pp: *Preprocessor) Compilation.Error!Tree {
assert(pp.linemarkers == .none);
pp.comp.pragmaEvent(.before_parse);
var arena = std.heap.ArenaAllocator.init(pp.comp.gpa);
@@ -1692,10 +1743,10 @@ fn initDeclarator(p: *Parser, decl_spec: *DeclSpec, attr_buf_top: usize) Error!?
try p.errStr(.tentative_array, name, try p.typeStr(init_d.d.ty));
break :incomplete;
} else if (init_d.d.ty.getRecord()) |record| {
_ = try p.tentative_defs.getOrPutValue(p.comp.gpa, record.name, init_d.d.name);
_ = try p.tentative_defs.getOrPutValue(p.gpa, record.name, init_d.d.name);
break :incomplete;
} else if (init_d.d.ty.get(.@"enum")) |en| {
_ = try p.tentative_defs.getOrPutValue(p.comp.gpa, en.data.@"enum".name, init_d.d.name);
_ = try p.tentative_defs.getOrPutValue(p.gpa, en.data.@"enum".name, init_d.d.name);
break :incomplete;
}
}
@@ -2078,7 +2129,7 @@ fn recordSpec(p: *Parser) Error!Type {
// TODO: msvc considers `#pragma pack` on a per-field basis
.msvc => p.pragma_pack,
};
record_layout.compute(record_ty, ty, p.pp.comp, pragma_pack_value);
record_layout.compute(record_ty, ty, p.comp, pragma_pack_value);
}
// finish by creating a node
@@ -2651,6 +2702,7 @@ fn enumerator(p: *Parser, e: *Enumerator) Error!?EnumFieldAndNode {
.node = res.node,
} },
});
try p.value_map.put(node, e.res.val);
return EnumFieldAndNode{ .field = .{
.name = interned_name,
.ty = res.ty,
@@ -3355,7 +3407,7 @@ fn findScalarInitializer(p: *Parser, il: **InitList, ty: *Type, actual_ty: Type,
return false;
} else if (ty.get(.@"struct")) |struct_ty| {
if (il.*.node != .none) return false;
if (actual_ty.eql(ty.*, p.pp.comp, false)) return true;
if (actual_ty.eql(ty.*, p.comp, false)) return true;
const start_index = il.*.list.items.len;
var index = if (start_index != 0) il.*.list.items[start_index - 1].index + 1 else start_index;
@@ -3375,14 +3427,14 @@ fn findScalarInitializer(p: *Parser, il: **InitList, ty: *Type, actual_ty: Type,
return false;
} else if (ty.get(.@"union")) |union_ty| {
if (il.*.node != .none) return false;
if (actual_ty.eql(ty.*, p.pp.comp, false)) return true;
if (actual_ty.eql(ty.*, p.comp, false)) return true;
if (union_ty.data.record.fields.len == 0) {
try p.errTok(.empty_aggregate_init_braces, first_tok);
return error.ParsingFailed;
}
ty.* = union_ty.data.record.fields[0].ty;
il.* = try il.*.find(p.gpa, 0);
// if (il.*.node == .none and actual_ty.eql(ty, p.pp.comp, false)) return true;
// if (il.*.node == .none and actual_ty.eql(ty, p.comp, false)) return true;
if (try p.findScalarInitializer(il, ty, actual_ty, first_tok)) return true;
return false;
}
@@ -3708,7 +3760,7 @@ fn gnuAsmStmt(p: *Parser, quals: Tree.GNUAssemblyQualifiers, l_paren: TokenIndex
const expected_items = 8; // arbitrarily chosen, most assembly will have fewer than 8 inputs/outputs/constraints/names
const bytes_needed = expected_items * @sizeOf(?TokenIndex) + expected_items * 3 * @sizeOf(NodeIndex);
var stack_fallback = std.heap.stackFallback(bytes_needed, p.comp.gpa);
var stack_fallback = std.heap.stackFallback(bytes_needed, p.gpa);
const allocator = stack_fallback.get();
// TODO: Consider using a TokenIndex of 0 instead of null if we need to store the names in the tree
@@ -4572,7 +4624,10 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => true,
.builtin => |builtin| switch (builtin.tag) {
.__builtin_va_start, .__va_start, .va_start => arg_idx != 1,
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
=> arg_idx != 1,
else => true,
},
};
@@ -4582,8 +4637,11 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => true,
.builtin => |builtin| switch (builtin.tag) {
.__builtin_va_start, .__va_start, .va_start => arg_idx != 1,
.__builtin_complex => false,
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
=> arg_idx != 1,
BuiltinFunction.tagFromName("__builtin_complex").? => false,
else => true,
},
};
@@ -4600,8 +4658,11 @@ const CallExpr = union(enum) {
const builtin_tok = p.nodes.items(.data)[@intFromEnum(self.builtin.node)].decl.name;
switch (self.builtin.tag) {
.__builtin_va_start, .__va_start, .va_start => return p.checkVaStartArg(builtin_tok, first_after, param_tok, arg, arg_idx),
.__builtin_complex => return p.checkComplexArg(builtin_tok, first_after, param_tok, arg, arg_idx),
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
=> return p.checkVaStartArg(builtin_tok, first_after, param_tok, arg, arg_idx),
BuiltinFunction.tagFromName("__builtin_complex").? => return p.checkComplexArg(builtin_tok, first_after, param_tok, arg, arg_idx),
else => {},
}
}
@@ -4615,7 +4676,7 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => null,
.builtin => |builtin| switch (builtin.tag) {
.__builtin_complex => 2,
BuiltinFunction.tagFromName("__builtin_complex").? => 2,
else => null,
},
};
@@ -4625,7 +4686,7 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => callable_ty.returnType(),
.builtin => |builtin| switch (builtin.tag) {
.__builtin_complex => {
BuiltinFunction.tagFromName("__builtin_complex").? => {
const last_param = p.list_buf.items[p.list_buf.items.len - 1];
return p.nodes.items(.ty)[@intFromEnum(last_param)].makeComplex();
},
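These switch arms can no longer use enum literals because the builtin tags are generated data, so they resolve tags from source-level names at comptime with `tagFromName(...).?`; switch prong values are comptime-evaluated, so the lookup costs nothing at runtime. A self-contained analogue:

const std = @import("std");

const Tag = enum { va_start, complex, trap };

fn tagFromName(comptime name: []const u8) ?Tag {
    return std.meta.stringToEnum(Tag, name);
}

test "comptime tag lookup in switch prongs" {
    const tag: Tag = .complex;
    const result: u32 = switch (tag) {
        tagFromName("complex").? => 2, // resolved at compile time
        else => 0,
    };
    try std.testing.expectEqual(@as(u32, 2), result);
}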
@@ -7518,7 +7579,7 @@ fn stringLiteral(p: *Parser) Error!Result {
'a' => p.retained_strings.appendAssumeCapacity(0x07),
'b' => p.retained_strings.appendAssumeCapacity(0x08),
'e' => {
try p.errExtra(.non_standard_escape_char, start, .{ .unsigned = i - 1 });
try p.errExtra(.non_standard_escape_char, start, .{ .invalid_escape = .{ .char = 'e', .offset = @intCast(i) } });
p.retained_strings.appendAssumeCapacity(0x1B);
},
'f' => p.retained_strings.appendAssumeCapacity(0x0C),
@@ -7584,130 +7645,82 @@ fn parseUnicodeEscape(p: *Parser, tok: TokenIndex, count: u8, slice: []const u8,
fn charLiteral(p: *Parser) Error!Result {
defer p.tok_i += 1;
const allow_multibyte = switch (p.tok_ids[p.tok_i]) {
.char_literal => false,
.char_literal_utf_8 => false,
.char_literal_wide => true,
.char_literal_utf_16 => true,
.char_literal_utf_32 => true,
else => unreachable,
};
const ty: Type = switch (p.tok_ids[p.tok_i]) {
.char_literal => .{ .specifier = .int },
.char_literal_utf_8 => .{ .specifier = .uchar },
.char_literal_wide => p.comp.types.wchar,
.char_literal_utf_16 => .{ .specifier = .ushort },
.char_literal_utf_32 => .{ .specifier = .ulong },
else => unreachable,
};
const max: u32 = switch (p.tok_ids[p.tok_i]) {
.char_literal => std.math.maxInt(u8),
.char_literal_wide => @intCast(p.comp.types.wchar.maxInt(p.comp)),
.char_literal_utf_8 => std.math.maxInt(u8),
.char_literal_utf_16 => std.math.maxInt(u16),
.char_literal_utf_32 => std.math.maxInt(u32),
else => unreachable,
};
var multichar: u8 = switch (p.tok_ids[p.tok_i]) {
.char_literal => 0,
.char_literal_wide => 4,
.char_literal_utf_8 => 2,
.char_literal_utf_16 => 2,
.char_literal_utf_32 => 2,
else => unreachable,
};
const tok_id = p.tok_ids[p.tok_i];
const char_kind = CharLiteral.Kind.classify(tok_id);
var val: u32 = 0;
var overflow_reported = false;
var slice = p.tokSlice(p.tok_i);
slice = slice[0 .. slice.len - 1];
var i = mem.indexOf(u8, slice, "\'").? + 1;
while (i < slice.len) : (i += 1) {
var c: u32 = slice[i];
var multibyte = false;
switch (c) {
'\\' => {
i += 1;
switch (slice[i]) {
'\n' => i += 1,
'\r' => i += 2,
'\'', '\"', '\\', '?' => c = slice[i],
'n' => c = '\n',
'r' => c = '\r',
't' => c = '\t',
'a' => c = 0x07,
'b' => c = 0x08,
'e' => {
try p.errExtra(.non_standard_escape_char, p.tok_i, .{ .unsigned = i - 1 });
c = 0x1B;
},
'f' => c = 0x0C,
'v' => c = 0x0B,
'x' => c = try p.parseNumberEscape(p.tok_i, 16, slice, &i),
'0'...'7' => c = try p.parseNumberEscape(p.tok_i, 8, slice, &i),
'u', 'U' => return p.todo("unicode escapes in char literals"),
else => unreachable,
const slice = char_kind.contentSlice(p.tokSlice(p.tok_i));
if (slice.len == 1 and std.ascii.isASCII(slice[0])) {
// fast path: single unescaped ASCII char
val = slice[0];
} else {
var char_literal_parser = CharLiteral.Parser.init(slice, char_kind, p.comp);
const max_chars_expected = 4;
var stack_fallback = std.heap.stackFallback(max_chars_expected * @sizeOf(u32), p.comp.gpa);
var chars = std.ArrayList(u32).initCapacity(stack_fallback.get(), max_chars_expected) catch unreachable; // stack allocation already succeeded
defer chars.deinit();
while (char_literal_parser.next()) |item| switch (item) {
.value => |c| try chars.append(c),
.improperly_encoded => |s| {
try chars.ensureUnusedCapacity(s.len);
for (s) |c| chars.appendAssumeCapacity(c);
},
.utf8_text => |view| {
var it = view.iterator();
var max_codepoint: u21 = 0;
try chars.ensureUnusedCapacity(view.bytes.len);
while (it.nextCodepoint()) |c| {
max_codepoint = @max(max_codepoint, c);
chars.appendAssumeCapacity(c);
}
if (max_codepoint > char_kind.maxCodepoint(p.comp)) {
char_literal_parser.err(.char_too_large, .{ .none = {} });
}
},
// These are safe since the source is checked to be valid utf8.
0b1100_0000...0b1101_1111 => {
c &= 0b00011111;
c <<= 6;
c |= slice[i + 1] & 0b00111111;
i += 1;
multibyte = true;
},
0b1110_0000...0b1110_1111 => {
c &= 0b00001111;
c <<= 6;
c |= slice[i + 1] & 0b00111111;
c <<= 6;
c |= slice[i + 2] & 0b00111111;
i += 2;
multibyte = true;
},
0b1111_0000...0b1111_0111 => {
c &= 0b00000111;
c <<= 6;
c |= slice[i + 1] & 0b00111111;
c <<= 6;
c |= slice[i + 2] & 0b00111111;
c <<= 6;
c |= slice[i + 3] & 0b00111111;
i += 3;
multibyte = true;
},
else => {},
};
const is_multichar = chars.items.len > 1;
if (is_multichar) {
if (char_kind == .char and chars.items.len == 4) {
char_literal_parser.warn(.four_char_char_literal, .{ .none = {} });
} else if (char_kind == .char) {
char_literal_parser.warn(.multichar_literal_warning, .{ .none = {} });
} else {
const kind = switch (char_kind) {
.wide => "wide",
.utf_8, .utf_16, .utf_32 => "Unicode",
else => unreachable,
};
char_literal_parser.err(.invalid_multichar_literal, .{ .str = kind });
}
}
if (c > max or (multibyte and !allow_multibyte)) try p.err(.char_too_large);
switch (multichar) {
0, 2, 4 => multichar += 1,
1 => {
multichar = 99;
try p.err(.multichar_literal);
},
3 => {
try p.err(.unicode_multichar_literal);
return error.ParsingFailed;
},
5 => {
try p.err(.wide_multichar_literal);
val = 0;
multichar = 6;
},
6 => val = 0,
else => {},
var multichar_overflow = false;
if (char_kind == .char and is_multichar) {
for (chars.items) |item| {
val, const overflowed = @shlWithOverflow(val, 8);
multichar_overflow = multichar_overflow or overflowed != 0;
val += @as(u8, @truncate(item));
}
} else if (chars.items.len > 0) {
val = chars.items[chars.items.len - 1];
}
const product, const overflowed = @mulWithOverflow(val, max +% 1);
if (overflowed != 0 and !overflow_reported) {
try p.errExtra(.char_lit_too_wide, p.tok_i, .{ .unsigned = i });
overflow_reported = true;
if (multichar_overflow) {
char_literal_parser.err(.char_lit_too_wide, .{ .none = {} });
}
for (char_literal_parser.errors.constSlice()) |item| {
try p.errExtra(item.tag, p.tok_i, item.extra);
}
val = product + c;
}
const ty = char_kind.charLiteralType(p.comp);
// This is the type the literal will have if we're in a macro; macros always operate on intmax_t/uintmax_t values
const macro_ty = if (ty.isUnsignedInt(p.comp) or (p.tok_ids[p.tok_i] == .char_literal and p.comp.getCharSignedness() == .unsigned))
const macro_ty = if (ty.isUnsignedInt(p.comp) or (char_kind == .char and p.comp.getCharSignedness() == .unsigned))
p.comp.types.intmax.makeIntegerUnsigned()
else
p.comp.types.intmax;
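A worked example of the multichar packing in the new path: each char shifts the accumulated value left by 8 bits, so `'ab'` evaluates to `('a' << 8) | 'b'`:

const std = @import("std");

test "multichar char literal value" {
    var val: u32 = 0;
    for ("ab") |c| {
        val = (val << 8) + c; // mirrors the @shlWithOverflow loop above
    }
    try std.testing.expectEqual(@as(u32, 0x6162), val);
}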
@@ -7892,7 +7905,7 @@ fn bitInt(p: *Parser, base: u8, buf: []const u8, suffix: NumberSuffix, tok_i: To
try p.errStr(.pre_c2x_compat, tok_i, "'_BitInt' suffix for literals");
try p.errTok(.bitint_suffix, tok_i);
var managed = try big.int.Managed.init(p.comp.gpa);
var managed = try big.int.Managed.init(p.gpa);
defer managed.deinit();
managed.setString(base, buf) catch |e| switch (e) {

deps/aro/Preprocessor.zig

@@ -89,6 +89,18 @@ top_expansion_buf: ExpandBuf,
verbose: bool = false,
preserve_whitespace: bool = false,
/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers)
linemarkers: Linemarkers = .none,
pub const Linemarkers = enum {
/// No linemarker tokens. Required setting if parser will run
none,
/// #line <num> "filename"
line_directives,
/// # <num> "filename" flags
numeric_directives,
};
pub fn init(comp: *Compilation) Preprocessor {
const pp = Preprocessor{
.comp = comp,
@@ -111,6 +123,10 @@ const builtin_macros = struct {
.id = .macro_param_has_attribute,
.source = .generated,
}};
const has_declspec_attribute = [1]RawToken{.{
.id = .macro_param_has_declspec_attribute,
.source = .generated,
}};
const has_warning = [1]RawToken{.{
.id = .macro_param_has_warning,
.source = .generated,
@@ -173,6 +189,7 @@ fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, tokens: [
pub fn addBuiltinMacros(pp: *Preprocessor) !void {
try pp.addBuiltinMacro("__has_attribute", true, &builtin_macros.has_attribute);
try pp.addBuiltinMacro("__has_declspec_attribute", true, &builtin_macros.has_declspec_attribute);
try pp.addBuiltinMacro("__has_warning", true, &builtin_macros.has_warning);
try pp.addBuiltinMacro("__has_feature", true, &builtin_macros.has_feature);
try pp.addBuiltinMacro("__has_extension", true, &builtin_macros.has_extension);
@@ -201,11 +218,52 @@ pub fn deinit(pp: *Preprocessor) void {
/// Preprocess a source file, returns eof token.
pub fn preprocess(pp: *Preprocessor, source: Source) Error!Token {
return pp.preprocessExtra(source) catch |er| switch (er) {
const eof = pp.preprocessExtra(source) catch |er| switch (er) {
// This cannot occur in the main file and is handled in `include`.
error.StopPreprocessing => unreachable,
else => |e| return e,
};
try eof.checkMsEof(source, pp.comp);
return eof;
}
/// Tokenize a file without any preprocessing, returns eof token.
pub fn tokenize(pp: *Preprocessor, source: Source) Error!Token {
assert(pp.linemarkers == .none);
assert(pp.preserve_whitespace == false);
var tokenizer = Tokenizer{
.buf = source.buf,
.comp = pp.comp,
.source = source.id,
};
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count);
while (true) {
var tok = tokenizer.next();
if (tok.id == .eof) return tokFromRaw(tok);
try pp.tokens.append(pp.gpa, tokFromRaw(tok));
}
}
pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void {
if (pp.linemarkers == .none) return;
try pp.tokens.append(pp.gpa, .{ .id = .include_start, .loc = .{
.id = source.id,
.byte_offset = std.math.maxInt(u32),
.line = 0,
} });
}
pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void {
if (pp.linemarkers == .none) return;
try pp.tokens.append(pp.gpa, .{ .id = .include_resume, .loc = .{
.id = source,
.byte_offset = offset,
.line = line,
} });
}
/// Return the name of the #ifndef guard macro that starts a source, if any.
@ -226,14 +284,6 @@ fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
}
fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
if (pp.comp.invalid_utf8_locs.get(source.id)) |offset| {
try pp.comp.diag.add(.{
.tag = .invalid_utf8,
// Todo: compute line number
.loc = .{ .id = source.id, .byte_offset = offset },
}, &.{});
return error.FatalError;
}
var guard_name = pp.findIncludeGuard(source);
pp.preprocess_count += 1;
@ -493,7 +543,10 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
_ = pp.defines.remove(macro_name);
try pp.expectNl(&tokenizer);
},
.keyword_include => try pp.include(&tokenizer, .first),
.keyword_include => {
try pp.include(&tokenizer, .first);
continue;
},
.keyword_include_next => {
try pp.comp.diag.add(.{
.tag = .include_next,
@ -510,7 +563,10 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
}
},
.keyword_embed => try pp.embed(&tokenizer),
.keyword_pragma => try pp.pragma(&tokenizer, directive, null, &.{}),
.keyword_pragma => {
try pp.pragma(&tokenizer, directive, null, &.{});
continue;
},
.keyword_line => {
// #line number "file"
const digits = tokenizer.nextNoWS();
@ -551,6 +607,10 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
skipToNl(&tokenizer);
},
}
if (pp.preserve_whitespace) {
tok.id = .nl;
try pp.tokens.append(pp.gpa, tokFromRaw(tok));
}
},
.whitespace => if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)),
.nl => {
@ -928,6 +988,12 @@ fn skip(
line_start = true;
tokenizer.index += 1;
tokenizer.line += 1;
if (pp.preserve_whitespace) {
try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
} else {
line_start = false;
tokenizer.index += 1;
@ -980,9 +1046,14 @@ fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf
.hash_hash => {
var rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
while (rhs.id == .whitespace) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
while (true) {
if (rhs.id == .whitespace) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else if (rhs.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else break;
}
try pp.pasteTokens(&buf, &.{rhs});
},
@ -1168,7 +1239,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]co
}
for (params) |tok| {
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws);
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws, false);
try pp.char_buf.appendSlice(str);
}
@ -1212,6 +1283,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]co
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const Token, src_loc: Source.Location) Error!bool {
switch (builtin) {
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_feature,
.macro_param_has_extension,
.macro_param_has_builtin,
@ -1220,6 +1292,7 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
var identifier: ?Token = null;
for (param_toks) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
if (!tok.id.isMacroIdentifier()) {
invalid = tok;
break;
@ -1238,6 +1311,12 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
const ident_str = pp.expandedSlice(identifier.?);
return switch (builtin) {
.macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null,
.macro_param_has_declspec_attribute => {
return if (pp.comp.langopts.declspec_attrs)
Attribute.fromString(.declspec, null, ident_str) != null
else
false;
},
.macro_param_has_feature => features.hasFeature(pp.comp, ident_str),
.macro_param_has_extension => features.hasExtension(pp.comp, ident_str),
.macro_param_has_builtin => pp.comp.hasBuiltin(ident_str),
@ -1272,6 +1351,7 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
var identifier: ?Token = null;
for (param_toks) |tok| switch (tok.id) {
.macro_ws => continue,
.comment => continue,
else => {
if (identifier) |_| invalid = tok else identifier = tok;
},
@ -1353,6 +1433,10 @@ fn expandFuncMacro(
const next = switch (raw_next.id) {
.macro_ws => continue,
.hash_hash => continue,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros)
continue
else
&[1]Token{tokFromRaw(raw_next)},
.macro_param, .macro_param_no_expand => if (args.items[raw_next.end].len > 0)
args.items[raw_next.end]
else
@ -1396,6 +1480,7 @@ fn expandFuncMacro(
try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw)));
},
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_warning,
.macro_param_has_feature,
.macro_param_has_extension,
@ -1426,6 +1511,7 @@ fn expandFuncMacro(
if (string) |_| invalid = tok else string = tok;
},
.macro_ws => continue,
.comment => continue,
else => {
invalid = tok;
break;
@ -1881,18 +1967,30 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr
Token.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
Token.free(tok.expansion_locs, pp.gpa);
continue;
}
tok.id.simplifyMacroKeywordExtra(true);
pp.tokens.appendAssumeCapacity(tok.*);
}
if (pp.preserve_whitespace) {
try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.add_expansion_nl);
while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) {
pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{ .id = .generated } });
pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
}
}
fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
fn expandedSliceExtra(
pp: *const Preprocessor,
tok: Token,
macro_ws_handling: enum { single_macro_ws, preserve_macro_ws },
path_escapes: bool,
) []const u8 {
if (tok.id.lexeme()) |some| {
if (!tok.id.allowsDigraphs(pp.comp) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
}
@ -1901,6 +1999,7 @@ fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: en
.comp = pp.comp,
.index = tok.loc.byte_offset,
.source = .generated,
.path_escapes = path_escapes,
};
if (tok.id == .macro_string) {
while (true) : (tmp_tokenizer.index += 1) {
@ -1914,23 +2013,27 @@ fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: en
/// Get expanded token source string.
pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 {
return pp.expandedSliceExtra(tok, .single_macro_ws);
return pp.expandedSliceExtra(tok, .single_macro_ws, false);
}
/// Concat two tokens and add the result to pp.generated
fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) Error!void {
const lhs = while (lhs_toks.popOrNull()) |lhs| {
if (lhs.id == .macro_ws)
Token.free(lhs.expansion_locs, pp.gpa)
else
if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or
(lhs.id != .macro_ws and lhs.id != .comment))
break lhs;
Token.free(lhs.expansion_locs, pp.gpa);
} else {
return bufCopyTokens(lhs_toks, rhs_toks, &.{});
};
var rhs_rest: u32 = 1;
const rhs = for (rhs_toks) |rhs| {
if (rhs.id != .macro_ws) break rhs;
if ((pp.comp.langopts.preserve_comments_in_macros and rhs.id == .comment) or
(rhs.id != .macro_ws and rhs.id != .comment))
break rhs;
rhs_rest += 1;
} else {
return lhs_toks.appendAssumeCapacity(lhs);
@ -1952,9 +2055,15 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token)
.index = @intCast(start),
.source = .generated,
};
const pasted_token = tmp_tokenizer.nextNoWS();
const next = tmp_tokenizer.nextNoWS().id;
if (next != .nl and next != .eof) {
const pasted_token = tmp_tokenizer.nextNoWSComments();
const next = tmp_tokenizer.nextNoWSComments();
const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker)
.placemarker
else
pasted_token.id;
try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs));
if (next.id != .nl and next.id != .eof) {
try pp.comp.diag.add(.{
.tag = .pasting_formed_invalid,
.loc = lhs.loc,
@ -1963,13 +2072,9 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token)
pp.comp.generated_buf.items[start..end],
) },
}, lhs.expansionSlice());
try lhs_toks.append(tokFromRaw(next));
}
const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker)
.placemarker
else
pasted_token.id;
try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs));
try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{});
}
@ -2053,7 +2158,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
tok.id.simplifyMacroKeyword();
switch (tok.id) {
.hash_hash => {
const next = tokenizer.nextNoWS();
const next = tokenizer.nextNoWSComments();
switch (next.id) {
.nl, .eof => {
try pp.err(tok, .hash_hash_at_end);
@ -2069,6 +2174,13 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
try pp.token_buf.append(next);
},
.nl, .eof => break tok.start,
.comment => if (pp.comp.langopts.preserve_comments_in_macros) {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.whitespace => need_ws = true,
else => {
if (tok.id != .whitespace and need_ws) {
@ -2152,6 +2264,13 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
switch (tok.id) {
.nl, .eof => break tok.start,
.whitespace => need_ws = pp.token_buf.items.len != 0,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.hash => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
@ -2192,7 +2311,7 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
return skipToNl(tokenizer);
}
const saved_tokenizer = tokenizer.*;
const next = tokenizer.nextNoWS();
const next = tokenizer.nextNoWSComments();
if (next.id == .nl or next.id == .eof) {
try pp.err(tok, .hash_hash_at_end);
return;
@ -2249,6 +2368,8 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
/// Handle an #embed directive
fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
tokenizer.path_escapes = true;
defer tokenizer.path_escapes = false;
const first = tokenizer.nextNoWS();
const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof) catch |er| switch (er) {
error.InvalidInclude => return,
@ -2256,7 +2377,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
};
// Check for empty filename.
const tok_slice = pp.expandedSlice(filename_tok);
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename);
return;
@ -2298,6 +2419,8 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
// Handle a #include directive.
fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void {
tokenizer.path_escapes = true;
defer tokenizer.path_escapes = false;
const first = tokenizer.nextNoWS();
const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) {
error.InvalidInclude => return,
@ -2323,10 +2446,32 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc
pp.verboseLog(first, "include file {s}", .{new_source.path});
}
_ = pp.preprocessExtra(new_source) catch |er| switch (er) {
error.StopPreprocessing => {},
const tokens_start = pp.tokens.len;
try pp.addIncludeStart(new_source);
const eof = pp.preprocessExtra(new_source) catch |er| switch (er) {
error.StopPreprocessing => {
for (pp.tokens.items(.expansion_locs)[tokens_start..]) |loc| Token.free(loc, pp.gpa);
pp.tokens.len = tokens_start;
return;
},
else => |e| return e,
};
try eof.checkMsEof(new_source, pp.comp);
if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) {
try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
if (pp.linemarkers == .none) return;
var next = first;
while (true) {
var tmp = tokenizer.*;
next = tmp.nextNoWS();
if (next.id != .nl) break;
tokenizer.* = tmp;
}
try pp.addIncludeResume(next.source, next.end, next.line);
}
/// tokens that are part of a pragma directive can happen in 3 ways:
@ -2441,7 +2586,7 @@ fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken,
const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof);
// Check for empty filename.
const tok_slice = pp.expandedSlice(filename_tok);
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename);
return error.InvalidInclude;
@ -2455,28 +2600,90 @@ fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken,
else => unreachable,
};
return (try pp.comp.findInclude(filename, first.source, include_type, which)) orelse
return (try pp.comp.findInclude(filename, first, include_type, which)) orelse
pp.fatal(first, "'{s}' not found", .{filename});
}
fn printLinemarker(
pp: *Preprocessor,
w: anytype,
line_no: u32,
source: Source,
start_resume: enum(u8) { start, @"resume", none },
) !void {
try w.writeByte('#');
if (pp.linemarkers == .line_directives) try w.writeAll("line");
// line_no is 0 indexed
try w.print(" {d} \"{s}\"", .{ line_no + 1, source.path });
if (pp.linemarkers == .numeric_directives) {
switch (start_resume) {
.none => {},
.start => try w.writeAll(" 1"),
.@"resume" => try w.writeAll(" 2"),
}
switch (source.kind) {
.user => {},
.system => try w.writeAll(" 3"),
.extern_c_system => try w.writeAll(" 3 4"),
}
}
try w.writeByte('\n');
}
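As a rough illustration of the output format (a sketch, with `gpa`, `pp`, and `source` assumed; recall that `line_no` is 0-indexed):
```zig
var buf = std.ArrayList(u8).init(gpa);
defer buf.deinit();
// With linemarkers == .numeric_directives and source.path == "main.c",
// this writes: # 42 "main.c" 2
try pp.printLinemarker(buf.writer(), 41, source, .@"resume");
```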
// How many consecutive empty lines are needed before they are replaced with linemarkers.
const collapse_newlines = 8;
/// Pretty print tokens and try to preserve whitespace.
pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
const tok_ids = pp.tokens.items(.id);
var i: u32 = 0;
while (true) : (i += 1) {
var last_nl = true;
outer: while (true) : (i += 1) {
var cur: Token = pp.tokens.get(i);
switch (cur.id) {
.eof => {
if (pp.tokens.len > 1 and pp.tokens.items(.id)[i - 1] != .nl) try w.writeByte('\n');
break;
if (!last_nl) try w.writeByte('\n');
return;
},
.nl => {
var newlines: u32 = 0;
for (tok_ids[i..], i..) |id, j| {
if (id == .nl) {
newlines += 1;
} else if (id == .eof) {
if (!last_nl) try w.writeByte('\n');
return;
} else if (id != .whitespace) {
if (pp.linemarkers == .none) {
if (newlines < 2) break;
} else if (newlines < collapse_newlines) {
break;
}
i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace));
if (!last_nl) try w.writeAll("\n");
if (pp.linemarkers != .none) {
const next = pp.tokens.get(i);
const source = pp.comp.getSource(next.loc.id);
const line_col = source.lineCol(next.loc);
try pp.printLinemarker(w, line_col.line_no, source, .none);
last_nl = true;
}
continue :outer;
}
}
last_nl = true;
try w.writeAll("\n");
},
.nl => try w.writeAll("\n"),
.keyword_pragma => {
const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1));
const end_idx = mem.indexOfScalarPos(Token.Id, pp.tokens.items(.id), i, .nl) orelse i + 1;
const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1;
const pragma_len = @as(u32, @intCast(end_idx)) - i;
if (pp.comp.getPragma(pragma_name)) |prag| {
if (!prag.shouldPreserveTokens(pp, i + 1)) {
try w.writeByte('\n');
i += pragma_len;
cur = pp.tokens.get(i);
continue;
@ -2488,6 +2695,7 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
cur = pp.tokens.get(i);
if (cur.id == .nl) {
try w.writeByte('\n');
last_nl = true;
break;
}
try w.writeByte(' ');
@ -2498,14 +2706,30 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
.whitespace => {
var slice = pp.expandedSlice(cur);
while (mem.indexOfScalar(u8, slice, '\n')) |some| {
try w.writeByte('\n');
if (pp.linemarkers != .none) try w.writeByte('\n');
slice = slice[some + 1 ..];
}
for (slice) |_| try w.writeByte(' ');
last_nl = false;
},
.include_start => {
const source = pp.comp.getSource(cur.loc.id);
try pp.printLinemarker(w, 0, source, .start);
last_nl = true;
},
.include_resume => {
const source = pp.comp.getSource(cur.loc.id);
const line_col = source.lineCol(cur.loc);
if (!last_nl) try w.writeAll("\n");
try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
last_nl = true;
},
else => {
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
last_nl = false;
},
}
}
@ -2527,6 +2751,7 @@ test "Preserve pragma tokens sometimes" {
defer pp.deinit();
pp.preserve_whitespace = true;
assert(pp.linemarkers == .none);
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
const eof = try pp.preprocess(test_runner_macros);
@ -2557,13 +2782,14 @@ test "Preserve pragma tokens sometimes" {
\\#pragma once
\\
;
try Test.check(omit_once, "int x;\n");
// TODO should only be one newline afterwards when emulating clang
try Test.check(omit_once, "\nint x;\n\n");
const omit_poison =
\\#pragma GCC poison foobar
\\
;
try Test.check(omit_poison, "");
try Test.check(omit_poison, "\n");
}
test "destringify" {

24
deps/aro/README.md vendored Normal file
View File

@ -0,0 +1,24 @@
# Aro
A C compiler with the goal of providing fast compilation, low memory usage, and good diagnostics.
Aro is included as an alternative C frontend in the [Zig compiler](https://github.com/ziglang/zig)
for `translate-c` and, eventually, for compiling C files by translating them to Zig first.
Aro is developed at https://github.com/Vexu/arocc and the Zig dependency is
updated from there when needed.
Currently most of standard C is supported up to C23, as are many of the common
extensions from GNU, MSVC, and Clang.
Basic code generation is supported for x86-64 Linux and can produce a valid hello world:
```sh-session
$ cat hello.c
extern int printf(const char *restrict fmt, ...);
int main(void) {
printf("Hello, world!\n");
return 0;
}
$ zig build run -- hello.c -o hello
$ ./hello
Hello, world!
$
```

32
deps/aro/Source.zig vendored
View File

@ -7,6 +7,16 @@ pub const Id = enum(u32) {
_,
};
/// Classifies the file for line marker output in -E mode
pub const Kind = enum {
/// Regular file
user,
/// Included from a system include directory
system,
/// Included from an "implicit extern C" directory
extern_c_system,
};
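For reference, these kinds map onto the trailing flags written by `printLinemarker` in `Preprocessor.zig` above: `system` appends ` 3` and `extern_c_system` appends ` 3 4`, following GCC's preprocessed-output convention (1 = enter file, 2 = resume file, 3 = system header, 4 = implicit `extern "C"`).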
pub const Location = struct {
id: Id = .unused,
byte_offset: u32 = 0,
@ -24,6 +34,7 @@ id: Id,
/// from the original raw buffer. The same position can appear multiple times if multiple
/// consecutive splices happened. Guaranteed to be non-decreasing
splice_locs: []const u32,
kind: Kind,
/// Todo: binary search instead of scanning entire `splice_locs`.
pub fn numSplicesBefore(source: Source, byte_offset: u32) u32 {
@ -59,7 +70,10 @@ pub fn lineCol(source: Source, loc: Location) LineCol {
var width: u32 = 0;
while (i < loc.byte_offset) : (col += 1) { // TODO this is still incorrect, but better
const len = std.unicode.utf8ByteSequenceLength(source.buf[i]) catch unreachable;
const len = std.unicode.utf8ByteSequenceLength(source.buf[i]) catch {
i += 1;
continue;
};
const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch unreachable;
width += codepointWidth(cp);
i += len;
@ -107,19 +121,3 @@ fn codepointWidth(cp: u32) u32 {
else => 1,
};
}
/// Returns the first offset, if any, in buf where an invalid utf8 sequence
/// is found. Code adapted from std.unicode.utf8ValidateSlice
pub fn offsetOfInvalidUtf8(self: Source) ?u32 {
const buf = self.buf;
std.debug.assert(buf.len <= std.math.maxInt(u32));
var i: u32 = 0;
while (i < buf.len) {
if (std.unicode.utf8ByteSequenceLength(buf[i])) |cp_len| {
if (i + cp_len > buf.len) return i;
if (std.meta.isError(std.unicode.utf8Decode(buf[i .. i + cp_len]))) return i;
i += cp_len;
} else |_| return i;
}
return null;
}

View File

@ -48,7 +48,7 @@ pub fn scopeEnd(s: SymbolStack) u32 {
}
pub fn pushScope(s: *SymbolStack, p: *Parser) !void {
try s.scopes.append(p.pp.comp.gpa, @intCast(s.syms.len));
try s.scopes.append(p.gpa, @intCast(s.syms.len));
}
pub fn popScope(s: *SymbolStack) void {
@ -154,7 +154,7 @@ pub fn defineTypedef(
switch (kinds[i]) {
.typedef => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (ty.eql(prev_ty, p.pp.comp, true)) break;
if (ty.eql(prev_ty, p.comp, true)) break;
try p.errStr(.redefinition_of_typedef, tok, try p.typePairStrExtra(ty, " vs ", prev_ty));
const previous_tok = s.syms.items(.tok)[i];
if (previous_tok != 0) try p.errTok(.previous_definition, previous_tok);
@ -163,7 +163,7 @@ pub fn defineTypedef(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .typedef,
.name = name,
.tok = tok,
@ -197,7 +197,7 @@ pub fn defineSymbol(
},
.decl => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
}
@ -211,7 +211,7 @@ pub fn defineSymbol(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = if (constexpr) .constexpr else .def,
.name = name,
.tok = tok,
@ -243,7 +243,7 @@ pub fn declareSymbol(
},
.decl => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
}
@ -251,7 +251,7 @@ pub fn declareSymbol(
},
.def, .constexpr => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
break;
@ -261,7 +261,7 @@ pub fn declareSymbol(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .decl,
.name = name,
.tok = tok,
@ -290,7 +290,7 @@ pub fn defineParam(s: *SymbolStack, p: *Parser, name: StringId, ty: Type, tok: T
if (ty.is(.fp16) and !p.comp.hasHalfPrecisionFloatABI()) {
try p.errStr(.suggest_pointer_for_invalid_fp16, tok, "parameters");
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .def,
.name = name,
.tok = tok,
@ -365,7 +365,7 @@ pub fn defineEnumeration(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .enumeration,
.name = name,
.tok = tok,

142
deps/aro/Tokenizer.zig vendored
View File

@ -3,8 +3,6 @@ const assert = std.debug.assert;
const Compilation = @import("Compilation.zig");
const Source = @import("Source.zig");
const LangOpts = @import("LangOpts.zig");
const CharInfo = @import("CharInfo.zig");
const unicode = @import("unicode.zig");
const Tokenizer = @This();
@ -108,6 +106,8 @@ pub const Token = struct {
macro_ws,
/// Special token for implementing __has_attribute
macro_param_has_attribute,
/// Special token for implementing __has_declspec_attribute
macro_param_has_declspec_attribute,
/// Special token for implementing __has_warning
macro_param_has_warning,
/// Special token for implementing __has_feature
@ -290,6 +290,16 @@ pub const Token = struct {
/// See C99 6.10.3.3.2
placemarker,
/// Virtual linemarker token output from preprocessor to indicate start of a new include
include_start,
/// Virtual linemarker token output from preprocessor to indicate resuming a file after
/// completion of the preceding #include
include_resume,
/// A comment token if asked to preserve comments.
comment,
/// Return true if token is identifier or keyword.
pub fn isMacroIdentifier(id: Id) bool {
switch (id) {
@ -458,6 +468,10 @@ pub const Token = struct {
pub fn lexeme(id: Id) ?[]const u8 {
return switch (id) {
.include_start,
.include_resume,
=> unreachable,
.invalid,
.identifier,
.extended_identifier,
@ -475,6 +489,7 @@ pub const Token = struct {
.whitespace,
.pp_num,
.embed_byte,
.comment,
=> null,
.zero => "0",
@ -487,6 +502,7 @@ pub const Token = struct {
.stringify_param,
.stringify_va_args,
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_warning,
.macro_param_has_feature,
.macro_param_has_extension,
@ -817,24 +833,6 @@ pub const Token = struct {
};
}
/// Check if codepoint may appear in specified context
/// does not check basic character set chars because the tokenizer handles them separately to keep the common
/// case on the fast path
pub fn mayAppearInIdent(comp: *const Compilation, codepoint: u21, where: enum { start, inside }) bool {
if (codepoint == '$') return comp.langopts.dollars_in_identifiers;
if (codepoint <= 0x7F) return false;
return switch (where) {
.start => if (comp.langopts.standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint) and !CharInfo.isC11DisallowedInitialIdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint) and !CharInfo.isC99DisallowedInitialIDChar(codepoint),
.inside => if (comp.langopts.standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint),
};
}
const all_kws = std.ComptimeStringMap(Id, .{
.{ "auto", auto: {
@setEvalBranchQuota(3000);
@ -986,6 +984,8 @@ index: u32 = 0,
source: Source.Id,
comp: *const Compilation,
line: u32 = 1,
/// Used to parse include strings with Windows style paths.
path_escapes: bool = false,
pub fn next(self: *Tokenizer) Token {
var state: enum {
@ -996,8 +996,10 @@ pub fn next(self: *Tokenizer) Token {
U,
L,
string_literal,
path_escape,
char_literal_start,
char_literal,
char_escape_sequence,
escape_sequence,
octal_escape,
hex_escape,
@ -1038,18 +1040,8 @@ pub fn next(self: *Tokenizer) Token {
var return_state = state;
var counter: u32 = 0;
var codepoint_len: u3 = undefined;
while (self.index < self.buf.len) : (self.index += codepoint_len) {
// Source files get checked for valid utf-8 before being tokenized so it is safe to use
// these versions.
codepoint_len = unicode.utf8ByteSequenceLength_unsafe(self.buf[self.index]);
const c: u21 = switch (codepoint_len) {
1 => @as(u21, self.buf[self.index]),
2 => unicode.utf8Decode2_unsafe(self.buf[self.index..]),
3 => unicode.utf8Decode3_unsafe(self.buf[self.index..]),
4 => unicode.utf8Decode4_unsafe(self.buf[self.index..]),
else => unreachable,
};
while (self.index < self.buf.len) : (self.index += 1) {
const c = self.buf[self.index];
switch (state) {
.start => switch (c) {
'\n' => {
@ -1137,11 +1129,25 @@ pub fn next(self: *Tokenizer) Token {
'#' => state = .hash,
'0'...'9' => state = .pp_num,
'\t', '\x0B', '\x0C', ' ' => state = .whitespace,
else => if (Token.mayAppearInIdent(self.comp, c, .start)) {
'$' => if (self.comp.langopts.dollars_in_identifiers) {
state = .extended_identifier;
} else {
id = .invalid;
self.index += codepoint_len;
self.index += 1;
break;
},
0x1A => if (self.comp.langopts.ms_extensions) {
id = .eof;
break;
} else {
id = .invalid;
self.index += 1;
break;
},
0x80...0xFF => state = .extended_identifier,
else => {
id = .invalid;
self.index += 1;
break;
},
},
@ -1165,7 +1171,7 @@ pub fn next(self: *Tokenizer) Token {
state = .string_literal;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
@ -1179,7 +1185,7 @@ pub fn next(self: *Tokenizer) Token {
state = .char_literal_start;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
@ -1193,7 +1199,7 @@ pub fn next(self: *Tokenizer) Token {
state = .string_literal;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
@ -1207,14 +1213,14 @@ pub fn next(self: *Tokenizer) Token {
state = .string_literal;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
.string_literal => switch (c) {
'\\' => {
return_state = .string_literal;
state = .escape_sequence;
state = if (self.path_escapes) .path_escape else .escape_sequence;
},
'"' => {
self.index += 1;
@ -1227,12 +1233,13 @@ pub fn next(self: *Tokenizer) Token {
'\r' => unreachable,
else => {},
},
.path_escape => {
state = .string_literal;
},
.char_literal_start => switch (c) {
'\\' => {
return_state = .char_literal;
state = .escape_sequence;
state = .char_escape_sequence;
},
'\'', '\n' => {
id = .invalid;
break;
@ -1243,8 +1250,7 @@ pub fn next(self: *Tokenizer) Token {
},
.char_literal => switch (c) {
'\\' => {
return_state = .char_literal;
state = .escape_sequence;
state = .char_escape_sequence;
},
'\'' => {
self.index += 1;
@ -1256,14 +1262,15 @@ pub fn next(self: *Tokenizer) Token {
},
else => {},
},
.char_escape_sequence => switch (c) {
'\r', '\n' => unreachable, // removed by line splicing
else => state = .char_literal,
},
.escape_sequence => switch (c) {
'\'', '"', '?', '\\', 'a', 'b', 'e', 'f', 'n', 'r', 't', 'v' => {
state = return_state;
},
'\n' => {
state = return_state;
self.line += 1;
},
'\r', '\n' => unreachable, // removed by line splicing
'0'...'7' => {
counter = 1;
state = .octal_escape;
@ -1288,14 +1295,14 @@ pub fn next(self: *Tokenizer) Token {
if (counter == 3) state = return_state;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = return_state;
},
},
.hex_escape => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => {
codepoint_len = 0;
self.index -= 1;
state = return_state;
},
},
@ -1311,12 +1318,16 @@ pub fn next(self: *Tokenizer) Token {
},
.identifier, .extended_identifier => switch (c) {
'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
else => {
if (!Token.mayAppearInIdent(self.comp, c, .inside)) {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
}
'$' => if (self.comp.langopts.dollars_in_identifiers) {
state = .extended_identifier;
} else {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
},
0x80...0xFF => state = .extended_identifier,
else => {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
},
},
.equal => switch (c) {
@ -1614,6 +1625,10 @@ pub fn next(self: *Tokenizer) Token {
},
.line_comment => switch (c) {
'\n' => {
if (self.comp.langopts.preserve_comments) {
id = .comment;
break;
}
self.index -= 1;
state = .start;
},
@ -1625,7 +1640,14 @@ pub fn next(self: *Tokenizer) Token {
else => {},
},
.multi_line_comment_asterisk => switch (c) {
'/' => state = .multi_line_comment_done,
'/' => {
if (self.comp.langopts.preserve_comments) {
self.index += 1;
id = .comment;
break;
}
state = .multi_line_comment_done;
},
'\n' => {
self.line += 1;
state = .multi_line_comment;
@ -1712,9 +1734,11 @@ pub fn next(self: *Tokenizer) Token {
.extended_identifier => id = .extended_identifier,
.period2,
.string_literal,
.path_escape,
.char_literal_start,
.char_literal,
.escape_sequence,
.char_escape_sequence,
.octal_escape,
.hex_escape,
.unicode_escape,
@ -1761,6 +1785,12 @@ pub fn next(self: *Tokenizer) Token {
}
pub fn nextNoWS(self: *Tokenizer) Token {
var tok = self.next();
while (tok.id == .whitespace or tok.id == .comment) tok = self.next();
return tok;
}
pub fn nextNoWSComments(self: *Tokenizer) Token {
var tok = self.next();
while (tok.id == .whitespace) tok = self.next();
return tok;

14
deps/aro/Tree.zig vendored
View File

@ -77,6 +77,20 @@ pub const Token = struct {
return copy;
}
pub fn checkMsEof(tok: Token, source: Source, comp: *Compilation) !void {
std.debug.assert(tok.id == .eof);
if (source.buf.len > tok.loc.byte_offset and source.buf[tok.loc.byte_offset] == 0x1A) {
try comp.diag.add(.{
.tag = .ctrl_z_eof,
.loc = .{
.id = source.id,
.byte_offset = tok.loc.byte_offset,
.line = tok.loc.line,
},
}, &.{});
}
}
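Together with the tokenizer change above, which returns `.eof` at a `0x1A` byte when `ms_extensions` is enabled, this lets `preprocess` and `include` report a `ctrl_z_eof` diagnostic for Ctrl+Z-terminated files instead of ending them silently.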
pub const List = std.MultiArrayList(Token);
pub const Id = Tokenizer.Token.Id;
};

6
deps/aro/Type.zig vendored
View File

@ -1727,7 +1727,11 @@ pub const Builder = struct {
ty = typeof;
} else {
ty.specifier = .int;
try p.err(.missing_type_specifier);
if (p.comp.langopts.standard.atLeast(.c2x)) {
try p.err(.missing_type_specifier_c2x);
} else {
try p.err(.missing_type_specifier);
}
}
},
.void => ty.specifier = .void,

File diff suppressed because it is too large

View File

@ -2,10 +2,10 @@ const std = @import("std");
const Properties = @This();
language: Language,
attributes: Attributes,
header: Header,
target_set: TargetSet,
language: Language = .all_languages,
attributes: Attributes = Attributes{},
header: Header = .none,
target_set: TargetSet = TargetSet.initOne(.basic),
/// Header which must be included for a builtin to be available
pub const Header = enum {

View File

@ -68,6 +68,9 @@ pub const ComponentIterator = struct {
'z' => return .{ .spec = .z },
'w' => return .{ .spec = .w },
'F' => return .{ .spec = .F },
'G' => return .{ .spec = .G },
'H' => return .{ .spec = .H },
'M' => return .{ .spec = .M },
'a' => return .{ .spec = .a },
'A' => return .{ .spec = .A },
'V', 'q', 'E' => {
@ -233,6 +236,12 @@ const Spec = union(enum) {
w,
/// constant CFString
F,
/// id
G,
/// SEL
H,
/// struct objc_super
M,
/// __builtin_va_list
a,
/// "reference" to __builtin_va_list

View File

@ -34,7 +34,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation
var idx = start_idx + 1;
const l_paren = p.pp.tokens.get(idx);
if (l_paren.id != .l_paren) {
return p.pp.comp.diag.add(.{
return p.comp.diag.add(.{
.tag = .pragma_pack_lparen,
.loc = l_paren.loc,
}, l_paren.expansionSlice());
@ -83,7 +83,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation
}
}
if (action == .push) {
try pack.stack.append(p.pp.comp.gpa, .{ .label = label orelse "", .val = p.pragma_pack orelse 8 });
try pack.stack.append(p.gpa, .{ .label = label orelse "", .val = p.pragma_pack orelse 8 });
} else {
pack.pop(p, label);
if (new_val != null) {
@ -107,7 +107,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation
const new_val = (try packInt(p, arg)) orelse return;
idx += 1;
if (apple_or_xl) {
try pack.stack.append(p.pp.comp.gpa, .{ .label = "", .val = p.pragma_pack });
try pack.stack.append(p.gpa, .{ .label = "", .val = p.pragma_pack });
}
p.pragma_pack = new_val;
},

2
deps/aro/target.zig vendored
View File

@ -657,6 +657,7 @@ pub fn toLLVMTriple(target: std.Target, buf: []u8) []const u8 {
.netbsd => "netbsd",
.openbsd => "openbsd",
.solaris => "solaris",
.illumos => "illumos",
.windows => "windows",
.zos => "zos",
.haiku => "haiku",
@ -684,6 +685,7 @@ pub fn toLLVMTriple(target: std.Target, buf: []u8) []const u8 {
.watchos => "watchos",
.driverkit => "driverkit",
.shadermodel => "shadermodel",
.liteos => "liteos",
.opencl,
.glsl450,
.vulkan,

41
deps/aro/unicode.zig vendored
View File

@ -1,41 +0,0 @@
//! Copied from https://github.com/ziglang/zig/blob/6f0807f50f4e946bb850e746beaa5d6556cf7750/lib/std/unicode.zig
//! with all safety checks removed. These functions must only be called with known-good buffers that have already
//! been validated as being legitimate UTF8-encoded data, otherwise undefined behavior will occur.
pub fn utf8ByteSequenceLength_unsafe(first_byte: u8) u3 {
return switch (first_byte) {
0b0000_0000...0b0111_1111 => 1,
0b1100_0000...0b1101_1111 => 2,
0b1110_0000...0b1110_1111 => 3,
0b1111_0000...0b1111_0111 => 4,
else => unreachable,
};
}
pub fn utf8Decode2_unsafe(bytes: []const u8) u21 {
var value: u21 = bytes[0] & 0b00011111;
value <<= 6;
return value | (bytes[1] & 0b00111111);
}
pub fn utf8Decode3_unsafe(bytes: []const u8) u21 {
var value: u21 = bytes[0] & 0b00001111;
value <<= 6;
value |= bytes[1] & 0b00111111;
value <<= 6;
return value | (bytes[2] & 0b00111111);
}
pub fn utf8Decode4_unsafe(bytes: []const u8) u21 {
var value: u21 = bytes[0] & 0b00000111;
value <<= 6;
value |= bytes[1] & 0b00111111;
value <<= 6;
value |= bytes[2] & 0b00111111;
value <<= 6;
return value | (bytes[3] & 0b00111111);
}

View File

@ -17,12 +17,14 @@ target: CrossTarget,
optimize: std.builtin.OptimizeMode,
output_file: std.Build.GeneratedFile,
link_libc: bool,
use_clang: bool,
pub const Options = struct {
source_file: std.Build.LazyPath,
target: CrossTarget,
optimize: std.builtin.OptimizeMode,
link_libc: bool = true,
use_clang: bool = true,
};
pub fn create(owner: *std.Build, options: Options) *TranslateC {
@ -43,6 +45,7 @@ pub fn create(owner: *std.Build, options: Options) *TranslateC {
.optimize = options.optimize,
.output_file = std.Build.GeneratedFile{ .step = &self.step },
.link_libc = options.link_libc,
.use_clang = options.use_clang,
};
source.addStepDependencies(&self.step);
return self;
@ -130,6 +133,9 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
if (self.link_libc) {
try argv_list.append("-lc");
}
if (!self.use_clang) {
try argv_list.append("-fno-clang");
}
try argv_list.append("--listen=-");

View File

@ -3980,6 +3980,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
man.hash.add(@as(u16, 0xb945)); // Random number to distinguish translate-c from compiling C objects
man.hash.addBytes(c_src);
man.hash.add(comp.c_frontend);
// If the previous invocation resulted in clang errors, we will see a hit
// here with 0 files in the manifest, in which case it is actually a miss.

View File

@ -308,7 +308,6 @@ fn transVarDecl(_: *Context, _: NodeIndex, _: ?usize) Error!void {
fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: NodeIndex, field_nodes: []const NodeIndex) Error!void {
const node_types = c.tree.nodes.items(.ty);
const ty = node_types[@intFromEnum(enum_decl)];
const node_data = c.tree.nodes.items(.data);
if (c.decl_table.get(@intFromPtr(ty.data.@"enum"))) |_|
return; // Avoid processing this decl twice
const toplevel = scope.id == .root;
@ -342,11 +341,15 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: NodeIndex, field_nodes:
else => |e| return e,
};
const val = c.tree.value_map.get(field_node).?;
const str = try std.fmt.allocPrint(c.arena, "{d}", .{val.data.int});
const int = try ZigTag.integer_literal.create(c.arena, str);
const enum_const_def = try ZigTag.enum_constant.create(c.arena, .{
.name = enum_val_name,
.is_public = toplevel,
.type = enum_const_type_node,
.value = transExpr(c, node_data[@intFromEnum(field_node)].decl.node, .used) catch @panic("TODO"),
.value = int,
});
if (toplevel)
try addTopLevelDecl(c, enum_val_name, enum_const_def)

View File

@ -4265,6 +4265,7 @@ fn cmdTranslateC(comp: *Compilation, arena: Allocator, fancy_output: ?*Compilati
defer man.deinit();
man.hash.add(@as(u16, 0xb945)); // Random number to distinguish translate-c from compiling C objects
man.hash.add(comp.c_frontend);
Compilation.cache_helpers.hashCSource(&man, c_source_file) catch |err| {
fatal("unable to process '{s}': {s}", .{ c_source_file.src_path, @errorName(err) });
};

View File

@ -9,7 +9,7 @@ If you want it to be run with `zig test` and match expected error messages:
```zig
// error
// is_test=1
// is_test=true
//
// :4:13: error: 'try' outside function scope
```
@ -22,6 +22,33 @@ This will do `zig run` on the code and expect exit code 0.
// run
```
## Translate-c
If you want to test translating C code to Zig, use `translate-c`:
```c
// translate-c
// c_frontend=aro,clang
// target=x86_64-linux
//
// pub const foo = 1;
// pub const immediately_after_foo = 2;
//
// pub const somewhere_else_in_the_file = 3;
```
## Run Translated C
If you want to test translating C code to Zig and then executing it, use `run-translated-c`:
```c
// run-translated-c
// c_frontend=aro,clang
// target=x86_64-linux
//
// Hello world!
```
## Incremental Compilation
Make multiple files that have ".", and then an integer, before the ".zig"
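For example (see `sortTestFilenames` further below), an incremental case is a sequence such as `foo.0.zig`, `foo.1.zig`, `foo.2.zig`, applied to the same compilation as successive updates.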

View File

@ -6,6 +6,6 @@ test "Crash" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :1:11: error: use of undeclared identifier 'B'

View File

@ -4,7 +4,7 @@ test "thingy" {}
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :1:6: error: duplicate test name: test.thingy
// :2:6: note: other test here

View File

@ -9,6 +9,6 @@ test "1" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :2:12: error: use of undeclared identifier 'Q'

View File

@ -5,6 +5,6 @@ test "example" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :2:12: error: expected type 'anyerror!void', found 'comptime_int'

View File

@ -6,7 +6,7 @@ test "enum" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :3:9: error: no field with value '@enumFromInt(5)' in enum 'test.enum.E'
// :2:15: note: declared here

View File

@ -9,7 +9,7 @@ pub fn main() void {
// run
// backend=llvm
// target=x86_64-linux-gnu
// link_libc=1
// link_libc=true
//
// f64: 2.000000
// f32: 10.000000

View File

@ -13,6 +13,6 @@ fn foo(comptime info: std.builtin.Type) !void {
}
// run
// is_test=1
// is_test=true
// backend=llvm
//

View File

@ -7,7 +7,7 @@ pub fn main() void {
// run
// backend=llvm
// target=x86_64-linux,x86_64-macos
// link_libc=1
// link_libc=true
//
// hello world!
//

View File

@ -0,0 +1,11 @@
#include <stdlib.h>
int main(void) {
int i = 0;
*&i = 42;
if (i != 42) abort();
return 0;
}
// run-translated-c
// c_frontend=clang
// link_libc=true

View File

@ -0,0 +1,16 @@
enum Foo {
FooA = 2,
FooB = 5,
Foo1,
};
// translate-c
// target=x86_64-windows-msvc
// c_frontend=clang
//
// pub const FooA: c_int = 2;
// pub const FooB: c_int = 5;
// pub const Foo1: c_int = 6;
// pub const enum_Foo = c_int;
//
// pub const Foo = enum_Foo;

View File

@ -0,0 +1,16 @@
enum Foo {
FooA = 2,
FooB = 5,
Foo1,
};
// translate-c
// target=x86_64-linux
// c_frontend=clang,aro
//
// pub const FooA: c_int = 2;
// pub const FooB: c_int = 5;
// pub const Foo1: c_int = 6;
// pub const enum_Foo = c_uint;
//
// pub const Foo = enum_Foo;

View File

@ -8,6 +8,6 @@ test "@unionInit on union w/ tag but no fields" {
}
// error
// is_test=1
// is_test=true
//
// :4:13: error: 'try' outside function scope

View File

@ -2,17 +2,14 @@ const std = @import("std");
const tests = @import("tests.zig");
const nl = if (@import("builtin").os.tag == .windows) "\r\n" else "\n";
pub fn addCases(cases: *tests.RunTranslatedCContext) void {
cases.add("dereference address of",
\\#include <stdlib.h>
\\int main(void) {
\\ int i = 0;
\\ *&i = 42;
\\ if (i != 42) abort();
\\ return 0;
\\}
, "");
// *********************************************************
// *                                                       *
// *               DO NOT ADD NEW CASES HERE               *
// *   instead add a file to test/cases/run_translated_c   *
// *                                                       *
// *********************************************************
pub fn addCases(cases: *tests.RunTranslatedCContext) void {
cases.add("division of floating literals",
\\#define _NO_CRT_STDIO_INLINE 1
\\#include <stdio.h>

View File

@ -1,6 +1,7 @@
gpa: Allocator,
arena: Allocator,
cases: std.ArrayList(Case),
translate: std.ArrayList(Translate),
incremental_cases: std.ArrayList(IncrementalCase),
pub const IncrementalCase = struct {
@ -36,7 +37,7 @@ pub const Update = struct {
Execution: []const u8,
/// A header update compiles the input with the equivalent of
/// `-femit-h` and tests the produced header against the
/// expected result
/// expected result.
Header: []const u8,
},
@ -61,6 +62,11 @@ pub const Backend = enum {
llvm,
};
pub const CFrontend = enum {
clang,
aro,
};
/// A `Case` consists of a list of `Update`. The same `Compilation` is used for each
/// update, so each update's source is treated as a single file being
/// updated by the test harness and incrementally compiled.
@ -143,6 +149,25 @@ pub const Case = struct {
}
};
pub const Translate = struct {
/// The name of the test case. This is shown if a test fails, and
/// otherwise ignored.
name: []const u8,
input: [:0]const u8,
target: CrossTarget,
link_libc: bool,
c_frontend: CFrontend,
kind: union(enum) {
/// Translate the input, run it and check that it
/// outputs the expected text.
run: []const u8,
/// Translate the input and check that it contains
/// the expected lines of code.
translate: []const []const u8,
},
};
pub fn addExe(
ctx: *Cases,
name: []const u8,
@ -346,9 +371,12 @@ pub fn addCompile(
pub fn addFromDir(ctx: *Cases, dir: std.fs.IterableDir) void {
var current_file: []const u8 = "none";
ctx.addFromDirInner(dir, &current_file) catch |err| {
std.debug.panic("test harness failed to process file '{s}': {s}\n", .{
current_file, @errorName(err),
});
std.debug.panicExtra(
@errorReturnTrace(),
@returnAddress(),
"test harness failed to process file '{s}': {s}\n",
.{ current_file, @errorName(err) },
);
};
}
@ -395,10 +423,44 @@ fn addFromDirInner(
const backends = try manifest.getConfigForKeyAlloc(ctx.arena, "backend", Backend);
const targets = try manifest.getConfigForKeyAlloc(ctx.arena, "target", CrossTarget);
const c_frontends = try manifest.getConfigForKeyAlloc(ctx.arena, "c_frontend", CFrontend);
const is_test = try manifest.getConfigForKeyAssertSingle("is_test", bool);
const link_libc = try manifest.getConfigForKeyAssertSingle("link_libc", bool);
const output_mode = try manifest.getConfigForKeyAssertSingle("output_mode", std.builtin.OutputMode);
if (manifest.type == .translate_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingLinesSplit(ctx.arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .translate = output },
});
}
}
continue;
}
if (manifest.type == .run_translated_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingSplit(ctx.arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .run = output },
});
}
}
continue;
}
var cases = std.ArrayList(usize).init(ctx.arena);
// Cross-product to get all possible test combinations
@ -439,21 +501,15 @@ fn addFromDirInner(
case.addCompile(src);
},
.@"error" => {
const errors = try manifest.trailingAlloc(ctx.arena);
const errors = try manifest.trailingLines(ctx.arena);
case.addError(src, errors);
},
.run => {
var output = std.ArrayList(u8).init(ctx.arena);
var trailing_it = manifest.trailing();
while (trailing_it.next()) |line| {
try output.appendSlice(line);
try output.append('\n');
}
if (output.items.len > 0) {
try output.resize(output.items.len - 1);
}
case.addCompareOutput(src, try output.toOwnedSlice());
const output = try manifest.trailingSplit(ctx.arena);
case.addCompareOutput(src, output);
},
.translate_c => @panic("c_frontend specified for compile case"),
.run_translated_c => @panic("c_frontend specified for compile case"),
.cli => @panic("TODO cli tests"),
}
}
@ -468,6 +524,7 @@ pub fn init(gpa: Allocator, arena: Allocator) Cases {
return .{
.gpa = gpa,
.cases = std.ArrayList(Case).init(gpa),
.translate = std.ArrayList(Translate).init(gpa),
.incremental_cases = std.ArrayList(IncrementalCase).init(gpa),
.arena = arena,
};
@ -482,7 +539,7 @@ pub fn lowerToBuildSteps(
incremental_exe: *std.Build.Step.Compile,
) void {
const host = std.zig.system.NativeTargetInfo.detect(.{}) catch |err|
std.debug.panic("unable to detect notive host: {s}\n", .{@errorName(err)});
std.debug.panic("unable to detect native host: {s}\n", .{@errorName(err)});
for (self.incremental_cases.items) |incr_case| {
if (true) {
@ -589,7 +646,7 @@ pub fn lowerToBuildSteps(
.Execution => |expected_stdout| no_exec: {
const run = if (case.target.ofmt == .c) run_step: {
const target_info = std.zig.system.NativeTargetInfo.detect(case.target) catch |err|
std.debug.panic("unable to detect notive host: {s}\n", .{@errorName(err)});
std.debug.panic("unable to detect target host: {s}\n", .{@errorName(err)});
if (host.getExternalExecutor(&target_info, .{ .link_libc = true }) != .native) {
// We wouldn't be able to run the compiled C code.
break :no_exec;
@ -623,6 +680,68 @@ pub fn lowerToBuildSteps(
.Header => @panic("TODO"),
}
}
for (self.translate.items) |case| switch (case.kind) {
.run => |output| {
const annotated_case_name = b.fmt("run-translated-c {s}", .{case.name});
if (opt_test_filter) |filter| {
if (std.mem.indexOf(u8, annotated_case_name, filter) == null) continue;
}
if (!std.process.can_spawn) {
std.debug.print("Unable to spawn child processes on {s}, skipping test.\n", .{@tagName(builtin.os.tag)});
continue; // Pass test.
}
const target_info = std.zig.system.NativeTargetInfo.detect(case.target) catch |err|
std.debug.panic("unable to detect target host: {s}\n", .{@errorName(err)});
if (host.getExternalExecutor(&target_info, .{ .link_libc = true }) != .native) {
// We wouldn't be able to run the compiled C code.
continue; // Pass test.
}
const write_src = b.addWriteFiles();
const file_source = write_src.add("tmp.c", case.input);
const translate_c = b.addTranslateC(.{
.source_file = file_source,
.optimize = .Debug,
.target = case.target,
.link_libc = case.link_libc,
.use_clang = case.c_frontend == .clang,
});
translate_c.step.name = b.fmt("{s} translate-c", .{annotated_case_name});
const run_exe = translate_c.addExecutable(.{});
run_exe.step.name = b.fmt("{s} build-exe", .{annotated_case_name});
run_exe.linkLibC();
const run = b.addRunArtifact(run_exe);
run.step.name = b.fmt("{s} run", .{annotated_case_name});
run.expectStdOutEqual(output);
parent_step.dependOn(&run.step);
},
.translate => |output| {
const annotated_case_name = b.fmt("zig translate-c {s}", .{case.name});
if (opt_test_filter) |filter| {
if (std.mem.indexOf(u8, annotated_case_name, filter) == null) continue;
}
const write_src = b.addWriteFiles();
const file_source = write_src.add("tmp.c", case.input);
const translate_c = b.addTranslateC(.{
.source_file = file_source,
.optimize = .Debug,
.target = case.target,
.link_libc = case.link_libc,
.use_clang = case.c_frontend == .clang,
});
translate_c.step.name = annotated_case_name;
const check_file = translate_c.addCheckFile(output);
parent_step.dependOn(&check_file.step);
},
};
}
/// Sort test filenames in-place, so that incremental test cases ("foo.0.zig",
@ -780,7 +899,7 @@ const TestManifestConfigDefaults = struct {
if (std.mem.eql(u8, key, "backend")) {
return "stage2";
} else if (std.mem.eql(u8, key, "target")) {
if (@"type" == .@"error") {
if (@"type" == .@"error" or @"type" == .translate_c or @"type" == .run_translated_c) {
return "native";
}
return comptime blk: {
@ -807,12 +926,16 @@ const TestManifestConfigDefaults = struct {
.@"error" => "Obj",
.run => "Exe",
.compile => "Obj",
.translate_c => "Obj",
.run_translated_c => "Obj",
.cli => @panic("TODO test harness for CLI tests"),
};
} else if (std.mem.eql(u8, key, "is_test")) {
return "0";
return "false";
} else if (std.mem.eql(u8, key, "link_libc")) {
return "0";
return "false";
} else if (std.mem.eql(u8, key, "c_frontend")) {
return "clang";
} else unreachable;
}
};
@ -844,6 +967,8 @@ const TestManifest = struct {
run,
cli,
compile,
translate_c,
run_translated_c,
};
const TrailingIterator = struct {
@ -912,6 +1037,10 @@ const TestManifest = struct {
break :blk .cli;
} else if (std.mem.eql(u8, raw, "compile")) {
break :blk .compile;
} else if (std.mem.eql(u8, raw, "translate-c")) {
break :blk .translate_c;
} else if (std.mem.eql(u8, raw, "run-translated-c")) {
break :blk .run_translated_c;
} else {
std.log.warn("unknown test case type requested: {s}", .{raw});
return error.UnknownTestCaseType;
@ -979,7 +1108,21 @@ const TestManifest = struct {
};
}
fn trailingAlloc(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const []const u8 {
fn trailingSplit(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const u8 {
var out = std.ArrayList(u8).init(allocator);
defer out.deinit();
var trailing_it = self.trailing();
while (trailing_it.next()) |line| {
try out.appendSlice(line);
try out.append('\n');
}
if (out.items.len > 0) {
try out.resize(out.items.len - 1);
}
return try out.toOwnedSlice();
}
fn trailingLines(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const []const u8 {
var out = std.ArrayList([]const u8).init(allocator);
defer out.deinit();
var it = self.trailing();
@ -989,6 +1132,28 @@ const TestManifest = struct {
return try out.toOwnedSlice();
}
fn trailingLinesSplit(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const []const u8 {
// Collect output lines split by empty lines
var out = std.ArrayList([]const u8).init(allocator);
defer out.deinit();
var buf = std.ArrayList(u8).init(allocator);
defer buf.deinit();
var it = self.trailing();
while (it.next()) |line| {
if (line.len == 0) {
if (buf.items.len != 0) {
try out.append(try buf.toOwnedSlice());
buf.items.len = 0;
}
continue;
}
try buf.appendSlice(line);
try buf.append('\n');
}
try out.append(try buf.toOwnedSlice());
return try out.toOwnedSlice();
}
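Concretely, for the translate-c README example earlier in this diff, `trailingLinesSplit` yields two expected chunks: one with the `foo` and `immediately_after_foo` lines, and one with `somewhere_else_in_the_file`, each checked against the translated output.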
fn ParseFn(comptime T: type) type {
return fn ([]const u8) anyerror!T;
}
@ -1011,8 +1176,10 @@ const TestManifest = struct {
}.parse,
.Bool => return struct {
fn parse(str: []const u8) anyerror!T {
const as_int = try std.fmt.parseInt(u1, str, 0);
return as_int > 0;
if (std.mem.eql(u8, str, "true")) return true;
if (std.mem.eql(u8, str, "false")) return false;
std.debug.print("{s}\n", .{str});
return error.InvalidBool;
}
}.parse,
.Enum => return struct {
@ -1124,9 +1291,47 @@ pub fn main() !void {
if (cases.items.len == 0) {
const backends = try manifest.getConfigForKeyAlloc(arena, "backend", Backend);
const targets = try manifest.getConfigForKeyAlloc(arena, "target", CrossTarget);
const c_frontends = try manifest.getConfigForKeyAlloc(arena, "c_frontend", CFrontend);
const is_test = try manifest.getConfigForKeyAssertSingle("is_test", bool);
const link_libc = try manifest.getConfigForKeyAssertSingle("link_libc", bool);
const output_mode = try manifest.getConfigForKeyAssertSingle("output_mode", std.builtin.OutputMode);
if (manifest.type == .translate_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingLinesSplit(arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .translate = output },
});
}
}
continue;
}
if (manifest.type == .run_translated_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingSplit(arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .run = output },
});
}
}
continue;
}
// Cross-product to get all possible test combinations
for (backends) |backend| {
for (targets) |target| {
@ -1158,7 +1363,7 @@ pub fn main() !void {
case.addCompile(src);
},
.@"error" => {
const errors = try manifest.trailingAlloc(arena);
const errors = try manifest.trailingLines(arena);
switch (strategy) {
.independent => {
case.addError(src, errors);
@ -1169,17 +1374,11 @@ pub fn main() !void {
}
},
.run => {
var output = std.ArrayList(u8).init(arena);
var trailing_it = manifest.trailing();
while (trailing_it.next()) |line| {
try output.appendSlice(line);
try output.append('\n');
}
if (output.items.len > 0) {
try output.resize(output.items.len - 1);
}
case.addCompareOutput(src, try output.toOwnedSlice());
const output = try manifest.trailingSplit(arena);
case.addCompareOutput(src, output);
},
.translate_c => @panic("c_frontend specified for compile case"),
.run_translated_c => @panic("c_frontend specified for compile case"),
.cli => @panic("TODO cli tests"),
}
}
@ -1255,6 +1454,11 @@ fn runCases(self: *Cases, zig_exe_path: []const u8) !void {
host,
);
}
for (self.translate.items) |*case| {
_ = case;
@panic("TODO is this even used?");
}
}
}

View File

@ -3,6 +3,13 @@ const builtin = @import("builtin");
const tests = @import("tests.zig");
const CrossTarget = std.zig.CrossTarget;
// *********************************************************
// *                                                       *
// *               DO NOT ADD NEW CASES HERE               *
// *     instead add a file to test/cases/translate_c      *
// *                                                       *
// *********************************************************
pub fn addCases(cases: *tests.TranslateCContext) void {
const default_enum_type = if (builtin.abi == .msvc) "c_int" else "c_uint";
@ -3315,23 +3322,6 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\pub const FOO_CHAR = '\x3f';
});
cases.add("enums",
\\enum Foo {
\\ FooA = 2,
\\ FooB = 5,
\\ Foo1,
\\};
, &[_][]const u8{
\\pub const FooA: c_int = 2;
\\pub const FooB: c_int = 5;
\\pub const Foo1: c_int = 6;
\\pub const enum_Foo =
++ " " ++ default_enum_type ++
\\;
,
\\pub const Foo = enum_Foo;
});
cases.add("macro cast",
\\#include <stdint.h>
\\int baz(void *arg) { return 0; }