Merge pull request #17524 from Vexu/aro-translate-c

Add ability to test Aro based `translate-c`
commit 149f2f8adb
Andrew Kelley, 2023-10-18 00:32:59 -04:00 (committed by GitHub)
42 changed files with 14599 additions and 36267 deletions

deps/aro/Attribute.zig

@@ -977,10 +977,11 @@ fn fromStringC2X(namespace: ?[]const u8, name: []const u8) ?Tag {
}
fn fromStringDeclspec(name: []const u8) ?Tag {
const normalized = normalize(name);
const decls = @typeInfo(attributes).Struct.decls;
inline for (decls, 0..) |decl, i| {
if (@hasDecl(@field(attributes, decl.name), "declspec")) {
if (mem.eql(u8, @field(attributes, decl.name).declspec, name)) {
if (mem.eql(u8, @field(attributes, decl.name).declspec, normalized)) {
return @enumFromInt(i);
}
}
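A note on the one-line fix above: attribute names arrive in several spellings, and the lookup must compare the normalized form rather than the raw token. A minimal standalone sketch of that kind of normalization (illustrative only; aro's real `normalize` helper lives in this file and may differ in detail):

const std = @import("std");

/// Map "__dllexport__"-style spellings to "dllexport" so decorated and
/// plain forms match. (Assumed behavior for illustration.)
fn normalize(name: []const u8) []const u8 {
    if (name.len >= 4 and std.mem.startsWith(u8, name, "__") and std.mem.endsWith(u8, name, "__"))
        return name[2 .. name.len - 2];
    return name;
}

test "normalize attribute names" {
    try std.testing.expectEqualStrings("dllexport", normalize("__dllexport__"));
    try std.testing.expectEqualStrings("dllexport", normalize("dllexport"));
}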

deps/aro/Builtins.zig (35 changes)

@@ -125,6 +125,18 @@ fn createType(desc: TypeDescription, it: *TypeDescription.TypeIterator, comp: *c
std.debug.assert(builder.specifier == .none);
builder.specifier = Type.Builder.fromType(comp.types.ns_constant_string.ty);
},
.G => {
// Todo: id
return .{ .specifier = .invalid };
},
.H => {
// Todo: SEL
return .{ .specifier = .invalid };
},
.M => {
// Todo: struct objc_super
return .{ .specifier = .invalid };
},
.a => {
std.debug.assert(builder.specifier == .none);
std.debug.assert(desc.suffix.len == 0);
@@ -260,8 +272,7 @@ fn createBuiltin(comp: *const Compilation, builtin: BuiltinFunction, type_arena:
/// Asserts that the builtin has already been created
pub fn lookup(b: *const Builtins, name: []const u8) Expanded {
@setEvalBranchQuota(10_000);
const builtin = BuiltinFunction.fromTag(std.meta.stringToEnum(BuiltinFunction.Tag, name).?);
const builtin = BuiltinFunction.fromName(name).?;
const ty = b._name_to_type_map.get(name).?;
return .{
.builtin = builtin,
@@ -271,9 +282,7 @@ pub fn lookup(b: *const Builtins, name: []const u8) Expanded {
pub fn getOrCreate(b: *Builtins, comp: *Compilation, name: []const u8, type_arena: std.mem.Allocator) !?Expanded {
const ty = b._name_to_type_map.get(name) orelse {
@setEvalBranchQuota(10_000);
const tag = std.meta.stringToEnum(BuiltinFunction.Tag, name) orelse return null;
const builtin = BuiltinFunction.fromTag(tag);
const builtin = BuiltinFunction.fromName(name) orelse return null;
if (!comp.hasBuiltinFunction(builtin)) return null;
try b._name_to_type_map.ensureUnusedCapacity(comp.gpa, 1);
@@ -285,7 +294,7 @@ pub fn getOrCreate(b: *Builtins, comp: *Compilation, name: []const u8, type_aren
.ty = ty,
};
};
const builtin = BuiltinFunction.fromTag(std.meta.stringToEnum(BuiltinFunction.Tag, name).?);
const builtin = BuiltinFunction.fromName(name).?;
return .{
.builtin = builtin,
.ty = ty,
@@ -301,9 +310,9 @@ test "All builtins" {
const type_arena = arena.allocator();
for (0..@typeInfo(BuiltinFunction.Tag).Enum.fields.len) |i| {
const tag: BuiltinFunction.Tag = @enumFromInt(i);
const name = @tagName(tag);
var builtin_it = BuiltinFunction.BuiltinsIterator{};
while (builtin_it.next()) |entry| {
const name = try type_arena.dupe(u8, entry.name);
if (try comp.builtins.getOrCreate(&comp, name, type_arena)) |func_ty| {
const get_again = (try comp.builtins.getOrCreate(&comp, name, std.testing.failing_allocator)).?;
const found_by_lookup = comp.builtins.lookup(name);
@@ -325,10 +334,10 @@ test "Allocation failures" {
const type_arena = arena.allocator();
const num_builtins = 40;
for (0..num_builtins) |i| {
const tag: BuiltinFunction.Tag = @enumFromInt(i);
const name = @tagName(tag);
_ = try comp.builtins.getOrCreate(&comp, name, type_arena);
var builtin_it = BuiltinFunction.BuiltinsIterator{};
for (0..num_builtins) |_| {
const entry = builtin_it.next().?;
_ = try comp.builtins.getOrCreate(&comp, entry.name, type_arena);
}
}
};
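The Builtins hunks above all make the same substitution: `std.meta.stringToEnum(BuiltinFunction.Tag, name)` followed by `fromTag` becomes a single `BuiltinFunction.fromName(name)`, and iteration moves to `BuiltinsIterator`. That decouples lookup from the names being a Zig enum, which matters once the builtin list is generated data. A self-contained analogue of the pattern (the table below is illustrative, not aro's):

const std = @import("std");

const BuiltinId = u32;

// A name-keyed comptime table standing in for aro's generated builtin data.
const builtin_table = std.ComptimeStringMap(BuiltinId, .{
    .{ "__builtin_complex", 0 },
    .{ "__builtin_va_start", 1 },
});

fn fromName(name: []const u8) ?BuiltinId {
    return builtin_table.get(name);
}

test "fromName replaces stringToEnum + fromTag" {
    try std.testing.expectEqual(@as(?BuiltinId, 1), fromName("__builtin_va_start"));
    try std.testing.expectEqual(@as(?BuiltinId, null), fromName("not_a_builtin"));
}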

deps/aro/CharLiteral.zig (new file, 298 lines)

@@ -0,0 +1,298 @@
const std = @import("std");
const Compilation = @import("Compilation.zig");
const Type = @import("Type.zig");
const Diagnostics = @import("Diagnostics.zig");
const Tokenizer = @import("Tokenizer.zig");
const mem = std.mem;
pub const Item = union(enum) {
/// decoded escape
value: u32,
/// Char literal in the source text is not utf8 encoded
improperly_encoded: []const u8,
/// 1 or more unescaped bytes
utf8_text: std.unicode.Utf8View,
};
const CharDiagnostic = struct {
tag: Diagnostics.Tag,
extra: Diagnostics.Message.Extra,
};
pub const Kind = enum {
char,
wide,
utf_8,
utf_16,
utf_32,
pub fn classify(id: Tokenizer.Token.Id) Kind {
return switch (id) {
.char_literal,
.string_literal,
=> .char,
.char_literal_utf_8,
.string_literal_utf_8,
=> .utf_8,
.char_literal_wide,
.string_literal_wide,
=> .wide,
.char_literal_utf_16,
.string_literal_utf_16,
=> .utf_16,
.char_literal_utf_32,
.string_literal_utf_32,
=> .utf_32,
else => unreachable,
};
}
/// Largest unicode codepoint that can be represented by this character kind
/// May be smaller than the largest value that can be represented.
/// For example u8 char literals may only specify 0-127 via literals or
/// character escapes, but may specify up to \xFF via hex escapes.
pub fn maxCodepoint(kind: Kind, comp: *const Compilation) u21 {
return @intCast(switch (kind) {
.char => std.math.maxInt(u7),
.wide => @min(0x10FFFF, comp.types.wchar.maxInt(comp)),
.utf_8 => std.math.maxInt(u7),
.utf_16 => std.math.maxInt(u16),
.utf_32 => 0x10FFFF,
});
}
/// Largest integer that can be represented by this character kind
pub fn maxInt(kind: Kind, comp: *const Compilation) u32 {
return @intCast(switch (kind) {
.char, .utf_8 => std.math.maxInt(u8),
.wide => comp.types.wchar.maxInt(comp),
.utf_16 => std.math.maxInt(u16),
.utf_32 => std.math.maxInt(u32),
});
}
pub fn charLiteralType(kind: Kind, comp: *const Compilation) Type {
return switch (kind) {
.char => Type.int,
.wide => comp.types.wchar,
.utf_8 => .{ .specifier = .uchar },
.utf_16 => comp.types.uint_least16_t,
.utf_32 => comp.types.uint_least32_t,
};
}
/// Return the actual contents of the string literal with leading / trailing quotes and
/// specifiers removed
pub fn contentSlice(kind: Kind, delimited: []const u8) []const u8 {
const end = delimited.len - 1; // remove trailing quote
return switch (kind) {
.char => delimited[1..end],
.wide => delimited[2..end],
.utf_8 => delimited[3..end],
.utf_16 => delimited[2..end],
.utf_32 => delimited[2..end],
};
}
};
pub const Parser = struct {
literal: []const u8,
i: usize = 0,
kind: Kind,
/// We only want to issue a max of 1 error per char literal
errored: bool = false,
errors: std.BoundedArray(CharDiagnostic, 4) = .{},
comp: *const Compilation,
pub fn init(literal: []const u8, kind: Kind, comp: *const Compilation) Parser {
return .{
.literal = literal,
.comp = comp,
.kind = kind,
};
}
pub fn err(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void {
if (self.errored) return;
self.errored = true;
self.errors.append(.{ .tag = tag, .extra = extra }) catch {};
}
pub fn warn(self: *Parser, tag: Diagnostics.Tag, extra: Diagnostics.Message.Extra) void {
if (self.errored) return;
self.errors.append(.{ .tag = tag, .extra = extra }) catch {};
}
pub fn next(self: *Parser) ?Item {
if (self.i >= self.literal.len) return null;
const start = self.i;
if (self.literal[start] != '\\') {
self.i = mem.indexOfScalarPos(u8, self.literal, start + 1, '\\') orelse self.literal.len;
const unescaped_slice = self.literal[start..self.i];
const view = std.unicode.Utf8View.init(unescaped_slice) catch {
if (self.kind != .char) {
self.err(.illegal_char_encoding_error, .{ .none = {} });
} else {
self.warn(.illegal_char_encoding_warning, .{ .none = {} });
}
return .{ .improperly_encoded = self.literal[start..self.i] };
};
return .{ .utf8_text = view };
}
switch (self.literal[start + 1]) {
'u', 'U' => return self.parseUnicodeEscape(),
else => return self.parseEscapedChar(),
}
}
fn parseUnicodeEscape(self: *Parser) ?Item {
const start = self.i;
std.debug.assert(self.literal[self.i] == '\\');
const kind = self.literal[self.i + 1];
std.debug.assert(kind == 'u' or kind == 'U');
self.i += 2;
if (self.i >= self.literal.len or !std.ascii.isHex(self.literal[self.i])) {
self.err(.missing_hex_escape, .{ .ascii = @intCast(kind) });
return null;
}
const expected_len: usize = if (kind == 'u') 4 else 8;
var overflowed = false;
var count: usize = 0;
var val: u32 = 0;
for (self.literal[self.i..], 0..) |c, i| {
if (i == expected_len) break;
const char = std.fmt.charToDigit(c, 16) catch {
break;
};
val, const overflow = @shlWithOverflow(val, 4);
overflowed = overflowed or overflow != 0;
val |= char;
count += 1;
}
self.i += expected_len;
if (overflowed) {
self.err(.escape_sequence_overflow, .{ .unsigned = start });
return null;
}
if (count != expected_len) {
self.err(.incomplete_universal_character, .{ .none = {} });
return null;
}
if (val > std.math.maxInt(u21) or !std.unicode.utf8ValidCodepoint(@intCast(val))) {
self.err(.invalid_universal_character, .{ .unsigned = start });
return null;
}
if (val > self.kind.maxCodepoint(self.comp)) {
self.err(.char_too_large, .{ .none = {} });
}
if (val < 0xA0 and (val != '$' and val != '@' and val != '`')) {
const is_error = !self.comp.langopts.standard.atLeast(.c2x);
if (val >= 0x20 and val <= 0x7F) {
if (is_error) {
self.err(.ucn_basic_char_error, .{ .ascii = @intCast(val) });
} else {
self.warn(.ucn_basic_char_warning, .{ .ascii = @intCast(val) });
}
} else {
if (is_error) {
self.err(.ucn_control_char_error, .{ .none = {} });
} else {
self.warn(.ucn_control_char_warning, .{ .none = {} });
}
}
}
self.warn(.c89_ucn_in_literal, .{ .none = {} });
return .{ .value = val };
}
fn parseEscapedChar(self: *Parser) Item {
self.i += 1;
const c = self.literal[self.i];
defer if (c != 'x' and (c < '0' or c > '7')) {
self.i += 1;
};
switch (c) {
'\n' => unreachable, // removed by line splicing
'\r' => unreachable, // removed by line splicing
'\'', '\"', '\\', '?' => return .{ .value = c },
'n' => return .{ .value = '\n' },
'r' => return .{ .value = '\r' },
't' => return .{ .value = '\t' },
'a' => return .{ .value = 0x07 },
'b' => return .{ .value = 0x08 },
'e', 'E' => {
self.warn(.non_standard_escape_char, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = 0x1B };
},
'(', '{', '[', '%' => {
self.warn(.non_standard_escape_char, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = c };
},
'f' => return .{ .value = 0x0C },
'v' => return .{ .value = 0x0B },
'x' => return .{ .value = self.parseNumberEscape(.hex) },
'0'...'7' => return .{ .value = self.parseNumberEscape(.octal) },
'u', 'U' => unreachable, // handled by parseUnicodeEscape
else => {
self.warn(.unknown_escape_sequence, .{ .invalid_escape = .{ .char = c, .offset = @intCast(self.i) } });
return .{ .value = c };
},
}
}
fn parseNumberEscape(self: *Parser, base: EscapeBase) u32 {
var val: u32 = 0;
var count: usize = 0;
var overflowed = false;
defer self.i += count;
const slice = switch (base) {
.octal => self.literal[self.i..@min(self.literal.len, self.i + 3)], // max 3 chars
.hex => blk: {
self.i += 1;
break :blk self.literal[self.i..]; // skip over 'x'; could have an arbitrary number of chars
},
};
for (slice) |c| {
const char = std.fmt.charToDigit(c, @intFromEnum(base)) catch break;
val, const overflow = @shlWithOverflow(val, base.log2());
if (overflow != 0) overflowed = true;
val += char;
count += 1;
}
if (overflowed or val > self.kind.maxInt(self.comp)) {
self.err(.escape_sequence_overflow, .{ .unsigned = 0 });
}
if (count == 0) {
std.debug.assert(base == .hex);
self.err(.missing_hex_escape, .{ .ascii = 'x' });
}
return val;
}
};
const EscapeBase = enum(u8) {
octal = 8,
hex = 16,
fn log2(base: EscapeBase) u4 {
return switch (base) {
.octal => 3,
.hex => 4,
};
}
};
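To make the escape rules in `parseNumberEscape` concrete: octal escapes consume at most three digits, hex escapes consume digits until the first non-hex character, and each digit shifts the accumulated value left by `base.log2()` bits. A standalone check of the octal case (illustrative, not the code above):

const std = @import("std");

fn decodeOctalEscape(digits: []const u8) u32 {
    var val: u32 = 0;
    for (digits[0..@min(digits.len, 3)]) |c| { // at most 3 octal digits
        const d = std.fmt.charToDigit(c, 8) catch break;
        val = (val << 3) + d; // 3 == EscapeBase.octal.log2()
    }
    return val;
}

test "octal escape decoding" {
    try std.testing.expectEqual(@as(u32, 'A'), decodeOctalEscape("101")); // '\101' is 'A'
    try std.testing.expectEqual(@as(u32, 0o12), decodeOctalEscape("128")); // stops at the '8'
}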

deps/aro/CodeGen.zig

@@ -1162,7 +1162,7 @@ fn genBoolExpr(c: *CodeGen, base: NodeIndex, true_label: Ir.Ref, false_label: Ir
fn genBuiltinCall(c: *CodeGen, builtin: BuiltinFunction, arg_nodes: []const NodeIndex, ty: Type) Error!Ir.Ref {
_ = arg_nodes;
_ = ty;
return c.comp.diag.fatalNoSrc("TODO CodeGen.genBuiltinCall {s}\n", .{@tagName(builtin.tag)});
return c.comp.diag.fatalNoSrc("TODO CodeGen.genBuiltinCall {s}\n", .{BuiltinFunction.nameFromTag(builtin.tag).span()});
}
fn genCall(c: *CodeGen, fn_node: NodeIndex, arg_nodes: []const NodeIndex, ty: Type) Error!Ir.Ref {

deps/aro/Compilation.zig

@@ -100,6 +100,8 @@ generated_buf: std.ArrayList(u8),
builtins: Builtins = .{},
types: struct {
wchar: Type = undefined,
uint_least16_t: Type = undefined,
uint_least32_t: Type = undefined,
ptrdiff: Type = undefined,
size: Type = undefined,
va_list: Type = undefined,
@@ -120,9 +122,8 @@ types: struct {
int16: Type = .{ .specifier = .invalid },
int64: Type = .{ .specifier = .invalid },
} = .{},
/// Mapping from Source.Id to byte offset of first non-utf8 byte
invalid_utf8_locs: std.AutoHashMapUnmanaged(Source.Id, u32) = .{},
string_interner: StringInterner = .{},
ms_cwd_source_id: ?Source.Id = null,
pub fn init(gpa: Allocator) Compilation {
return .{
@@ -153,7 +154,6 @@ pub fn deinit(comp: *Compilation) void {
comp.pragma_handlers.deinit();
comp.generated_buf.deinit();
comp.builtins.deinit(comp.gpa);
comp.invalid_utf8_locs.deinit(comp.gpa);
comp.string_interner.deinit(comp.gpa);
}
@@ -635,11 +635,25 @@ fn generateBuiltinTypes(comp: *Compilation) !void {
.intptr = intptr,
.int16 = int16,
.int64 = int64,
.uint_least16_t = comp.intLeastN(16, .unsigned),
.uint_least32_t = comp.intLeastN(32, .unsigned),
};
try comp.generateNsConstantStringType();
}
/// Smallest integer type with at least N bits
fn intLeastN(comp: *const Compilation, bits: usize, signedness: std.builtin.Signedness) Type {
const candidates = switch (signedness) {
.signed => &[_]Type.Specifier{ .schar, .short, .int, .long, .long_long },
.unsigned => &[_]Type.Specifier{ .uchar, .ushort, .uint, .ulong, .ulong_long },
};
for (candidates) |specifier| {
const ty: Type = .{ .specifier = specifier };
if (ty.sizeof(comp).? * 8 >= bits) return ty;
} else unreachable;
}
fn intSize(comp: *const Compilation, specifier: Type.Specifier) u64 {
const ty = Type{ .specifier = specifier };
return ty.sizeof(comp).?;
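`intLeastN` walks the candidate specifiers in increasing size and returns the first whose width covers the request, so on a typical 64-bit target `uint_least16_t` resolves to `unsigned short` and `uint_least32_t` to `unsigned int`. A standalone analogue with fixed sizes (aro asks `Type.sizeof` for the real target):

const std = @import("std");

const Candidate = struct { name: []const u8, bits: usize };

fn intLeastN(bits: usize) []const u8 {
    // Sizes for a typical 64-bit target; purely illustrative.
    const candidates = [_]Candidate{
        .{ .name = "unsigned char", .bits = 8 },
        .{ .name = "unsigned short", .bits = 16 },
        .{ .name = "unsigned int", .bits = 32 },
        .{ .name = "unsigned long", .bits = 64 },
    };
    for (candidates) |c| {
        if (c.bits >= bits) return c.name;
    } else unreachable;
}

test "uint_least resolution" {
    try std.testing.expectEqualStrings("unsigned short", intLeastN(16));
    try std.testing.expectEqualStrings("unsigned int", intLeastN(32));
}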
@@ -944,21 +958,29 @@ pub fn getSource(comp: *const Compilation, id: Source.Id) Source {
.buf = comp.generated_buf.items,
.id = .generated,
.splice_locs = &.{},
.kind = .user,
};
return comp.sources.values()[@intFromEnum(id) - 2];
}
/// Creates a Source from the contents of `reader` and adds it to the Compilation
/// Performs newline splicing, line-ending normalization to '\n', and UTF-8 validation.
/// caller retains ownership of `path`
/// `expected_size` will be allocated to hold the contents of `reader` and *must* be at least
/// as large as the entire contents of `reader`.
/// To add a pre-existing buffer as a Source, see addSourceFromBuffer
/// To add a file's contents given its path, see addSourceFromPath
pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8, expected_size: u32) !Source {
var contents = try comp.gpa.alloc(u8, expected_size);
pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8, kind: Source.Kind) !Source {
const contents = try reader.readAllAlloc(comp.gpa, std.math.maxInt(u32));
errdefer comp.gpa.free(contents);
return comp.addSourceFromOwnedBuffer(contents, path, kind);
}
/// Creates a Source from `buf` and adds it to the Compilation
/// Performs newline splicing and line-ending normalization to '\n'
/// `buf` will be modified and the allocation will be resized if newline splicing
/// or line-ending changes happen.
/// caller retains ownership of `path`
/// To add the contents of an arbitrary reader as a Source, see addSourceFromReader
/// To add a file's contents given its path, see addSourceFromPath
pub fn addSourceFromOwnedBuffer(comp: *Compilation, buf: []u8, path: []const u8, kind: Source.Kind) !Source {
try comp.sources.ensureUnusedCapacity(1);
var contents = buf;
const duped_path = try comp.gpa.dupe(u8, path);
errdefer comp.gpa.free(duped_path);
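The new doc comments describe in-place buffer transformations: backslash-newline pairs are spliced away and `\r\n`/`\r` line endings become `\n`, which is why the buffer may shrink and the allocation gets resized. A standalone illustration of those two rules (not the aro implementation):

const std = @import("std");

fn spliceAndNormalize(gpa: std.mem.Allocator, input: []const u8) ![]u8 {
    var out = std.ArrayList(u8).init(gpa);
    errdefer out.deinit();
    var i: usize = 0;
    while (i < input.len) : (i += 1) {
        if (input[i] == '\\' and i + 1 < input.len and (input[i + 1] == '\n' or input[i + 1] == '\r')) {
            i += 1; // line splice: drop the backslash-newline pair
            if (input[i] == '\r' and i + 1 < input.len and input[i + 1] == '\n') i += 1;
        } else if (input[i] == '\r') {
            try out.append('\n'); // normalize \r and \r\n to \n
            if (i + 1 < input.len and input[i + 1] == '\n') i += 1;
        } else {
            try out.append(input[i]);
        }
    }
    return out.toOwnedSlice();
}

test "splicing and normalization" {
    const result = try spliceAndNormalize(std.testing.allocator, "a\\\r\nb\rc");
    defer std.testing.allocator.free(result);
    try std.testing.expectEqualStrings("ab\nc", result);
}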
@@ -981,11 +1003,7 @@ pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8
} = .beginning_of_file;
var line: u32 = 1;
while (true) {
const byte = reader.readByte() catch |err| switch (err) {
error.EndOfStream => break,
else => |e| return e,
};
for (contents) |byte| {
contents[i] = byte;
switch (byte) {
@@ -1083,33 +1101,40 @@ pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8
errdefer comp.gpa.free(splice_locs);
if (i != contents.len) contents = try comp.gpa.realloc(contents, i);
errdefer @compileError("errdefers in callers would possibly free the realloced slice using the original len");
var source = Source{
.id = source_id,
.path = duped_path,
.buf = contents,
.splice_locs = splice_locs,
.kind = kind,
};
try comp.sources.put(duped_path, source);
if (source.offsetOfInvalidUtf8()) |offset| {
try comp.invalid_utf8_locs.putNoClobber(comp.gpa, source_id, offset);
}
comp.sources.putAssumeCapacityNoClobber(duped_path, source);
return source;
}
/// Caller retains ownership of `path` and `buf`.
/// Dupes the source buffer; if it is acceptable to modify the source buffer and possibly resize
/// the allocation, please use `addSourceFromOwnedBuffer`
pub fn addSourceFromBuffer(comp: *Compilation, path: []const u8, buf: []const u8) !Source {
if (comp.sources.get(path)) |some| return some;
if (@as(u64, buf.len) > std.math.maxInt(u32)) return error.StreamTooLong;
const size = std.math.cast(u32, buf.len) orelse return error.StreamTooLong;
var buf_reader = std.io.fixedBufferStream(buf);
const contents = try comp.gpa.dupe(u8, buf);
errdefer comp.gpa.free(contents);
return comp.addSourceFromReader(buf_reader.reader(), path, size);
return comp.addSourceFromOwnedBuffer(contents, path, .user);
}
/// Caller retains ownership of `path`
/// Caller retains ownership of `path`.
pub fn addSourceFromPath(comp: *Compilation, path: []const u8) !Source {
return comp.addSourceFromPathExtra(path, .user);
}
/// Caller retains ownership of `path`.
fn addSourceFromPathExtra(comp: *Compilation, path: []const u8, kind: Source.Kind) !Source {
if (comp.sources.get(path)) |some| return some;
if (mem.indexOfScalar(u8, path, 0) != null) {
@@ -1119,10 +1144,13 @@ pub fn addSourceFromPath(comp: *Compilation, path: []const u8) !Source {
const file = try std.fs.cwd().openFile(path, .{});
defer file.close();
const size = std.math.cast(u32, try file.getEndPos()) orelse return error.StreamTooLong;
var buf_reader = std.io.bufferedReader(file.reader());
const contents = file.readToEndAlloc(comp.gpa, std.math.maxInt(u32)) catch |err| switch (err) {
error.FileTooBig => return error.StreamTooLong,
else => |e| return e,
};
errdefer comp.gpa.free(contents);
return comp.addSourceFromReader(buf_reader.reader(), path, size);
return comp.addSourceFromOwnedBuffer(contents, path, kind);
}
pub const IncludeDirIterator = struct {
@@ -1130,28 +1158,46 @@ pub const IncludeDirIterator = struct {
cwd_source_id: ?Source.Id,
include_dirs_idx: usize = 0,
sys_include_dirs_idx: usize = 0,
tried_ms_cwd: bool = false,
fn next(self: *IncludeDirIterator) ?[]const u8 {
const FoundSource = struct {
path: []const u8,
kind: Source.Kind,
};
fn next(self: *IncludeDirIterator) ?FoundSource {
if (self.cwd_source_id) |source_id| {
self.cwd_source_id = null;
const path = self.comp.getSource(source_id).path;
return std.fs.path.dirname(path) orelse ".";
return .{ .path = std.fs.path.dirname(path) orelse ".", .kind = .user };
}
if (self.include_dirs_idx < self.comp.include_dirs.items.len) {
defer self.include_dirs_idx += 1;
return self.comp.include_dirs.items[self.include_dirs_idx];
return .{ .path = self.comp.include_dirs.items[self.include_dirs_idx], .kind = .user };
}
if (self.sys_include_dirs_idx < self.comp.system_include_dirs.items.len) {
defer self.sys_include_dirs_idx += 1;
return self.comp.system_include_dirs.items[self.sys_include_dirs_idx];
return .{ .path = self.comp.system_include_dirs.items[self.sys_include_dirs_idx], .kind = .system };
}
if (self.comp.ms_cwd_source_id) |source_id| {
if (self.tried_ms_cwd) return null;
self.tried_ms_cwd = true;
const path = self.comp.getSource(source_id).path;
return .{ .path = std.fs.path.dirname(path) orelse ".", .kind = .user };
}
return null;
}
/// Returned value must be freed by allocator
fn nextWithFile(self: *IncludeDirIterator, filename: []const u8, allocator: Allocator) !?[]const u8 {
while (self.next()) |dir| {
return try std.fs.path.join(allocator, &.{ dir, filename });
/// Returned value's path field must be freed by allocator
fn nextWithFile(self: *IncludeDirIterator, filename: []const u8, allocator: Allocator) !?FoundSource {
while (self.next()) |found| {
const path = try std.fs.path.join(allocator, &.{ found.path, filename });
if (self.comp.langopts.ms_extensions) {
for (path) |*c| {
if (c.* == '\\') c.* = '/';
}
}
return .{ .path = path, .kind = found.kind };
}
return null;
}
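Two Microsoft-compatibility behaviors land in this iterator: with `-fms-extensions` the directory of the main source file (`ms_cwd_source_id`) becomes a final search fallback, and joined include paths have backslashes rewritten to forward slashes. The rewrite in isolation:

const std = @import("std");

fn normalizeSlashes(path: []u8) void {
    for (path) |*c| {
        if (c.* == '\\') c.* = '/';
    }
}

test "ms-extensions path separators" {
    var buf = "sub\\dir\\foo.h".*;
    normalizeSlashes(&buf);
    try std.testing.expectEqualStrings("sub/dir/foo.h", &buf);
}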
@@ -1161,8 +1207,8 @@ pub const IncludeDirIterator = struct {
fn skipUntilDirMatch(self: *IncludeDirIterator, source: Source.Id) void {
const path = self.comp.getSource(source).path;
const includer_path = std.fs.path.dirname(path) orelse ".";
while (self.next()) |dir| {
if (mem.eql(u8, includer_path, dir)) break;
while (self.next()) |found| {
if (mem.eql(u8, includer_path, found.path)) break;
}
}
};
@@ -1196,9 +1242,9 @@ pub fn hasInclude(
var stack_fallback = std.heap.stackFallback(path_buf_stack_limit, comp.gpa);
while (try it.nextWithFile(filename, stack_fallback.get())) |path| {
defer stack_fallback.get().free(path);
if (!std.meta.isError(cwd.access(path, .{}))) return true;
while (try it.nextWithFile(filename, stack_fallback.get())) |found| {
defer stack_fallback.get().free(found.path);
if (!std.meta.isError(cwd.access(found.path, .{}))) return true;
}
return false;
}
@@ -1247,9 +1293,9 @@ pub fn findEmbed(
var it = IncludeDirIterator{ .comp = comp, .cwd_source_id = cwd_source_id };
var stack_fallback = std.heap.stackFallback(path_buf_stack_limit, comp.gpa);
while (try it.nextWithFile(filename, stack_fallback.get())) |path| {
defer stack_fallback.get().free(path);
if (comp.getFileContents(path)) |some|
while (try it.nextWithFile(filename, stack_fallback.get())) |found| {
defer stack_fallback.get().free(found.path);
if (comp.getFileContents(found.path)) |some|
return some
else |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
@@ -1262,7 +1308,7 @@ pub fn findEmbed(
pub fn findInclude(
comp: *Compilation,
filename: []const u8,
includer_token_source: Source.Id,
includer_token: Token,
/// angle bracket vs quotes
include_type: IncludeType,
/// include vs include_next
@@ -1270,6 +1316,7 @@
) !?Source {
if (std.fs.path.isAbsolute(filename)) {
if (which == .next) return null;
// TODO: classify absolute file as belonging to system includes or not?
return if (comp.addSourceFromPath(filename)) |some|
some
else |err| switch (err) {
@@ -1279,7 +1326,7 @@ }
}
const cwd_source_id = switch (include_type) {
.quotes => switch (which) {
.first => includer_token_source,
.first => includer_token.source,
.next => null,
},
.angle_brackets => null,
@@ -1287,15 +1334,26 @@
var it = IncludeDirIterator{ .comp = comp, .cwd_source_id = cwd_source_id };
if (which == .next) {
it.skipUntilDirMatch(includer_token_source);
it.skipUntilDirMatch(includer_token.source);
}
var stack_fallback = std.heap.stackFallback(path_buf_stack_limit, comp.gpa);
while (try it.nextWithFile(filename, stack_fallback.get())) |path| {
defer stack_fallback.get().free(path);
if (comp.addSourceFromPath(path)) |some|
return some
else |err| switch (err) {
while (try it.nextWithFile(filename, stack_fallback.get())) |found| {
defer stack_fallback.get().free(found.path);
if (comp.addSourceFromPathExtra(found.path, found.kind)) |some| {
if (it.tried_ms_cwd) {
try comp.diag.add(.{
.tag = .ms_search_rule,
.extra = .{ .str = some.path },
.loc = .{
.id = includer_token.source,
.byte_offset = includer_token.start,
.line = includer_token.line,
},
}, &.{});
}
return some;
} else |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
else => {},
}
@@ -1358,9 +1416,7 @@ pub fn hasBuiltin(comp: *const Compilation, name: []const u8) bool {
std.mem.eql(u8, name, "__builtin_offsetof") or
std.mem.eql(u8, name, "__builtin_types_compatible_p")) return true;
@setEvalBranchQuota(10_000);
const tag = std.meta.stringToEnum(BuiltinFunction.Tag, name) orelse return false;
const builtin = BuiltinFunction.fromTag(tag);
const builtin = BuiltinFunction.fromName(name) orelse return false;
return comp.hasBuiltinFunction(builtin);
}
@@ -1383,7 +1439,7 @@ test "addSourceFromReader" {
defer comp.deinit();
var buf_reader = std.io.fixedBufferStream(str);
const source = try comp.addSourceFromReader(buf_reader.reader(), "path", @intCast(str.len));
const source = try comp.addSourceFromReader(buf_reader.reader(), "path", .user);
try std.testing.expectEqualStrings(expected, source.buf);
try std.testing.expectEqual(warning_count, @as(u32, @intCast(comp.diag.list.items.len)));
@@ -1460,32 +1516,26 @@ test "ignore BOM at beginning of file" {
const BOM = "\xEF\xBB\xBF";
const Test = struct {
fn run(buf: []const u8, input_type: enum { valid_utf8, invalid_utf8 }) !void {
fn run(buf: []const u8) !void {
var comp = Compilation.init(std.testing.allocator);
defer comp.deinit();
var buf_reader = std.io.fixedBufferStream(buf);
const source = try comp.addSourceFromReader(buf_reader.reader(), "file.c", @intCast(buf.len));
switch (input_type) {
.valid_utf8 => {
const expected_output = if (mem.startsWith(u8, buf, BOM)) buf[BOM.len..] else buf;
try std.testing.expectEqualStrings(expected_output, source.buf);
try std.testing.expect(!comp.invalid_utf8_locs.contains(source.id));
},
.invalid_utf8 => try std.testing.expect(comp.invalid_utf8_locs.contains(source.id)),
}
const source = try comp.addSourceFromReader(buf_reader.reader(), "file.c", .user);
const expected_output = if (mem.startsWith(u8, buf, BOM)) buf[BOM.len..] else buf;
try std.testing.expectEqualStrings(expected_output, source.buf);
}
};
try Test.run(BOM, .valid_utf8);
try Test.run(BOM ++ "x", .valid_utf8);
try Test.run("x" ++ BOM, .valid_utf8);
try Test.run(BOM ++ " ", .valid_utf8);
try Test.run(BOM ++ "\n", .valid_utf8);
try Test.run(BOM ++ "\\", .valid_utf8);
try Test.run(BOM);
try Test.run(BOM ++ "x");
try Test.run("x" ++ BOM);
try Test.run(BOM ++ " ");
try Test.run(BOM ++ "\n");
try Test.run(BOM ++ "\\");
try Test.run(BOM[0..1] ++ "x", .invalid_utf8);
try Test.run(BOM[0..2] ++ "x", .invalid_utf8);
try Test.run(BOM[1..] ++ "x", .invalid_utf8);
try Test.run(BOM[2..] ++ "x", .invalid_utf8);
try Test.run(BOM[0..1] ++ "x");
try Test.run(BOM[0..2] ++ "x");
try Test.run(BOM[1..] ++ "x");
try Test.run(BOM[2..] ++ "x");
}

deps/aro/Diagnostics.zig

@@ -54,6 +54,10 @@ pub const Message = struct {
builtin: BuiltinFunction.Tag,
header: Header,
},
invalid_escape: struct {
offset: u32,
char: u8,
},
actual_codepoint: u21,
ascii: u7,
unsigned: u64,
@@ -114,6 +118,7 @@ pub const Options = packed struct {
@"c99-compat": Kind = .default,
@"unicode-zero-width": Kind = .default,
@"unicode-homoglyph": Kind = .default,
unicode: Kind = .default,
@"return-type": Kind = .default,
@"dollar-in-identifier-extension": Kind = .default,
@"unknown-pragmas": Kind = .default,
@@ -168,6 +173,11 @@
@"fuse-ld-path": Kind = .default,
@"language-extension-token": Kind = .default,
@"complex-component-init": Kind = .default,
@"microsoft-include": Kind = .default,
@"microsoft-end-of-file": Kind = .default,
@"invalid-source-encoding": Kind = .default,
@"four-char-constants": Kind = .default,
@"unknown-escape-sequence": Kind = .default,
};
const messages = struct {
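For reference, the `opt` strings declared on the message structs below are what these option fields key off, so the new entries above should be reachable through the usual clang-style switches, e.g. `-Wno-microsoft-include` or `-Werror=unknown-escape-sequence` (inferred from aro's existing `-W` handling, which is not part of this diff).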
@@ -372,6 +382,10 @@
const kind = .warning;
const all = true;
};
pub const missing_type_specifier_c2x = struct {
const msg = "a type specifier is required for all declarations";
const kind = .@"error";
};
pub const multiple_storage_class = struct {
const msg = "cannot combine with previous '{s}' declaration specifier";
const extra = .str;
@@ -831,15 +845,20 @@
const msg = "invalid universal character";
const kind = .@"error";
};
pub const multichar_literal = struct {
pub const incomplete_universal_character = struct {
const msg = "incomplete universal character name";
const kind = .@"error";
};
pub const multichar_literal_warning = struct {
const msg = "multi-character character constant";
const opt = "multichar";
const kind = .warning;
const all = true;
};
pub const unicode_multichar_literal = struct {
const msg = "Unicode character literals may not contain multiple characters";
pub const invalid_multichar_literal = struct {
const msg = "{s} character literals may not contain multiple characters";
const kind = .@"error";
const extra = .str;
};
pub const wide_multichar_literal = struct {
const msg = "extraneous characters in character constant ignored";
@@ -1474,6 +1493,16 @@
const opt = "c99-compat";
const kind = .off;
};
pub const unexpected_character = struct {
const msg = "unexpected character <U+{X:0>4}>";
const extra = .actual_codepoint;
const kind = .@"error";
};
pub const invalid_identifier_start_char = struct {
const msg = "character <U+{X:0>4}> not allowed at the start of an identifier";
const extra = .actual_codepoint;
const kind = .@"error";
};
pub const unicode_zero_width = struct {
const msg = "identifier contains Unicode character <U+{X:0>4}> that is invisible in some environments";
const opt = "unicode-homoglyph";
@@ -1797,9 +1826,10 @@
const kind = .warning;
};
pub const non_standard_escape_char = struct {
const msg = "use of non-standard escape character '\\e'";
const msg = "use of non-standard escape character '\\{s}'";
const kind = .off;
const opt = "pedantic";
const extra = .invalid_escape;
};
pub const invalid_pp_stringify_escape = struct {
const msg = "invalid string literal, ignoring final '\\'";
@@ -2399,7 +2429,6 @@
const opt = "pedantic";
const extra = .str;
const kind = .off;
const pedantic = true;
};
pub const not_floating_type = struct {
const msg = "argument type '{s}' is not a real floating point type";
@@ -2411,6 +2440,75 @@
const extra = .str;
const kind = .@"error";
};
pub const ms_search_rule = struct {
const msg = "#include resolved using non-portable Microsoft search rules as: {s}";
const extra = .str;
const opt = "microsoft-include";
const kind = .warning;
};
pub const ctrl_z_eof = struct {
const msg = "treating Ctrl-Z as end-of-file is a Microsoft extension";
const opt = "microsoft-end-of-file";
const kind = .off;
const pedantic = true;
};
pub const illegal_char_encoding_warning = struct {
const msg = "illegal character encoding in character literal";
const opt = "invalid-source-encoding";
const kind = .warning;
};
pub const illegal_char_encoding_error = struct {
const msg = "illegal character encoding in character literal";
const kind = .@"error";
};
pub const ucn_basic_char_error = struct {
const msg = "character '{c}' cannot be specified by a universal character name";
const kind = .@"error";
const extra = .ascii;
};
pub const ucn_basic_char_warning = struct {
const msg = "specifying character '{c}' with a universal character name is incompatible with C standards before C2x";
const kind = .off;
const extra = .ascii;
const suppress_unless_version = .c2x;
const opt = "pre-c2x-compat";
};
pub const ucn_control_char_error = struct {
const msg = "universal character name refers to a control character";
const kind = .@"error";
};
pub const ucn_control_char_warning = struct {
const msg = "universal character name referring to a control character is incompatible with C standards before C2x";
const kind = .off;
const suppress_unless_version = .c2x;
const opt = "pre-c2x-compat";
};
pub const c89_ucn_in_literal = struct {
const msg = "universal character names are only valid in C99 or later";
const suppress_version = .c99;
const kind = .warning;
const opt = "unicode";
};
pub const four_char_char_literal = struct {
const msg = "multi-character character constant";
const opt = "four-char-constants";
const kind = .off;
};
pub const multi_char_char_literal = struct {
const msg = "multi-character character constant";
const kind = .off;
};
pub const missing_hex_escape = struct {
const msg = "\\{c} used with no following hex digits";
const kind = .@"error";
const extra = .ascii;
};
pub const unknown_escape_sequence = struct {
const msg = "unknown escape sequence '\\{s}'";
const kind = .warning;
const opt = "unknown-escape-sequence";
const extra = .invalid_escape;
};
};
list: std.ArrayListUnmanaged(Message) = .{},
@@ -2585,9 +2683,11 @@ pub fn renderMessage(comp: *Compilation, m: anytype, msg: Message) void {
switch (msg.tag) {
.escape_sequence_overflow,
.invalid_universal_character,
.non_standard_escape_char,
// use msg.extra.unsigned for index into string literal
=> loc.byte_offset += @truncate(msg.extra.unsigned),
.non_standard_escape_char,
.unknown_escape_sequence,
=> loc.byte_offset += msg.extra.invalid_escape.offset,
else => {},
}
const source = comp.getSource(loc.id);
@@ -2650,8 +2750,18 @@ pub fn renderMessage(comp: *Compilation, m: anytype, msg: Message) void {
}),
.builtin_with_header => m.print(info.msg, .{
@tagName(msg.extra.builtin_with_header.header),
@tagName(msg.extra.builtin_with_header.builtin),
BuiltinFunction.nameFromTag(msg.extra.builtin_with_header.builtin).span(),
}),
.invalid_escape => {
if (std.ascii.isPrint(msg.extra.invalid_escape.char)) {
const str: [1]u8 = .{msg.extra.invalid_escape.char};
m.print(info.msg, .{&str});
} else {
var buf: [3]u8 = undefined;
_ = std.fmt.bufPrint(&buf, "x{x}", .{std.fmt.fmtSliceHexLower(&.{msg.extra.invalid_escape.char})}) catch unreachable;
m.print(info.msg, .{&buf});
}
},
else => @compileError("invalid extra kind " ++ @tagName(info.extra)),
}
} else {
@@ -2770,6 +2880,7 @@ const MsgWriter = struct {
fn end(m: *MsgWriter, maybe_line: ?[]const u8, col: u32, end_with_splice: bool) void {
const line = maybe_line orelse {
m.write("\n");
m.setColor(.reset);
return;
};
const trailer = if (end_with_splice) "\\ " else "";
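The `.invalid_escape` branch added to `renderMessage` above prints the offending character directly when printable and as a lowercase hex escape otherwise. A standalone version of that formatting:

const std = @import("std");

fn renderEscapeChar(buf: []u8, c: u8) []const u8 {
    if (std.ascii.isPrint(c)) {
        buf[0] = c;
        return buf[0..1];
    }
    return std.fmt.bufPrint(buf, "x{x:0>2}", .{c}) catch unreachable;
}

test "invalid escape rendering" {
    var buf: [4]u8 = undefined;
    try std.testing.expectEqualStrings("e", renderEscapeChar(&buf, 'e'));
    try std.testing.expectEqualStrings("x9c", renderEscapeChar(&buf, 0x9c));
}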

deps/aro/Driver.zig (40 changes)

@@ -28,6 +28,10 @@ link_objects: std.ArrayListUnmanaged([]const u8) = .{},
output_name: ?[]const u8 = null,
sysroot: ?[]const u8 = null,
temp_file_count: u32 = 0,
/// If false, do not emit line directives in -E mode
line_commands: bool = true,
/// If true, use `#line <num>` instead of `# <num>` for line directives
use_line_directives: bool = false,
only_preprocess: bool = false,
only_syntax: bool = false,
only_compile: bool = false,
@@ -111,11 +115,15 @@ pub const usage =
\\ -fsyntax-only Only run the preprocessor, parser, and semantic analysis stages
\\ -funsigned-char "char" is unsigned
\\ -fno-unsigned-char "char" is signed
\\ -fuse-line-directives Use `#line <num>` linemarkers in preprocessed output
\\ -fno-use-line-directives
\\ Use `# <num>` linemarkers in preprocessed output
\\ -I <dir> Add directory to include search path
\\ -isystem Add directory to SYSTEM include search path
\\ --emulate=[clang|gcc|msvc]
\\ Select which C compiler to emulate (default clang)
\\ -o <file> Write output to <file>
\\ -P, --no-line-commands Disable linemarker output in -E mode
\\ -pedantic Warn on language extensions
\\ --rtlib=<arg> Compiler runtime library to use (libgcc or compiler-rt)
\\ -std=<standard> Specify language standard
@@ -169,6 +177,7 @@
off,
unset,
} = .unset;
var comment_arg: []const u8 = "";
while (i < args.len) : (i += 1) {
const arg = args[i];
if (mem.startsWith(u8, arg, "-") and arg.len > 1) {
@@ -213,6 +222,12 @@
d.only_compile = true;
} else if (mem.eql(u8, arg, "-E")) {
d.only_preprocess = true;
} else if (mem.eql(u8, arg, "-P") or mem.eql(u8, arg, "--no-line-commands")) {
d.line_commands = false;
} else if (mem.eql(u8, arg, "-fuse-line-directives")) {
d.use_line_directives = true;
} else if (mem.eql(u8, arg, "-fno-use-line-directives")) {
d.use_line_directives = false;
} else if (mem.eql(u8, arg, "-fchar8_t")) {
d.comp.langopts.has_char8_t_override = true;
} else if (mem.eql(u8, arg, "-fno-char8_t")) {
@@ -358,6 +373,13 @@
d.verbose_ir = true;
} else if (mem.eql(u8, arg, "--verbose-linker-args")) {
d.verbose_linker_args = true;
} else if (mem.eql(u8, arg, "-C") or mem.eql(u8, arg, "--comments")) {
d.comp.langopts.preserve_comments = true;
comment_arg = arg;
} else if (mem.eql(u8, arg, "-CC") or mem.eql(u8, arg, "--comments-in-macros")) {
d.comp.langopts.preserve_comments = true;
d.comp.langopts.preserve_comments_in_macros = true;
comment_arg = arg;
} else if (option(arg, "-fuse-ld=")) |linker_name| {
d.use_linker = linker_name;
} else if (mem.eql(u8, arg, "-fuse-ld=")) {
@@ -419,6 +441,9 @@
.off => false,
.unset => util.fileSupportsColor(std.io.getStdErr()) and !std.process.hasEnvVarConstant("NO_COLOR"),
};
if (d.comp.langopts.preserve_comments and !d.only_preprocess) {
return d.fatal("invalid argument '{s}' only allowed with '-E'", .{comment_arg});
}
return false;
}
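Taken together, the new driver flags combine like this (invocations are illustrative; the binary name depends on how aro is built):

    arocc -E file.c                         preprocess, emit `# <num>` linemarkers
    arocc -E -P file.c                      suppress linemarkers
    arocc -E -fuse-line-directives file.c   emit `#line <num>` instead
    arocc -E -C file.c                      preserve comments
    arocc -E -CC file.c                     also preserve comments inside macros

Note the validation above: `-C`/`-CC` without `-E` is rejected, since preserved comments only exist in preprocessed output.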
@@ -518,12 +543,25 @@ fn processSource(
var pp = Preprocessor.init(d.comp);
defer pp.deinit();
if (d.comp.langopts.ms_extensions) {
d.comp.ms_cwd_source_id = source.id;
}
if (d.verbose_pp) pp.verbose = true;
if (d.only_preprocess) pp.preserve_whitespace = true;
if (d.only_preprocess) {
pp.preserve_whitespace = true;
if (d.line_commands) {
pp.linemarkers = if (d.use_line_directives) .line_directives else .numeric_directives;
}
}
try pp.addBuiltinMacros();
try pp.addIncludeStart(source);
try pp.addIncludeStart(builtin);
_ = try pp.preprocess(builtin);
try pp.addIncludeStart(user_macros);
_ = try pp.preprocess(user_macros);
try pp.addIncludeResume(source.id, 0, 0);
const eof = try pp.preprocess(source);
try pp.tokens.append(pp.comp.gpa, eof);
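The sequencing here matters for `-E` output: the builtin-macro and user-macro sources are preprocessed first, and `addIncludeResume(source.id, 0, 0)` repositions the linemarker stream at the top of the real file before it is preprocessed, so the dump starts at line 1 of the user's file rather than inside the synthetic sources.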

deps/aro/LangOpts.zig (20 changes)

@@ -1,5 +1,6 @@
const std = @import("std");
const DiagnosticTag = @import("Diagnostics.zig").Tag;
const CharInfo = @import("CharInfo.zig");
const LangOpts = @This();
@@ -85,6 +86,20 @@
.c2x, .gnu2x => "202311L",
};
}
pub fn codepointAllowedInIdentifier(standard: Standard, codepoint: u21, is_start: bool) bool {
if (is_start) {
return if (standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint) and !CharInfo.isC11DisallowedInitialIdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint) and !CharInfo.isC99DisallowedInitialIDChar(codepoint);
} else {
return if (standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint);
}
}
};
emulate: Compiler = .clang,
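Concretely (per the identifier ranges in C11 Annex D): a combining mark such as U+0301 is a valid identifier character but an invalid first character, so `codepointAllowedInIdentifier(.c11, 0x0301, true)` is false while the same call with `is_start == false` is true.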
@@ -110,6 +125,11 @@ has_char8_t_override: ?bool = null,
/// Whether to allow GNU-style inline assembly
gnu_asm: bool = true,
/// Preserve comments when preprocessing
preserve_comments: bool = false,
/// Preserve comments in macros when preprocessing
preserve_comments_in_macros: bool = false,
pub fn setStandard(self: *LangOpts, name: []const u8) error{InvalidStandard}!void {
self.standard = Standard.NameMap.get(name) orelse return error.InvalidStandard;
}

deps/aro/Parser.zig (321 changes)

@@ -17,6 +17,7 @@ const NodeList = std.ArrayList(NodeIndex);
const InitList = @import("InitList.zig");
const Attribute = @import("Attribute.zig");
const CharInfo = @import("CharInfo.zig");
const CharLiteral = @import("CharLiteral.zig");
const Value = @import("Value.zig");
const SymbolStack = @import("SymbolStack.zig");
const Symbol = SymbolStack.Symbol;
@@ -186,15 +187,18 @@ string_ids: struct {
ucontext_t: StringId,
},
fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Location) Compilation.Error!bool {
if (codepoint <= 0x7F) return false;
var diagnosed = false;
/// Checks codepoint for various pedantic warnings
/// Returns true if diagnostic issued
fn checkIdentifierCodepointWarnings(comp: *Compilation, codepoint: u21, loc: Source.Location) Compilation.Error!bool {
assert(codepoint >= 0x80);
const err_start = comp.diag.list.items.len;
if (!CharInfo.isC99IdChar(codepoint)) {
try comp.diag.add(.{
.tag = .c99_compat,
.loc = loc,
}, &.{});
diagnosed = true;
}
if (CharInfo.isInvisible(codepoint)) {
try comp.diag.add(.{
@@ -202,7 +206,6 @@ fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Loca
.loc = loc,
.extra = .{ .actual_codepoint = codepoint },
}, &.{});
diagnosed = true;
}
if (CharInfo.homoglyph(codepoint)) |resembles| {
try comp.diag.add(.{
@@ -210,31 +213,78 @@ fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Loca
.loc = loc,
.extra = .{ .codepoints = .{ .actual = codepoint, .resembles = resembles } },
}, &.{});
diagnosed = true;
}
return diagnosed;
return comp.diag.list.items.len != err_start;
}
/// Issues diagnostics for the current extended identifier token
/// Return value indicates whether the token should be considered an identifier
/// true means consider the token to actually be an identifier
/// false means it is not
fn validateExtendedIdentifier(p: *Parser) !bool {
assert(p.tok_ids[p.tok_i] == .extended_identifier);
const slice = p.tokSlice(p.tok_i);
const view = std.unicode.Utf8View.init(slice) catch {
try p.errTok(.invalid_utf8, p.tok_i);
return error.FatalError;
};
var it = view.iterator();
var valid_identifier = true;
var warned = false;
var len: usize = 0;
var invalid_char: u21 = undefined;
var loc = p.pp.tokens.items(.loc)[p.tok_i];
const standard = p.comp.langopts.standard;
while (it.nextCodepoint()) |codepoint| {
defer {
len += 1;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;
}
if (codepoint == '$') {
warned = true;
try p.comp.diag.add(.{
.tag = .dollar_in_identifier_extension,
.loc = loc,
}, &.{});
}
if (codepoint <= 0x7F) continue;
if (!valid_identifier) continue;
const allowed = standard.codepointAllowedInIdentifier(codepoint, len == 0);
if (!allowed) {
invalid_char = codepoint;
valid_identifier = false;
continue;
}
if (!warned) {
warned = try checkIdentifierCodepointWarnings(p.comp, codepoint, loc);
}
}
if (!valid_identifier) {
if (len == 1) {
try p.errExtra(.unexpected_character, p.tok_i, .{ .actual_codepoint = invalid_char });
return false;
} else {
try p.errExtra(.invalid_identifier_start_char, p.tok_i, .{ .actual_codepoint = invalid_char });
}
}
return true;
}
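Examples of what `validateExtendedIdentifier` now diagnoses (illustrative C inputs; diagnostic names from the code above):

    int café;        // accepted: é is a valid C99/C11 identifier character
    int $price;      // accepted, with a pedantic dollar_in_identifier_extension warning
    int <U+0301>x;   // invalid_identifier_start_char: valid char, not allowed first
    int <U+2022>;    // unexpected_character: the whole token is one invalid codepoint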
fn eatIdentifier(p: *Parser) !?TokenIndex {
switch (p.tok_ids[p.tok_i]) {
.identifier => {},
.extended_identifier => {
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];
if (mem.indexOfScalar(u8, slice, '$')) |i| {
loc.byte_offset += @intCast(i);
try p.comp.diag.add(.{
.tag = .dollar_in_identifier_extension,
.loc = loc,
}, &.{});
loc = p.pp.tokens.items(.loc)[p.tok_i];
}
while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
if (!try p.validateExtendedIdentifier()) {
p.tok_i += 1;
return null;
}
},
else => return null,
@@ -566,6 +616,7 @@ fn diagnoseIncompleteDefinitions(p: *Parser) !void {
/// root : (decl | assembly ';' | staticAssert)*
pub fn parse(pp: *Preprocessor) Compilation.Error!Tree {
assert(pp.linemarkers == .none);
pp.comp.pragmaEvent(.before_parse);
var arena = std.heap.ArenaAllocator.init(pp.comp.gpa);
@@ -1692,10 +1743,10 @@ fn initDeclarator(p: *Parser, decl_spec: *DeclSpec, attr_buf_top: usize) Error!?
try p.errStr(.tentative_array, name, try p.typeStr(init_d.d.ty));
break :incomplete;
} else if (init_d.d.ty.getRecord()) |record| {
_ = try p.tentative_defs.getOrPutValue(p.comp.gpa, record.name, init_d.d.name);
_ = try p.tentative_defs.getOrPutValue(p.gpa, record.name, init_d.d.name);
break :incomplete;
} else if (init_d.d.ty.get(.@"enum")) |en| {
_ = try p.tentative_defs.getOrPutValue(p.comp.gpa, en.data.@"enum".name, init_d.d.name);
_ = try p.tentative_defs.getOrPutValue(p.gpa, en.data.@"enum".name, init_d.d.name);
break :incomplete;
}
}
@@ -2078,7 +2129,7 @@ fn recordSpec(p: *Parser) Error!Type {
// TODO: msvc considers `#pragma pack` on a per-field basis
.msvc => p.pragma_pack,
};
record_layout.compute(record_ty, ty, p.pp.comp, pragma_pack_value);
record_layout.compute(record_ty, ty, p.comp, pragma_pack_value);
}
// finish by creating a node
@@ -2651,6 +2702,7 @@ fn enumerator(p: *Parser, e: *Enumerator) Error!?EnumFieldAndNode {
.node = res.node,
} },
});
try p.value_map.put(node, e.res.val);
return EnumFieldAndNode{ .field = .{
.name = interned_name,
.ty = res.ty,
@@ -3355,7 +3407,7 @@ fn findScalarInitializer(p: *Parser, il: **InitList, ty: *Type, actual_ty: Type,
return false;
} else if (ty.get(.@"struct")) |struct_ty| {
if (il.*.node != .none) return false;
if (actual_ty.eql(ty.*, p.pp.comp, false)) return true;
if (actual_ty.eql(ty.*, p.comp, false)) return true;
const start_index = il.*.list.items.len;
var index = if (start_index != 0) il.*.list.items[start_index - 1].index + 1 else start_index;
@@ -3375,14 +3427,14 @@ fn findScalarInitializer(p: *Parser, il: **InitList, ty: *Type, actual_ty: Type,
return false;
} else if (ty.get(.@"union")) |union_ty| {
if (il.*.node != .none) return false;
if (actual_ty.eql(ty.*, p.pp.comp, false)) return true;
if (actual_ty.eql(ty.*, p.comp, false)) return true;
if (union_ty.data.record.fields.len == 0) {
try p.errTok(.empty_aggregate_init_braces, first_tok);
return error.ParsingFailed;
}
ty.* = union_ty.data.record.fields[0].ty;
il.* = try il.*.find(p.gpa, 0);
// if (il.*.node == .none and actual_ty.eql(ty, p.pp.comp, false)) return true;
// if (il.*.node == .none and actual_ty.eql(ty, p.comp, false)) return true;
if (try p.findScalarInitializer(il, ty, actual_ty, first_tok)) return true;
return false;
}
@@ -3708,7 +3760,7 @@ fn gnuAsmStmt(p: *Parser, quals: Tree.GNUAssemblyQualifiers, l_paren: TokenIndex
const expected_items = 8; // arbitrarily chosen, most assembly will have fewer than 8 inputs/outputs/constraints/names
const bytes_needed = expected_items * @sizeOf(?TokenIndex) + expected_items * 3 * @sizeOf(NodeIndex);
var stack_fallback = std.heap.stackFallback(bytes_needed, p.comp.gpa);
var stack_fallback = std.heap.stackFallback(bytes_needed, p.gpa);
const allocator = stack_fallback.get();
// TODO: Consider using a TokenIndex of 0 instead of null if we need to store the names in the tree
@@ -4572,7 +4624,10 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => true,
.builtin => |builtin| switch (builtin.tag) {
.__builtin_va_start, .__va_start, .va_start => arg_idx != 1,
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
=> arg_idx != 1,
else => true,
},
};
@@ -4582,8 +4637,11 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => true,
.builtin => |builtin| switch (builtin.tag) {
.__builtin_va_start, .__va_start, .va_start => arg_idx != 1,
.__builtin_complex => false,
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
=> arg_idx != 1,
BuiltinFunction.tagFromName("__builtin_complex").? => false,
else => true,
},
};
@@ -4600,8 +4658,11 @@ const CallExpr = union(enum) {
const builtin_tok = p.nodes.items(.data)[@intFromEnum(self.builtin.node)].decl.name;
switch (self.builtin.tag) {
.__builtin_va_start, .__va_start, .va_start => return p.checkVaStartArg(builtin_tok, first_after, param_tok, arg, arg_idx),
.__builtin_complex => return p.checkComplexArg(builtin_tok, first_after, param_tok, arg, arg_idx),
BuiltinFunction.tagFromName("__builtin_va_start").?,
BuiltinFunction.tagFromName("__va_start").?,
BuiltinFunction.tagFromName("va_start").?,
=> return p.checkVaStartArg(builtin_tok, first_after, param_tok, arg, arg_idx),
BuiltinFunction.tagFromName("__builtin_complex").? => return p.checkComplexArg(builtin_tok, first_after, param_tok, arg, arg_idx),
else => {},
}
}
@@ -4615,7 +4676,7 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => null,
.builtin => |builtin| switch (builtin.tag) {
.__builtin_complex => 2,
BuiltinFunction.tagFromName("__builtin_complex").? => 2,
else => null,
},
};
@@ -4625,7 +4686,7 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => callable_ty.returnType(),
.builtin => |builtin| switch (builtin.tag) {
.__builtin_complex => {
BuiltinFunction.tagFromName("__builtin_complex").? => {
const last_param = p.list_buf.items[p.list_buf.items.len - 1];
return p.nodes.items(.ty)[@intFromEnum(last_param)].makeComplex();
},
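These switch arms can no longer use enum literals because the builtin tags are generated data, so they resolve tags from source-level names at comptime with `tagFromName(...).?`; switch prong values are comptime-evaluated, so the lookup costs nothing at runtime. A self-contained analogue:

const std = @import("std");

const Tag = enum { va_start, complex, trap };

fn tagFromName(comptime name: []const u8) ?Tag {
    return std.meta.stringToEnum(Tag, name);
}

test "comptime tag lookup in switch prongs" {
    const tag: Tag = .complex;
    const result: u32 = switch (tag) {
        tagFromName("complex").? => 2, // resolved at compile time
        else => 0,
    };
    try std.testing.expectEqual(@as(u32, 2), result);
}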
@@ -7518,7 +7579,7 @@ fn stringLiteral(p: *Parser) Error!Result {
'a' => p.retained_strings.appendAssumeCapacity(0x07),
'b' => p.retained_strings.appendAssumeCapacity(0x08),
'e' => {
try p.errExtra(.non_standard_escape_char, start, .{ .unsigned = i - 1 });
try p.errExtra(.non_standard_escape_char, start, .{ .invalid_escape = .{ .char = 'e', .offset = @intCast(i) } });
p.retained_strings.appendAssumeCapacity(0x1B);
},
'f' => p.retained_strings.appendAssumeCapacity(0x0C),
@@ -7584,130 +7645,82 @@ fn parseUnicodeEscape(p: *Parser, tok: TokenIndex, count: u8, slice: []const u8,
fn charLiteral(p: *Parser) Error!Result {
defer p.tok_i += 1;
const allow_multibyte = switch (p.tok_ids[p.tok_i]) {
.char_literal => false,
.char_literal_utf_8 => false,
.char_literal_wide => true,
.char_literal_utf_16 => true,
.char_literal_utf_32 => true,
else => unreachable,
};
const ty: Type = switch (p.tok_ids[p.tok_i]) {
.char_literal => .{ .specifier = .int },
.char_literal_utf_8 => .{ .specifier = .uchar },
.char_literal_wide => p.comp.types.wchar,
.char_literal_utf_16 => .{ .specifier = .ushort },
.char_literal_utf_32 => .{ .specifier = .ulong },
else => unreachable,
};
const max: u32 = switch (p.tok_ids[p.tok_i]) {
.char_literal => std.math.maxInt(u8),
.char_literal_wide => @intCast(p.comp.types.wchar.maxInt(p.comp)),
.char_literal_utf_8 => std.math.maxInt(u8),
.char_literal_utf_16 => std.math.maxInt(u16),
.char_literal_utf_32 => std.math.maxInt(u32),
else => unreachable,
};
var multichar: u8 = switch (p.tok_ids[p.tok_i]) {
.char_literal => 0,
.char_literal_wide => 4,
.char_literal_utf_8 => 2,
.char_literal_utf_16 => 2,
.char_literal_utf_32 => 2,
else => unreachable,
};
const tok_id = p.tok_ids[p.tok_i];
const char_kind = CharLiteral.Kind.classify(tok_id);
var val: u32 = 0;
var overflow_reported = false;
var slice = p.tokSlice(p.tok_i);
slice = slice[0 .. slice.len - 1];
var i = mem.indexOf(u8, slice, "\'").? + 1;
while (i < slice.len) : (i += 1) {
var c: u32 = slice[i];
var multibyte = false;
switch (c) {
'\\' => {
i += 1;
switch (slice[i]) {
'\n' => i += 1,
'\r' => i += 2,
'\'', '\"', '\\', '?' => c = slice[i],
'n' => c = '\n',
'r' => c = '\r',
't' => c = '\t',
'a' => c = 0x07,
'b' => c = 0x08,
'e' => {
try p.errExtra(.non_standard_escape_char, p.tok_i, .{ .unsigned = i - 1 });
c = 0x1B;
},
'f' => c = 0x0C,
'v' => c = 0x0B,
'x' => c = try p.parseNumberEscape(p.tok_i, 16, slice, &i),
'0'...'7' => c = try p.parseNumberEscape(p.tok_i, 8, slice, &i),
'u', 'U' => return p.todo("unicode escapes in char literals"),
else => unreachable,
const slice = char_kind.contentSlice(p.tokSlice(p.tok_i));
if (slice.len == 1 and std.ascii.isASCII(slice[0])) {
// fast path: single unescaped ASCII char
val = slice[0];
} else {
var char_literal_parser = CharLiteral.Parser.init(slice, char_kind, p.comp);
const max_chars_expected = 4;
var stack_fallback = std.heap.stackFallback(max_chars_expected * @sizeOf(u32), p.comp.gpa);
var chars = std.ArrayList(u32).initCapacity(stack_fallback.get(), max_chars_expected) catch unreachable; // stack allocation already succeeded
defer chars.deinit();
while (char_literal_parser.next()) |item| switch (item) {
.value => |c| try chars.append(c),
.improperly_encoded => |s| {
try chars.ensureUnusedCapacity(s.len);
for (s) |c| chars.appendAssumeCapacity(c);
},
.utf8_text => |view| {
var it = view.iterator();
var max_codepoint: u21 = 0;
try chars.ensureUnusedCapacity(view.bytes.len);
while (it.nextCodepoint()) |c| {
max_codepoint = @max(max_codepoint, c);
chars.appendAssumeCapacity(c);
}
if (max_codepoint > char_kind.maxCodepoint(p.comp)) {
char_literal_parser.err(.char_too_large, .{ .none = {} });
}
},
// These are safe since the source is checked to be valid utf8.
0b1100_0000...0b1101_1111 => {
c &= 0b00011111;
c <<= 6;
c |= slice[i + 1] & 0b00111111;
i += 1;
multibyte = true;
},
0b1110_0000...0b1110_1111 => {
c &= 0b00001111;
c <<= 6;
c |= slice[i + 1] & 0b00111111;
c <<= 6;
c |= slice[i + 2] & 0b00111111;
i += 2;
multibyte = true;
},
0b1111_0000...0b1111_0111 => {
c &= 0b00000111;
c <<= 6;
c |= slice[i + 1] & 0b00111111;
c <<= 6;
c |= slice[i + 2] & 0b00111111;
c <<= 6;
c |= slice[i + 3] & 0b00111111;
i += 3;
multibyte = true;
},
else => {},
};
const is_multichar = chars.items.len > 1;
if (is_multichar) {
if (char_kind == .char and chars.items.len == 4) {
char_literal_parser.warn(.four_char_char_literal, .{ .none = {} });
} else if (char_kind == .char) {
char_literal_parser.warn(.multichar_literal_warning, .{ .none = {} });
} else {
const kind = switch (char_kind) {
.wide => "wide",
.utf_8, .utf_16, .utf_32 => "Unicode",
else => unreachable,
};
char_literal_parser.err(.invalid_multichar_literal, .{ .str = kind });
}
}
if (c > max or (multibyte and !allow_multibyte)) try p.err(.char_too_large);
switch (multichar) {
0, 2, 4 => multichar += 1,
1 => {
multichar = 99;
try p.err(.multichar_literal);
},
3 => {
try p.err(.unicode_multichar_literal);
return error.ParsingFailed;
},
5 => {
try p.err(.wide_multichar_literal);
val = 0;
multichar = 6;
},
6 => val = 0,
else => {},
var multichar_overflow = false;
if (char_kind == .char and is_multichar) {
for (chars.items) |item| {
val, const overflowed = @shlWithOverflow(val, 8);
multichar_overflow = multichar_overflow or overflowed != 0;
val += @as(u8, @truncate(item));
}
} else if (chars.items.len > 0) {
val = chars.items[chars.items.len - 1];
}
const product, const overflowed = @mulWithOverflow(val, max +% 1);
if (overflowed != 0 and !overflow_reported) {
try p.errExtra(.char_lit_too_wide, p.tok_i, .{ .unsigned = i });
overflow_reported = true;
if (multichar_overflow) {
char_literal_parser.err(.char_lit_too_wide, .{ .none = {} });
}
for (char_literal_parser.errors.constSlice()) |item| {
try p.errExtra(item.tag, p.tok_i, item.extra);
}
val = product + c;
}
const ty = char_kind.charLiteralType(p.comp);
// This is the type the literal will have if we're in a macro; macros always operate on intmax_t/uintmax_t values
const macro_ty = if (ty.isUnsignedInt(p.comp) or (p.tok_ids[p.tok_i] == .char_literal and p.comp.getCharSignedness() == .unsigned))
const macro_ty = if (ty.isUnsignedInt(p.comp) or (char_kind == .char and p.comp.getCharSignedness() == .unsigned))
p.comp.types.intmax.makeIntegerUnsigned()
else
p.comp.types.intmax;
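A worked example of the multichar packing in the new path: each char shifts the accumulated value left by 8 bits, so `'ab'` evaluates to `('a' << 8) | 'b'`:

const std = @import("std");

test "multichar char literal value" {
    var val: u32 = 0;
    for ("ab") |c| {
        val = (val << 8) + c; // mirrors the @shlWithOverflow loop above
    }
    try std.testing.expectEqual(@as(u32, 0x6162), val);
}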
@@ -7892,7 +7905,7 @@ fn bitInt(p: *Parser, base: u8, buf: []const u8, suffix: NumberSuffix, tok_i: To
try p.errStr(.pre_c2x_compat, tok_i, "'_BitInt' suffix for literals");
try p.errTok(.bitint_suffix, tok_i);
var managed = try big.int.Managed.init(p.comp.gpa);
var managed = try big.int.Managed.init(p.gpa);
defer managed.deinit();
managed.setString(base, buf) catch |e| switch (e) {

deps/aro/Preprocessor.zig

@@ -89,6 +89,18 @@ top_expansion_buf: ExpandBuf,
verbose: bool = false,
preserve_whitespace: bool = false,
/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers)
linemarkers: Linemarkers = .none,
pub const Linemarkers = enum {
/// No linemarker tokens. Required setting if parser will run
none,
/// #line <num> "filename"
line_directives,
/// # <num> "filename" flags
numeric_directives,
};
pub fn init(comp: *Compilation) Preprocessor {
const pp = Preprocessor{
.comp = comp,
@@ -111,6 +123,10 @@ const builtin_macros = struct {
.id = .macro_param_has_attribute,
.source = .generated,
}};
const has_declspec_attribute = [1]RawToken{.{
.id = .macro_param_has_declspec_attribute,
.source = .generated,
}};
const has_warning = [1]RawToken{.{
.id = .macro_param_has_warning,
.source = .generated,
@@ -173,6 +189,7 @@ fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, tokens: [
pub fn addBuiltinMacros(pp: *Preprocessor) !void {
try pp.addBuiltinMacro("__has_attribute", true, &builtin_macros.has_attribute);
try pp.addBuiltinMacro("__has_declspec_attribute", true, &builtin_macros.has_declspec_attribute);
try pp.addBuiltinMacro("__has_warning", true, &builtin_macros.has_warning);
try pp.addBuiltinMacro("__has_feature", true, &builtin_macros.has_feature);
try pp.addBuiltinMacro("__has_extension", true, &builtin_macros.has_extension);
@@ -201,11 +218,52 @@ pub fn deinit(pp: *Preprocessor) void {
/// Preprocess a source file, returns eof token.
pub fn preprocess(pp: *Preprocessor, source: Source) Error!Token {
return pp.preprocessExtra(source) catch |er| switch (er) {
const eof = pp.preprocessExtra(source) catch |er| switch (er) {
// This cannot occur in the main file and is handled in `include`.
error.StopPreprocessing => unreachable,
else => |e| return e,
};
try eof.checkMsEof(source, pp.comp);
return eof;
}
/// Tokenize a file without any preprocessing, returns eof token.
pub fn tokenize(pp: *Preprocessor, source: Source) Error!Token {
assert(pp.linemarkers == .none);
assert(pp.preserve_whitespace == false);
var tokenizer = Tokenizer{
.buf = source.buf,
.comp = pp.comp,
.source = source.id,
};
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count);
while (true) {
var tok = tokenizer.next();
if (tok.id == .eof) return tokFromRaw(tok);
try pp.tokens.append(pp.gpa, tokFromRaw(tok));
}
}
pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void {
if (pp.linemarkers == .none) return;
try pp.tokens.append(pp.gpa, .{ .id = .include_start, .loc = .{
.id = source.id,
.byte_offset = std.math.maxInt(u32),
.line = 0,
} });
}
pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void {
if (pp.linemarkers == .none) return;
try pp.tokens.append(pp.gpa, .{ .id = .include_resume, .loc = .{
.id = source,
.byte_offset = offset,
.line = line,
} });
}
/// Return the name of the #ifndef guard macro that starts a source, if any.
@ -226,14 +284,6 @@ fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
}
fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
if (pp.comp.invalid_utf8_locs.get(source.id)) |offset| {
try pp.comp.diag.add(.{
.tag = .invalid_utf8,
// Todo: compute line number
.loc = .{ .id = source.id, .byte_offset = offset },
}, &.{});
return error.FatalError;
}
var guard_name = pp.findIncludeGuard(source);
pp.preprocess_count += 1;
@ -493,7 +543,10 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
_ = pp.defines.remove(macro_name);
try pp.expectNl(&tokenizer);
},
.keyword_include => try pp.include(&tokenizer, .first),
.keyword_include => {
try pp.include(&tokenizer, .first);
continue;
},
.keyword_include_next => {
try pp.comp.diag.add(.{
.tag = .include_next,
@ -510,7 +563,10 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
}
},
.keyword_embed => try pp.embed(&tokenizer),
.keyword_pragma => try pp.pragma(&tokenizer, directive, null, &.{}),
.keyword_pragma => {
try pp.pragma(&tokenizer, directive, null, &.{});
continue;
},
.keyword_line => {
// #line number "file"
const digits = tokenizer.nextNoWS();
@ -551,6 +607,10 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
skipToNl(&tokenizer);
},
}
if (pp.preserve_whitespace) {
tok.id = .nl;
try pp.tokens.append(pp.gpa, tokFromRaw(tok));
}
},
.whitespace => if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)),
.nl => {
@ -928,6 +988,12 @@ fn skip(
line_start = true;
tokenizer.index += 1;
tokenizer.line += 1;
if (pp.preserve_whitespace) {
try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
} else {
line_start = false;
tokenizer.index += 1;
@ -980,9 +1046,14 @@ fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf
.hash_hash => {
var rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
while (rhs.id == .whitespace) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
while (true) {
if (rhs.id == .whitespace) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else if (rhs.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else break;
}
try pp.pasteTokens(&buf, &.{rhs});
},
@ -1168,7 +1239,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]co
}
for (params) |tok| {
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws);
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws, false);
try pp.char_buf.appendSlice(str);
}
@ -1212,6 +1283,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]co
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const Token, src_loc: Source.Location) Error!bool {
switch (builtin) {
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_feature,
.macro_param_has_extension,
.macro_param_has_builtin,
@ -1220,6 +1292,7 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
var identifier: ?Token = null;
for (param_toks) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
if (!tok.id.isMacroIdentifier()) {
invalid = tok;
break;
@ -1238,6 +1311,12 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
const ident_str = pp.expandedSlice(identifier.?);
return switch (builtin) {
.macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null,
.macro_param_has_declspec_attribute => {
return if (pp.comp.langopts.declspec_attrs)
Attribute.fromString(.declspec, null, ident_str) != null
else
false;
},
.macro_param_has_feature => features.hasFeature(pp.comp, ident_str),
.macro_param_has_extension => features.hasExtension(pp.comp, ident_str),
.macro_param_has_builtin => pp.comp.hasBuiltin(ident_str),
@ -1272,6 +1351,7 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
var identifier: ?Token = null;
for (param_toks) |tok| switch (tok.id) {
.macro_ws => continue,
.comment => continue,
else => {
if (identifier) |_| invalid = tok else identifier = tok;
},
@ -1353,6 +1433,10 @@ fn expandFuncMacro(
const next = switch (raw_next.id) {
.macro_ws => continue,
.hash_hash => continue,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros)
continue
else
&[1]Token{tokFromRaw(raw_next)},
.macro_param, .macro_param_no_expand => if (args.items[raw_next.end].len > 0)
args.items[raw_next.end]
else
@ -1396,6 +1480,7 @@ fn expandFuncMacro(
try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw)));
},
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_warning,
.macro_param_has_feature,
.macro_param_has_extension,
@ -1426,6 +1511,7 @@ fn expandFuncMacro(
if (string) |_| invalid = tok else string = tok;
},
.macro_ws => continue,
.comment => continue,
else => {
invalid = tok;
break;
@ -1881,18 +1967,30 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr
Token.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
Token.free(tok.expansion_locs, pp.gpa);
continue;
}
tok.id.simplifyMacroKeywordExtra(true);
pp.tokens.appendAssumeCapacity(tok.*);
}
if (pp.preserve_whitespace) {
try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.add_expansion_nl);
while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) {
pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{ .id = .generated } });
pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
}
}
fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
fn expandedSliceExtra(
pp: *const Preprocessor,
tok: Token,
macro_ws_handling: enum { single_macro_ws, preserve_macro_ws },
path_escapes: bool,
) []const u8 {
if (tok.id.lexeme()) |some| {
if (!tok.id.allowsDigraphs(pp.comp) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
}
@ -1901,6 +1999,7 @@ fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: en
.comp = pp.comp,
.index = tok.loc.byte_offset,
.source = .generated,
.path_escapes = path_escapes,
};
if (tok.id == .macro_string) {
while (true) : (tmp_tokenizer.index += 1) {
@ -1914,23 +2013,27 @@ fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: en
/// Get expanded token source string.
pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 {
return pp.expandedSliceExtra(tok, .single_macro_ws);
return pp.expandedSliceExtra(tok, .single_macro_ws, false);
}
/// Concat two tokens and add the result to pp.generated
fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) Error!void {
const lhs = while (lhs_toks.popOrNull()) |lhs| {
if (lhs.id == .macro_ws)
Token.free(lhs.expansion_locs, pp.gpa)
else
if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or
(lhs.id != .macro_ws and lhs.id != .comment))
break lhs;
Token.free(lhs.expansion_locs, pp.gpa);
} else {
return bufCopyTokens(lhs_toks, rhs_toks, &.{});
};
var rhs_rest: u32 = 1;
const rhs = for (rhs_toks) |rhs| {
if (rhs.id != .macro_ws) break rhs;
if ((pp.comp.langopts.preserve_comments_in_macros and rhs.id == .comment) or
(rhs.id != .macro_ws and rhs.id != .comment))
break rhs;
rhs_rest += 1;
} else {
return lhs_toks.appendAssumeCapacity(lhs);
@ -1952,9 +2055,15 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token)
.index = @intCast(start),
.source = .generated,
};
const pasted_token = tmp_tokenizer.nextNoWS();
const next = tmp_tokenizer.nextNoWS().id;
if (next != .nl and next != .eof) {
const pasted_token = tmp_tokenizer.nextNoWSComments();
const next = tmp_tokenizer.nextNoWSComments();
const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker)
.placemarker
else
pasted_token.id;
try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs));
if (next.id != .nl and next.id != .eof) {
try pp.comp.diag.add(.{
.tag = .pasting_formed_invalid,
.loc = lhs.loc,
@ -1963,13 +2072,9 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token)
pp.comp.generated_buf.items[start..end],
) },
}, lhs.expansionSlice());
try lhs_toks.append(tokFromRaw(next));
}
const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker)
.placemarker
else
pasted_token.id;
try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs));
try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{});
}
@ -2053,7 +2158,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
tok.id.simplifyMacroKeyword();
switch (tok.id) {
.hash_hash => {
const next = tokenizer.nextNoWS();
const next = tokenizer.nextNoWSComments();
switch (next.id) {
.nl, .eof => {
try pp.err(tok, .hash_hash_at_end);
@ -2069,6 +2174,13 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
try pp.token_buf.append(next);
},
.nl, .eof => break tok.start,
.comment => if (pp.comp.langopts.preserve_comments_in_macros) {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.whitespace => need_ws = true,
else => {
if (tok.id != .whitespace and need_ws) {
@ -2152,6 +2264,13 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
switch (tok.id) {
.nl, .eof => break tok.start,
.whitespace => need_ws = pp.token_buf.items.len != 0,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.hash => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
@ -2192,7 +2311,7 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
return skipToNl(tokenizer);
}
const saved_tokenizer = tokenizer.*;
const next = tokenizer.nextNoWS();
const next = tokenizer.nextNoWSComments();
if (next.id == .nl or next.id == .eof) {
try pp.err(tok, .hash_hash_at_end);
return;
@ -2249,6 +2368,8 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
/// Handle an #embed directive
fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
tokenizer.path_escapes = true;
defer tokenizer.path_escapes = false;
const first = tokenizer.nextNoWS();
const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof) catch |er| switch (er) {
error.InvalidInclude => return,
@ -2256,7 +2377,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
};
// Check for empty filename.
const tok_slice = pp.expandedSlice(filename_tok);
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename);
return;
@ -2298,6 +2419,8 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
// Handle a #include directive.
fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void {
tokenizer.path_escapes = true;
defer tokenizer.path_escapes = false;
const first = tokenizer.nextNoWS();
const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) {
error.InvalidInclude => return,
@ -2323,10 +2446,32 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc
pp.verboseLog(first, "include file {s}", .{new_source.path});
}
_ = pp.preprocessExtra(new_source) catch |er| switch (er) {
error.StopPreprocessing => {},
const tokens_start = pp.tokens.len;
try pp.addIncludeStart(new_source);
const eof = pp.preprocessExtra(new_source) catch |er| switch (er) {
error.StopPreprocessing => {
for (pp.tokens.items(.expansion_locs)[tokens_start..]) |loc| Token.free(loc, pp.gpa);
pp.tokens.len = tokens_start;
return;
},
else => |e| return e,
};
try eof.checkMsEof(new_source, pp.comp);
if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) {
try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
if (pp.linemarkers == .none) return;
var next = first;
while (true) {
var tmp = tokenizer.*;
next = tmp.nextNoWS();
if (next.id != .nl) break;
tokenizer.* = tmp;
}
try pp.addIncludeResume(next.source, next.end, next.line);
}
/// tokens that are part of a pragma directive can happen in 3 ways:
@ -2441,7 +2586,7 @@ fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken,
const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof);
// Check for empty filename.
const tok_slice = pp.expandedSlice(filename_tok);
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws, true);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename);
return error.InvalidInclude;
@ -2455,28 +2600,90 @@ fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken,
else => unreachable,
};
return (try pp.comp.findInclude(filename, first.source, include_type, which)) orelse
return (try pp.comp.findInclude(filename, first, include_type, which)) orelse
pp.fatal(first, "'{s}' not found", .{filename});
}
fn printLinemarker(
pp: *Preprocessor,
w: anytype,
line_no: u32,
source: Source,
start_resume: enum(u8) { start, @"resume", none },
) !void {
try w.writeByte('#');
if (pp.linemarkers == .line_directives) try w.writeAll("line");
// line_no is 0 indexed
try w.print(" {d} \"{s}\"", .{ line_no + 1, source.path });
if (pp.linemarkers == .numeric_directives) {
switch (start_resume) {
.none => {},
.start => try w.writeAll(" 1"),
.@"resume" => try w.writeAll(" 2"),
}
switch (source.kind) {
.user => {},
.system => try w.writeAll(" 3"),
.extern_c_system => try w.writeAll(" 3 4"),
}
}
try w.writeByte('\n');
}
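As a rough illustration of the output format (a sketch, with `gpa`, `pp`, and `source` assumed; recall that `line_no` is 0-indexed):
```zig
var buf = std.ArrayList(u8).init(gpa);
defer buf.deinit();
// With linemarkers == .numeric_directives and source.path == "main.c",
// this writes: # 42 "main.c" 2
try pp.printLinemarker(buf.writer(), 41, source, .@"resume");
```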
// How many consecutive empty lines are needed before they are replaced with linemarkers.
const collapse_newlines = 8;
/// Pretty print tokens and try to preserve whitespace.
pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
const tok_ids = pp.tokens.items(.id);
var i: u32 = 0;
while (true) : (i += 1) {
var last_nl = true;
outer: while (true) : (i += 1) {
var cur: Token = pp.tokens.get(i);
switch (cur.id) {
.eof => {
if (pp.tokens.len > 1 and pp.tokens.items(.id)[i - 1] != .nl) try w.writeByte('\n');
break;
if (!last_nl) try w.writeByte('\n');
return;
},
.nl => {
var newlines: u32 = 0;
for (tok_ids[i..], i..) |id, j| {
if (id == .nl) {
newlines += 1;
} else if (id == .eof) {
if (!last_nl) try w.writeByte('\n');
return;
} else if (id != .whitespace) {
if (pp.linemarkers == .none) {
if (newlines < 2) break;
} else if (newlines < collapse_newlines) {
break;
}
i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace));
if (!last_nl) try w.writeAll("\n");
if (pp.linemarkers != .none) {
const next = pp.tokens.get(i);
const source = pp.comp.getSource(next.loc.id);
const line_col = source.lineCol(next.loc);
try pp.printLinemarker(w, line_col.line_no, source, .none);
last_nl = true;
}
continue :outer;
}
}
last_nl = true;
try w.writeAll("\n");
},
.nl => try w.writeAll("\n"),
.keyword_pragma => {
const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1));
const end_idx = mem.indexOfScalarPos(Token.Id, pp.tokens.items(.id), i, .nl) orelse i + 1;
const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1;
const pragma_len = @as(u32, @intCast(end_idx)) - i;
if (pp.comp.getPragma(pragma_name)) |prag| {
if (!prag.shouldPreserveTokens(pp, i + 1)) {
try w.writeByte('\n');
i += pragma_len;
cur = pp.tokens.get(i);
continue;
@ -2488,6 +2695,7 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
cur = pp.tokens.get(i);
if (cur.id == .nl) {
try w.writeByte('\n');
last_nl = true;
break;
}
try w.writeByte(' ');
@ -2498,14 +2706,30 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
.whitespace => {
var slice = pp.expandedSlice(cur);
while (mem.indexOfScalar(u8, slice, '\n')) |some| {
try w.writeByte('\n');
if (pp.linemarkers != .none) try w.writeByte('\n');
slice = slice[some + 1 ..];
}
for (slice) |_| try w.writeByte(' ');
last_nl = false;
},
.include_start => {
const source = pp.comp.getSource(cur.loc.id);
try pp.printLinemarker(w, 0, source, .start);
last_nl = true;
},
.include_resume => {
const source = pp.comp.getSource(cur.loc.id);
const line_col = source.lineCol(cur.loc);
if (!last_nl) try w.writeAll("\n");
try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
last_nl = true;
},
else => {
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
last_nl = false;
},
}
}
@ -2527,6 +2751,7 @@ test "Preserve pragma tokens sometimes" {
defer pp.deinit();
pp.preserve_whitespace = true;
assert(pp.linemarkers == .none);
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
const eof = try pp.preprocess(test_runner_macros);
@ -2557,13 +2782,14 @@ test "Preserve pragma tokens sometimes" {
\\#pragma once
\\
;
try Test.check(omit_once, "int x;\n");
// TODO should only be one newline afterwards when emulating clang
try Test.check(omit_once, "\nint x;\n\n");
const omit_poison =
\\#pragma GCC poison foobar
\\
;
try Test.check(omit_poison, "");
try Test.check(omit_poison, "\n");
}
test "destringify" {

24
deps/aro/README.md vendored Normal file
View File

@ -0,0 +1,24 @@
# Aro
A C compiler with the goal of providing fast compilation, low memory usage, and good diagnostics.
Aro is included as an alternative C frontend in the [Zig compiler](https://github.com/ziglang/zig)
for `translate-c` and, eventually, for compiling C files by translating them to Zig first.
Aro is developed at https://github.com/Vexu/arocc and the Zig dependency is
updated from there when needed.
Currently most of standard C is supported up to C23, as are many of the common
extensions from GNU, MSVC, and Clang.
Basic code generation is supported for x86-64 Linux and can produce a valid hello world:
```sh-session
$ cat hello.c
extern int printf(const char *restrict fmt, ...);
int main(void) {
printf("Hello, world!\n");
return 0;
}
$ zig build run -- hello.c -o hello
$ ./hello
Hello, world!
$
```

32
deps/aro/Source.zig vendored
View File

@ -7,6 +7,16 @@ pub const Id = enum(u32) {
_,
};
/// Classifies the file for line marker output in -E mode
pub const Kind = enum {
/// Regular file
user,
/// Included from a system include directory
system,
/// Included from an "implicit extern C" directory
extern_c_system,
};
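For reference, these kinds map onto the trailing flags written by `printLinemarker` in `Preprocessor.zig` above: `system` appends ` 3` and `extern_c_system` appends ` 3 4`, following GCC's preprocessed-output convention (1 = enter file, 2 = resume file, 3 = system header, 4 = implicit `extern "C"`).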
pub const Location = struct {
id: Id = .unused,
byte_offset: u32 = 0,
@ -24,6 +34,7 @@ id: Id,
/// from the original raw buffer. The same position can appear multiple times if multiple
/// consecutive splices happened. Guaranteed to be non-decreasing
splice_locs: []const u32,
kind: Kind,
/// Todo: binary search instead of scanning entire `splice_locs`.
pub fn numSplicesBefore(source: Source, byte_offset: u32) u32 {
@ -59,7 +70,10 @@ pub fn lineCol(source: Source, loc: Location) LineCol {
var width: u32 = 0;
while (i < loc.byte_offset) : (col += 1) { // TODO this is still incorrect, but better
const len = std.unicode.utf8ByteSequenceLength(source.buf[i]) catch unreachable;
const len = std.unicode.utf8ByteSequenceLength(source.buf[i]) catch {
i += 1;
continue;
};
const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch unreachable;
width += codepointWidth(cp);
i += len;
@ -107,19 +121,3 @@ fn codepointWidth(cp: u32) u32 {
else => 1,
};
}
/// Returns the first offset, if any, in buf where an invalid utf8 sequence
/// is found. Code adapted from std.unicode.utf8ValidateSlice
pub fn offsetOfInvalidUtf8(self: Source) ?u32 {
const buf = self.buf;
std.debug.assert(buf.len <= std.math.maxInt(u32));
var i: u32 = 0;
while (i < buf.len) {
if (std.unicode.utf8ByteSequenceLength(buf[i])) |cp_len| {
if (i + cp_len > buf.len) return i;
if (std.meta.isError(std.unicode.utf8Decode(buf[i .. i + cp_len]))) return i;
i += cp_len;
} else |_| return i;
}
return null;
}

View File

@ -48,7 +48,7 @@ pub fn scopeEnd(s: SymbolStack) u32 {
}
pub fn pushScope(s: *SymbolStack, p: *Parser) !void {
try s.scopes.append(p.pp.comp.gpa, @intCast(s.syms.len));
try s.scopes.append(p.gpa, @intCast(s.syms.len));
}
pub fn popScope(s: *SymbolStack) void {
@ -154,7 +154,7 @@ pub fn defineTypedef(
switch (kinds[i]) {
.typedef => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (ty.eql(prev_ty, p.pp.comp, true)) break;
if (ty.eql(prev_ty, p.comp, true)) break;
try p.errStr(.redefinition_of_typedef, tok, try p.typePairStrExtra(ty, " vs ", prev_ty));
const previous_tok = s.syms.items(.tok)[i];
if (previous_tok != 0) try p.errTok(.previous_definition, previous_tok);
@ -163,7 +163,7 @@ pub fn defineTypedef(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .typedef,
.name = name,
.tok = tok,
@ -197,7 +197,7 @@ pub fn defineSymbol(
},
.decl => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
}
@ -211,7 +211,7 @@ pub fn defineSymbol(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = if (constexpr) .constexpr else .def,
.name = name,
.tok = tok,
@ -243,7 +243,7 @@ pub fn declareSymbol(
},
.decl => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
}
@ -251,7 +251,7 @@ pub fn declareSymbol(
},
.def, .constexpr => if (names[i] == name) {
const prev_ty = s.syms.items(.ty)[i];
if (!ty.eql(prev_ty, p.pp.comp, true)) { // TODO adjusted equality check
if (!ty.eql(prev_ty, p.comp, true)) { // TODO adjusted equality check
try p.errStr(.redefinition_incompatible, tok, p.tokSlice(tok));
try p.errTok(.previous_definition, s.syms.items(.tok)[i]);
break;
@ -261,7 +261,7 @@ pub fn declareSymbol(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .decl,
.name = name,
.tok = tok,
@ -290,7 +290,7 @@ pub fn defineParam(s: *SymbolStack, p: *Parser, name: StringId, ty: Type, tok: T
if (ty.is(.fp16) and !p.comp.hasHalfPrecisionFloatABI()) {
try p.errStr(.suggest_pointer_for_invalid_fp16, tok, "parameters");
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .def,
.name = name,
.tok = tok,
@ -365,7 +365,7 @@ pub fn defineEnumeration(
else => {},
}
}
try s.syms.append(p.pp.comp.gpa, .{
try s.syms.append(p.gpa, .{
.kind = .enumeration,
.name = name,
.tok = tok,

142
deps/aro/Tokenizer.zig vendored
View File

@ -3,8 +3,6 @@ const assert = std.debug.assert;
const Compilation = @import("Compilation.zig");
const Source = @import("Source.zig");
const LangOpts = @import("LangOpts.zig");
const CharInfo = @import("CharInfo.zig");
const unicode = @import("unicode.zig");
const Tokenizer = @This();
@ -108,6 +106,8 @@ pub const Token = struct {
macro_ws,
/// Special token for implementing __has_attribute
macro_param_has_attribute,
/// Special token for implementing __has_declspec_attribute
macro_param_has_declspec_attribute,
/// Special token for implementing __has_warning
macro_param_has_warning,
/// Special token for implementing __has_feature
@ -290,6 +290,16 @@ pub const Token = struct {
/// See C99 6.10.3.3.2
placemarker,
/// Virtual linemarker token output from preprocessor to indicate start of a new include
include_start,
/// Virtual linemarker token output from preprocessor to indicate resuming a file after
/// completion of the preceding #include
include_resume,
/// A comment token if asked to preserve comments.
comment,
/// Return true if token is identifier or keyword.
pub fn isMacroIdentifier(id: Id) bool {
switch (id) {
@ -458,6 +468,10 @@ pub const Token = struct {
pub fn lexeme(id: Id) ?[]const u8 {
return switch (id) {
.include_start,
.include_resume,
=> unreachable,
.invalid,
.identifier,
.extended_identifier,
@ -475,6 +489,7 @@ pub const Token = struct {
.whitespace,
.pp_num,
.embed_byte,
.comment,
=> null,
.zero => "0",
@ -487,6 +502,7 @@ pub const Token = struct {
.stringify_param,
.stringify_va_args,
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_warning,
.macro_param_has_feature,
.macro_param_has_extension,
@ -817,24 +833,6 @@ pub const Token = struct {
};
}
/// Check if codepoint may appear in specified context
/// does not check basic character set chars because the tokenizer handles them separately to keep the common
/// case on the fast path
pub fn mayAppearInIdent(comp: *const Compilation, codepoint: u21, where: enum { start, inside }) bool {
if (codepoint == '$') return comp.langopts.dollars_in_identifiers;
if (codepoint <= 0x7F) return false;
return switch (where) {
.start => if (comp.langopts.standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint) and !CharInfo.isC11DisallowedInitialIdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint) and !CharInfo.isC99DisallowedInitialIDChar(codepoint),
.inside => if (comp.langopts.standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint)
else
CharInfo.isC99IdChar(codepoint),
};
}
const all_kws = std.ComptimeStringMap(Id, .{
.{ "auto", auto: {
@setEvalBranchQuota(3000);
@ -986,6 +984,8 @@ index: u32 = 0,
source: Source.Id,
comp: *const Compilation,
line: u32 = 1,
/// Used to parse include strings with Windows style paths.
path_escapes: bool = false,
pub fn next(self: *Tokenizer) Token {
var state: enum {
@ -996,8 +996,10 @@ pub fn next(self: *Tokenizer) Token {
U,
L,
string_literal,
path_escape,
char_literal_start,
char_literal,
char_escape_sequence,
escape_sequence,
octal_escape,
hex_escape,
@ -1038,18 +1040,8 @@ pub fn next(self: *Tokenizer) Token {
var return_state = state;
var counter: u32 = 0;
var codepoint_len: u3 = undefined;
while (self.index < self.buf.len) : (self.index += codepoint_len) {
// Source files get checked for valid utf-8 before being tokenized so it is safe to use
// these versions.
codepoint_len = unicode.utf8ByteSequenceLength_unsafe(self.buf[self.index]);
const c: u21 = switch (codepoint_len) {
1 => @as(u21, self.buf[self.index]),
2 => unicode.utf8Decode2_unsafe(self.buf[self.index..]),
3 => unicode.utf8Decode3_unsafe(self.buf[self.index..]),
4 => unicode.utf8Decode4_unsafe(self.buf[self.index..]),
else => unreachable,
};
while (self.index < self.buf.len) : (self.index += 1) {
const c = self.buf[self.index];
switch (state) {
.start => switch (c) {
'\n' => {
@ -1137,11 +1129,25 @@ pub fn next(self: *Tokenizer) Token {
'#' => state = .hash,
'0'...'9' => state = .pp_num,
'\t', '\x0B', '\x0C', ' ' => state = .whitespace,
else => if (Token.mayAppearInIdent(self.comp, c, .start)) {
'$' => if (self.comp.langopts.dollars_in_identifiers) {
state = .extended_identifier;
} else {
id = .invalid;
self.index += codepoint_len;
self.index += 1;
break;
},
0x1A => if (self.comp.langopts.ms_extensions) {
id = .eof;
break;
} else {
id = .invalid;
self.index += 1;
break;
},
0x80...0xFF => state = .extended_identifier,
else => {
id = .invalid;
self.index += 1;
break;
},
},
@ -1165,7 +1171,7 @@ pub fn next(self: *Tokenizer) Token {
state = .string_literal;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
@ -1179,7 +1185,7 @@ pub fn next(self: *Tokenizer) Token {
state = .char_literal_start;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
@ -1193,7 +1199,7 @@ pub fn next(self: *Tokenizer) Token {
state = .string_literal;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
@ -1207,14 +1213,14 @@ pub fn next(self: *Tokenizer) Token {
state = .string_literal;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = .identifier;
},
},
.string_literal => switch (c) {
'\\' => {
return_state = .string_literal;
state = .escape_sequence;
state = if (self.path_escapes) .path_escape else .escape_sequence;
},
'"' => {
self.index += 1;
@ -1227,12 +1233,13 @@ pub fn next(self: *Tokenizer) Token {
'\r' => unreachable,
else => {},
},
.path_escape => {
state = .string_literal;
},
.char_literal_start => switch (c) {
'\\' => {
return_state = .char_literal;
state = .escape_sequence;
state = .char_escape_sequence;
},
'\'', '\n' => {
id = .invalid;
break;
@ -1243,8 +1250,7 @@ pub fn next(self: *Tokenizer) Token {
},
.char_literal => switch (c) {
'\\' => {
return_state = .char_literal;
state = .escape_sequence;
state = .char_escape_sequence;
},
'\'' => {
self.index += 1;
@ -1256,14 +1262,15 @@ pub fn next(self: *Tokenizer) Token {
},
else => {},
},
.char_escape_sequence => switch (c) {
'\r', '\n' => unreachable, // removed by line splicing
else => state = .char_literal,
},
.escape_sequence => switch (c) {
'\'', '"', '?', '\\', 'a', 'b', 'e', 'f', 'n', 'r', 't', 'v' => {
state = return_state;
},
'\n' => {
state = return_state;
self.line += 1;
},
'\r', '\n' => unreachable, // removed by line splicing
'0'...'7' => {
counter = 1;
state = .octal_escape;
@ -1288,14 +1295,14 @@ pub fn next(self: *Tokenizer) Token {
if (counter == 3) state = return_state;
},
else => {
codepoint_len = 0;
self.index -= 1;
state = return_state;
},
},
.hex_escape => switch (c) {
'0'...'9', 'a'...'f', 'A'...'F' => {},
else => {
codepoint_len = 0;
self.index -= 1;
state = return_state;
},
},
@ -1311,12 +1318,16 @@ pub fn next(self: *Tokenizer) Token {
},
.identifier, .extended_identifier => switch (c) {
'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
else => {
if (!Token.mayAppearInIdent(self.comp, c, .inside)) {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
}
'$' => if (self.comp.langopts.dollars_in_identifiers) {
state = .extended_identifier;
} else {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
},
0x80...0xFF => state = .extended_identifier,
else => {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
},
},
.equal => switch (c) {
@ -1614,6 +1625,10 @@ pub fn next(self: *Tokenizer) Token {
},
.line_comment => switch (c) {
'\n' => {
if (self.comp.langopts.preserve_comments) {
id = .comment;
break;
}
self.index -= 1;
state = .start;
},
@ -1625,7 +1640,14 @@ pub fn next(self: *Tokenizer) Token {
else => {},
},
.multi_line_comment_asterisk => switch (c) {
'/' => state = .multi_line_comment_done,
'/' => {
if (self.comp.langopts.preserve_comments) {
self.index += 1;
id = .comment;
break;
}
state = .multi_line_comment_done;
},
'\n' => {
self.line += 1;
state = .multi_line_comment;
@ -1712,9 +1734,11 @@ pub fn next(self: *Tokenizer) Token {
.extended_identifier => id = .extended_identifier,
.period2,
.string_literal,
.path_escape,
.char_literal_start,
.char_literal,
.escape_sequence,
.char_escape_sequence,
.octal_escape,
.hex_escape,
.unicode_escape,
@ -1761,6 +1785,12 @@ pub fn next(self: *Tokenizer) Token {
}
pub fn nextNoWS(self: *Tokenizer) Token {
var tok = self.next();
while (tok.id == .whitespace or tok.id == .comment) tok = self.next();
return tok;
}
pub fn nextNoWSComments(self: *Tokenizer) Token {
var tok = self.next();
while (tok.id == .whitespace) tok = self.next();
return tok;

14
deps/aro/Tree.zig vendored
View File

@ -77,6 +77,20 @@ pub const Token = struct {
return copy;
}
pub fn checkMsEof(tok: Token, source: Source, comp: *Compilation) !void {
std.debug.assert(tok.id == .eof);
if (source.buf.len > tok.loc.byte_offset and source.buf[tok.loc.byte_offset] == 0x1A) {
try comp.diag.add(.{
.tag = .ctrl_z_eof,
.loc = .{
.id = source.id,
.byte_offset = tok.loc.byte_offset,
.line = tok.loc.line,
},
}, &.{});
}
}
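Together with the tokenizer change above, which returns `.eof` at a `0x1A` byte when `ms_extensions` is enabled, this lets `preprocess` and `include` report a `ctrl_z_eof` diagnostic for Ctrl+Z-terminated files instead of ending them silently.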
pub const List = std.MultiArrayList(Token);
pub const Id = Tokenizer.Token.Id;
};

6
deps/aro/Type.zig vendored
View File

@ -1727,7 +1727,11 @@ pub const Builder = struct {
ty = typeof;
} else {
ty.specifier = .int;
try p.err(.missing_type_specifier);
if (p.comp.langopts.standard.atLeast(.c2x)) {
try p.err(.missing_type_specifier_c2x);
} else {
try p.err(.missing_type_specifier);
}
}
},
.void => ty.specifier = .void,

File diff suppressed because it is too large

View File

@ -2,10 +2,10 @@ const std = @import("std");
const Properties = @This();
language: Language,
attributes: Attributes,
header: Header,
target_set: TargetSet,
language: Language = .all_languages,
attributes: Attributes = Attributes{},
header: Header = .none,
target_set: TargetSet = TargetSet.initOne(.basic),
/// Header which must be included for a builtin to be available
pub const Header = enum {

View File

@ -68,6 +68,9 @@ pub const ComponentIterator = struct {
'z' => return .{ .spec = .z },
'w' => return .{ .spec = .w },
'F' => return .{ .spec = .F },
'G' => return .{ .spec = .G },
'H' => return .{ .spec = .H },
'M' => return .{ .spec = .M },
'a' => return .{ .spec = .a },
'A' => return .{ .spec = .A },
'V', 'q', 'E' => {
@ -233,6 +236,12 @@ const Spec = union(enum) {
w,
/// constant CFString
F,
/// id
G,
/// SEL
H,
/// struct objc_super
M,
/// __builtin_va_list
a,
/// "reference" to __builtin_va_list

View File

@ -34,7 +34,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation
var idx = start_idx + 1;
const l_paren = p.pp.tokens.get(idx);
if (l_paren.id != .l_paren) {
return p.pp.comp.diag.add(.{
return p.comp.diag.add(.{
.tag = .pragma_pack_lparen,
.loc = l_paren.loc,
}, l_paren.expansionSlice());
@ -83,7 +83,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation
}
}
if (action == .push) {
try pack.stack.append(p.pp.comp.gpa, .{ .label = label orelse "", .val = p.pragma_pack orelse 8 });
try pack.stack.append(p.gpa, .{ .label = label orelse "", .val = p.pragma_pack orelse 8 });
} else {
pack.pop(p, label);
if (new_val != null) {
@ -107,7 +107,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation
const new_val = (try packInt(p, arg)) orelse return;
idx += 1;
if (apple_or_xl) {
try pack.stack.append(p.pp.comp.gpa, .{ .label = "", .val = p.pragma_pack });
try pack.stack.append(p.gpa, .{ .label = "", .val = p.pragma_pack });
}
p.pragma_pack = new_val;
},

2
deps/aro/target.zig vendored
View File

@ -657,6 +657,7 @@ pub fn toLLVMTriple(target: std.Target, buf: []u8) []const u8 {
.netbsd => "netbsd",
.openbsd => "openbsd",
.solaris => "solaris",
.illumos => "illumos",
.windows => "windows",
.zos => "zos",
.haiku => "haiku",
@ -684,6 +685,7 @@ pub fn toLLVMTriple(target: std.Target, buf: []u8) []const u8 {
.watchos => "watchos",
.driverkit => "driverkit",
.shadermodel => "shadermodel",
.liteos => "liteos",
.opencl,
.glsl450,
.vulkan,

41
deps/aro/unicode.zig vendored
View File

@ -1,41 +0,0 @@
//! Copied from https://github.com/ziglang/zig/blob/6f0807f50f4e946bb850e746beaa5d6556cf7750/lib/std/unicode.zig
//! with all safety checks removed. These functions must only be called with known-good buffers that have already
//! been validated as being legitimate UTF8-encoded data, otherwise undefined behavior will occur.
pub fn utf8ByteSequenceLength_unsafe(first_byte: u8) u3 {
return switch (first_byte) {
0b0000_0000...0b0111_1111 => 1,
0b1100_0000...0b1101_1111 => 2,
0b1110_0000...0b1110_1111 => 3,
0b1111_0000...0b1111_0111 => 4,
else => unreachable,
};
}
pub fn utf8Decode2_unsafe(bytes: []const u8) u21 {
var value: u21 = bytes[0] & 0b00011111;
value <<= 6;
return value | (bytes[1] & 0b00111111);
}
pub fn utf8Decode3_unsafe(bytes: []const u8) u21 {
var value: u21 = bytes[0] & 0b00001111;
value <<= 6;
value |= bytes[1] & 0b00111111;
value <<= 6;
return value | (bytes[2] & 0b00111111);
}
pub fn utf8Decode4_unsafe(bytes: []const u8) u21 {
var value: u21 = bytes[0] & 0b00000111;
value <<= 6;
value |= bytes[1] & 0b00111111;
value <<= 6;
value |= bytes[2] & 0b00111111;
value <<= 6;
return value | (bytes[3] & 0b00111111);
}

View File

@ -17,12 +17,14 @@ target: CrossTarget,
optimize: std.builtin.OptimizeMode,
output_file: std.Build.GeneratedFile,
link_libc: bool,
use_clang: bool,
pub const Options = struct {
source_file: std.Build.LazyPath,
target: CrossTarget,
optimize: std.builtin.OptimizeMode,
link_libc: bool = true,
use_clang: bool = true,
};
pub fn create(owner: *std.Build, options: Options) *TranslateC {
@ -43,6 +45,7 @@ pub fn create(owner: *std.Build, options: Options) *TranslateC {
.optimize = options.optimize,
.output_file = std.Build.GeneratedFile{ .step = &self.step },
.link_libc = options.link_libc,
.use_clang = options.use_clang,
};
source.addStepDependencies(&self.step);
return self;
@ -130,6 +133,9 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
if (self.link_libc) {
try argv_list.append("-lc");
}
if (!self.use_clang) {
try argv_list.append("-fno-clang");
}
try argv_list.append("--listen=-");

View File

@ -3980,6 +3980,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
man.hash.add(@as(u16, 0xb945)); // Random number to distinguish translate-c from compiling C objects
man.hash.addBytes(c_src);
man.hash.add(comp.c_frontend);
// If the previous invocation resulted in clang errors, we will see a hit
// here with 0 files in the manifest, in which case it is actually a miss.

View File

@ -308,7 +308,6 @@ fn transVarDecl(_: *Context, _: NodeIndex, _: ?usize) Error!void {
fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: NodeIndex, field_nodes: []const NodeIndex) Error!void {
const node_types = c.tree.nodes.items(.ty);
const ty = node_types[@intFromEnum(enum_decl)];
const node_data = c.tree.nodes.items(.data);
if (c.decl_table.get(@intFromPtr(ty.data.@"enum"))) |_|
return; // Avoid processing this decl twice
const toplevel = scope.id == .root;
@ -342,11 +341,15 @@ fn transEnumDecl(c: *Context, scope: *Scope, enum_decl: NodeIndex, field_nodes:
else => |e| return e,
};
const val = c.tree.value_map.get(field_node).?;
const str = try std.fmt.allocPrint(c.arena, "{d}", .{val.data.int});
const int = try ZigTag.integer_literal.create(c.arena, str);
const enum_const_def = try ZigTag.enum_constant.create(c.arena, .{
.name = enum_val_name,
.is_public = toplevel,
.type = enum_const_type_node,
.value = transExpr(c, node_data[@intFromEnum(field_node)].decl.node, .used) catch @panic("TODO"),
.value = int,
});
if (toplevel)
try addTopLevelDecl(c, enum_val_name, enum_const_def)

View File

@ -4265,6 +4265,7 @@ fn cmdTranslateC(comp: *Compilation, arena: Allocator, fancy_output: ?*Compilati
defer man.deinit();
man.hash.add(@as(u16, 0xb945)); // Random number to distinguish translate-c from compiling C objects
man.hash.add(comp.c_frontend);
Compilation.cache_helpers.hashCSource(&man, c_source_file) catch |err| {
fatal("unable to process '{s}': {s}", .{ c_source_file.src_path, @errorName(err) });
};

View File

@ -9,7 +9,7 @@ If you want it to be run with `zig test` and match expected error messages:
```zig
// error
// is_test=1
// is_test=true
//
// :4:13: error: 'try' outside function scope
```
@ -22,6 +22,33 @@ This will do `zig run` on the code and expect exit code 0.
// run
```
## Translate-c
If you want to test translating C code to Zig, use `translate-c`:
```c
// translate-c
// c_frontend=aro,clang
// target=x86_64-linux
//
// pub const foo = 1;
// pub const immediately_after_foo = 2;
//
// pub const somewhere_else_in_the_file = 3;
```
## Run Translated C
If you want to test translating C code to Zig and then executing it, use `run-translated-c`:
```c
// run-translated-c
// c_frontend=aro,clang
// target=x86_64-linux
//
// Hello world!
```
## Incremental Compilation
Make multiple files that have ".", and then an integer, before the ".zig"
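For example (see `sortTestFilenames` further below), an incremental case is a sequence such as `foo.0.zig`, `foo.1.zig`, `foo.2.zig`, applied to the same compilation as successive updates.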

View File

@ -6,6 +6,6 @@ test "Crash" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :1:11: error: use of undeclared identifier 'B'

View File

@ -4,7 +4,7 @@ test "thingy" {}
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :1:6: error: duplicate test name: test.thingy
// :2:6: note: other test here

View File

@ -9,6 +9,6 @@ test "1" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :2:12: error: use of undeclared identifier 'Q'

View File

@ -5,6 +5,6 @@ test "example" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :2:12: error: expected type 'anyerror!void', found 'comptime_int'

View File

@ -6,7 +6,7 @@ test "enum" {
// error
// backend=stage2
// target=native
// is_test=1
// is_test=true
//
// :3:9: error: no field with value '@enumFromInt(5)' in enum 'test.enum.E'
// :2:15: note: declared here

View File

@ -9,7 +9,7 @@ pub fn main() void {
// run
// backend=llvm
// target=x86_64-linux-gnu
// link_libc=1
// link_libc=true
//
// f64: 2.000000
// f32: 10.000000

View File

@ -13,6 +13,6 @@ fn foo(comptime info: std.builtin.Type) !void {
}
// run
// is_test=1
// is_test=true
// backend=llvm
//

View File

@ -7,7 +7,7 @@ pub fn main() void {
// run
// backend=llvm
// target=x86_64-linux,x86_64-macos
// link_libc=1
// link_libc=true
//
// hello world!
//

View File

@ -0,0 +1,11 @@
#include <stdlib.h>
int main(void) {
int i = 0;
*&i = 42;
if (i != 42) abort();
return 0;
}
// run-translated-c
// c_frontend=clang
// link_libc=true

View File

@ -0,0 +1,16 @@
enum Foo {
FooA = 2,
FooB = 5,
Foo1,
};
// translate-c
// target=x86_64-windows-msvc
// c_frontend=clang
//
// pub const FooA: c_int = 2;
// pub const FooB: c_int = 5;
// pub const Foo1: c_int = 6;
// pub const enum_Foo = c_int;
//
// pub const Foo = enum_Foo;

View File

@ -0,0 +1,16 @@
enum Foo {
FooA = 2,
FooB = 5,
Foo1,
};
// translate-c
// target=x86_64-linux
// c_frontend=clang,aro
//
// pub const FooA: c_int = 2;
// pub const FooB: c_int = 5;
// pub const Foo1: c_int = 6;
// pub const enum_Foo = c_uint;
//
// pub const Foo = enum_Foo;

View File

@ -8,6 +8,6 @@ test "@unionInit on union w/ tag but no fields" {
}
// error
// is_test=1
// is_test=true
//
// :4:13: error: 'try' outside function scope

View File

@ -2,17 +2,14 @@ const std = @import("std");
const tests = @import("tests.zig");
const nl = if (@import("builtin").os.tag == .windows) "\r\n" else "\n";
pub fn addCases(cases: *tests.RunTranslatedCContext) void {
cases.add("dereference address of",
\\#include <stdlib.h>
\\int main(void) {
\\ int i = 0;
\\ *&i = 42;
\\ if (i != 42) abort();
\\ return 0;
\\}
, "");
// *********************************************************
// *                                                       *
// *               DO NOT ADD NEW CASES HERE               *
// *   instead add a file to test/cases/run_translated_c   *
// *                                                       *
// *********************************************************
pub fn addCases(cases: *tests.RunTranslatedCContext) void {
cases.add("division of floating literals",
\\#define _NO_CRT_STDIO_INLINE 1
\\#include <stdio.h>

View File

@ -1,6 +1,7 @@
gpa: Allocator,
arena: Allocator,
cases: std.ArrayList(Case),
translate: std.ArrayList(Translate),
incremental_cases: std.ArrayList(IncrementalCase),
pub const IncrementalCase = struct {
@ -36,7 +37,7 @@ pub const Update = struct {
Execution: []const u8,
/// A header update compiles the input with the equivalent of
/// `-femit-h` and tests the produced header against the
/// expected result
/// expected result.
Header: []const u8,
},
@ -61,6 +62,11 @@ pub const Backend = enum {
llvm,
};
pub const CFrontend = enum {
clang,
aro,
};
/// A `Case` consists of a list of `Update`. The same `Compilation` is used for each
/// update, so each update's source is treated as a single file being
/// updated by the test harness and incrementally compiled.
@ -143,6 +149,25 @@ pub const Case = struct {
}
};
pub const Translate = struct {
/// The name of the test case. This is shown if a test fails, and
/// otherwise ignored.
name: []const u8,
input: [:0]const u8,
target: CrossTarget,
link_libc: bool,
c_frontend: CFrontend,
kind: union(enum) {
/// Translate the input, run it and check that it
/// outputs the expected text.
run: []const u8,
/// Translate the input and check that it contains
/// the expected lines of code.
translate: []const []const u8,
},
};
pub fn addExe(
ctx: *Cases,
name: []const u8,
@ -346,9 +371,12 @@ pub fn addCompile(
pub fn addFromDir(ctx: *Cases, dir: std.fs.IterableDir) void {
var current_file: []const u8 = "none";
ctx.addFromDirInner(dir, &current_file) catch |err| {
std.debug.panic("test harness failed to process file '{s}': {s}\n", .{
current_file, @errorName(err),
});
std.debug.panicExtra(
@errorReturnTrace(),
@returnAddress(),
"test harness failed to process file '{s}': {s}\n",
.{ current_file, @errorName(err) },
);
};
}
@ -395,10 +423,44 @@ fn addFromDirInner(
const backends = try manifest.getConfigForKeyAlloc(ctx.arena, "backend", Backend);
const targets = try manifest.getConfigForKeyAlloc(ctx.arena, "target", CrossTarget);
const c_frontends = try manifest.getConfigForKeyAlloc(ctx.arena, "c_frontend", CFrontend);
const is_test = try manifest.getConfigForKeyAssertSingle("is_test", bool);
const link_libc = try manifest.getConfigForKeyAssertSingle("link_libc", bool);
const output_mode = try manifest.getConfigForKeyAssertSingle("output_mode", std.builtin.OutputMode);
if (manifest.type == .translate_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingLinesSplit(ctx.arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .translate = output },
});
}
}
continue;
}
if (manifest.type == .run_translated_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingSplit(ctx.arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .run = output },
});
}
}
continue;
}
var cases = std.ArrayList(usize).init(ctx.arena);
// Cross-product to get all possible test combinations
@ -439,21 +501,15 @@ fn addFromDirInner(
case.addCompile(src);
},
.@"error" => {
const errors = try manifest.trailingAlloc(ctx.arena);
const errors = try manifest.trailingLines(ctx.arena);
case.addError(src, errors);
},
.run => {
var output = std.ArrayList(u8).init(ctx.arena);
var trailing_it = manifest.trailing();
while (trailing_it.next()) |line| {
try output.appendSlice(line);
try output.append('\n');
}
if (output.items.len > 0) {
try output.resize(output.items.len - 1);
}
case.addCompareOutput(src, try output.toOwnedSlice());
const output = try manifest.trailingSplit(ctx.arena);
case.addCompareOutput(src, output);
},
.translate_c => @panic("c_frontend specified for compile case"),
.run_translated_c => @panic("c_frontend specified for compile case"),
.cli => @panic("TODO cli tests"),
}
}
@ -468,6 +524,7 @@ pub fn init(gpa: Allocator, arena: Allocator) Cases {
return .{
.gpa = gpa,
.cases = std.ArrayList(Case).init(gpa),
.translate = std.ArrayList(Translate).init(gpa),
.incremental_cases = std.ArrayList(IncrementalCase).init(gpa),
.arena = arena,
};
@ -482,7 +539,7 @@ pub fn lowerToBuildSteps(
incremental_exe: *std.Build.Step.Compile,
) void {
const host = std.zig.system.NativeTargetInfo.detect(.{}) catch |err|
std.debug.panic("unable to detect notive host: {s}\n", .{@errorName(err)});
std.debug.panic("unable to detect native host: {s}\n", .{@errorName(err)});
for (self.incremental_cases.items) |incr_case| {
if (true) {
@ -589,7 +646,7 @@ pub fn lowerToBuildSteps(
.Execution => |expected_stdout| no_exec: {
const run = if (case.target.ofmt == .c) run_step: {
const target_info = std.zig.system.NativeTargetInfo.detect(case.target) catch |err|
std.debug.panic("unable to detect notive host: {s}\n", .{@errorName(err)});
std.debug.panic("unable to detect target host: {s}\n", .{@errorName(err)});
if (host.getExternalExecutor(&target_info, .{ .link_libc = true }) != .native) {
// We wouldn't be able to run the compiled C code.
break :no_exec;
@ -623,6 +680,68 @@ pub fn lowerToBuildSteps(
.Header => @panic("TODO"),
}
}
for (self.translate.items) |case| switch (case.kind) {
.run => |output| {
const annotated_case_name = b.fmt("run-translated-c {s}", .{case.name});
if (opt_test_filter) |filter| {
if (std.mem.indexOf(u8, annotated_case_name, filter) == null) continue;
}
if (!std.process.can_spawn) {
std.debug.print("Unable to spawn child processes on {s}, skipping test.\n", .{@tagName(builtin.os.tag)});
continue; // Pass test.
}
const target_info = std.zig.system.NativeTargetInfo.detect(case.target) catch |err|
std.debug.panic("unable to detect target host: {s}\n", .{@errorName(err)});
if (host.getExternalExecutor(&target_info, .{ .link_libc = true }) != .native) {
// We wouldn't be able to run the compiled C code.
continue; // Pass test.
}
const write_src = b.addWriteFiles();
const file_source = write_src.add("tmp.c", case.input);
const translate_c = b.addTranslateC(.{
.source_file = file_source,
.optimize = .Debug,
.target = case.target,
.link_libc = case.link_libc,
.use_clang = case.c_frontend == .clang,
});
translate_c.step.name = b.fmt("{s} translate-c", .{annotated_case_name});
const run_exe = translate_c.addExecutable(.{});
run_exe.step.name = b.fmt("{s} build-exe", .{annotated_case_name});
run_exe.linkLibC();
const run = b.addRunArtifact(run_exe);
run.step.name = b.fmt("{s} run", .{annotated_case_name});
run.expectStdOutEqual(output);
parent_step.dependOn(&run.step);
},
.translate => |output| {
const annotated_case_name = b.fmt("zig translate-c {s}", .{case.name});
if (opt_test_filter) |filter| {
if (std.mem.indexOf(u8, annotated_case_name, filter) == null) continue;
}
const write_src = b.addWriteFiles();
const file_source = write_src.add("tmp.c", case.input);
const translate_c = b.addTranslateC(.{
.source_file = file_source,
.optimize = .Debug,
.target = case.target,
.link_libc = case.link_libc,
.use_clang = case.c_frontend == .clang,
});
translate_c.step.name = annotated_case_name;
const check_file = translate_c.addCheckFile(output);
parent_step.dependOn(&check_file.step);
},
};
}
/// Sort test filenames in-place, so that incremental test cases ("foo.0.zig",
@ -780,7 +899,7 @@ const TestManifestConfigDefaults = struct {
if (std.mem.eql(u8, key, "backend")) {
return "stage2";
} else if (std.mem.eql(u8, key, "target")) {
if (@"type" == .@"error") {
if (@"type" == .@"error" or @"type" == .translate_c or @"type" == .run_translated_c) {
return "native";
}
return comptime blk: {
@ -807,12 +926,16 @@ const TestManifestConfigDefaults = struct {
.@"error" => "Obj",
.run => "Exe",
.compile => "Obj",
.translate_c => "Obj",
.run_translated_c => "Obj",
.cli => @panic("TODO test harness for CLI tests"),
};
} else if (std.mem.eql(u8, key, "is_test")) {
return "0";
return "false";
} else if (std.mem.eql(u8, key, "link_libc")) {
return "0";
return "false";
} else if (std.mem.eql(u8, key, "c_frontend")) {
return "clang";
} else unreachable;
}
};
@ -844,6 +967,8 @@ const TestManifest = struct {
run,
cli,
compile,
translate_c,
run_translated_c,
};
const TrailingIterator = struct {
@ -912,6 +1037,10 @@ const TestManifest = struct {
break :blk .cli;
} else if (std.mem.eql(u8, raw, "compile")) {
break :blk .compile;
} else if (std.mem.eql(u8, raw, "translate-c")) {
break :blk .translate_c;
} else if (std.mem.eql(u8, raw, "run-translated-c")) {
break :blk .run_translated_c;
} else {
std.log.warn("unknown test case type requested: {s}", .{raw});
return error.UnknownTestCaseType;
@ -979,7 +1108,21 @@ const TestManifest = struct {
};
}
fn trailingAlloc(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const []const u8 {
fn trailingSplit(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const u8 {
var out = std.ArrayList(u8).init(allocator);
defer out.deinit();
var trailing_it = self.trailing();
while (trailing_it.next()) |line| {
try out.appendSlice(line);
try out.append('\n');
}
if (out.items.len > 0) {
try out.resize(out.items.len - 1);
}
return try out.toOwnedSlice();
}
fn trailingLines(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const []const u8 {
var out = std.ArrayList([]const u8).init(allocator);
defer out.deinit();
var it = self.trailing();
@ -989,6 +1132,28 @@ const TestManifest = struct {
return try out.toOwnedSlice();
}
fn trailingLinesSplit(self: TestManifest, allocator: Allocator) error{OutOfMemory}![]const []const u8 {
// Collect output lines split by empty lines
var out = std.ArrayList([]const u8).init(allocator);
defer out.deinit();
var buf = std.ArrayList(u8).init(allocator);
defer buf.deinit();
var it = self.trailing();
while (it.next()) |line| {
if (line.len == 0) {
if (buf.items.len != 0) {
try out.append(try buf.toOwnedSlice());
buf.items.len = 0;
}
continue;
}
try buf.appendSlice(line);
try buf.append('\n');
}
try out.append(try buf.toOwnedSlice());
return try out.toOwnedSlice();
}
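Concretely, for the translate-c README example earlier in this diff, `trailingLinesSplit` yields two expected chunks: one with the `foo` and `immediately_after_foo` lines, and one with `somewhere_else_in_the_file`, each checked against the translated output.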
fn ParseFn(comptime T: type) type {
return fn ([]const u8) anyerror!T;
}
@ -1011,8 +1176,10 @@ const TestManifest = struct {
}.parse,
.Bool => return struct {
fn parse(str: []const u8) anyerror!T {
const as_int = try std.fmt.parseInt(u1, str, 0);
return as_int > 0;
if (std.mem.eql(u8, str, "true")) return true;
if (std.mem.eql(u8, str, "false")) return false;
std.debug.print("{s}\n", .{str});
return error.InvalidBool;
}
}.parse,
.Enum => return struct {
@ -1124,9 +1291,47 @@ pub fn main() !void {
if (cases.items.len == 0) {
const backends = try manifest.getConfigForKeyAlloc(arena, "backend", Backend);
const targets = try manifest.getConfigForKeyAlloc(arena, "target", CrossTarget);
const c_frontends = try manifest.getConfigForKeyAlloc(arena, "c_frontend", CFrontend);
const is_test = try manifest.getConfigForKeyAssertSingle("is_test", bool);
const link_libc = try manifest.getConfigForKeyAssertSingle("link_libc", bool);
const output_mode = try manifest.getConfigForKeyAssertSingle("output_mode", std.builtin.OutputMode);
if (manifest.type == .translate_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingLinesSplit(arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .translate = output },
});
}
}
continue;
}
if (manifest.type == .run_translated_c) {
for (c_frontends) |c_frontend| {
for (targets) |target| {
const output = try manifest.trailingSplit(arena);
try ctx.translate.append(.{
.name = std.fs.path.stem(filename),
.c_frontend = c_frontend,
.target = target,
.link_libc = link_libc,
.input = src,
.kind = .{ .run = output },
});
}
}
continue;
}
// Cross-product to get all possible test combinations
for (backends) |backend| {
for (targets) |target| {
@ -1158,7 +1363,7 @@ pub fn main() !void {
case.addCompile(src);
},
.@"error" => {
const errors = try manifest.trailingAlloc(arena);
const errors = try manifest.trailingLines(arena);
switch (strategy) {
.independent => {
case.addError(src, errors);
@ -1169,17 +1374,11 @@ pub fn main() !void {
}
},
.run => {
var output = std.ArrayList(u8).init(arena);
var trailing_it = manifest.trailing();
while (trailing_it.next()) |line| {
try output.appendSlice(line);
try output.append('\n');
}
if (output.items.len > 0) {
try output.resize(output.items.len - 1);
}
case.addCompareOutput(src, try output.toOwnedSlice());
const output = try manifest.trailingSplit(arena);
case.addCompareOutput(src, output);
},
.translate_c => @panic("c_frontend specified for compile case"),
.run_translated_c => @panic("c_frontend specified for compile case"),
.cli => @panic("TODO cli tests"),
}
}
@ -1255,6 +1454,11 @@ fn runCases(self: *Cases, zig_exe_path: []const u8) !void {
host,
);
}
for (self.translate.items) |*case| {
_ = case;
@panic("TODO is this even used?");
}
}
}

View File

@ -3,6 +3,13 @@ const builtin = @import("builtin");
const tests = @import("tests.zig");
const CrossTarget = std.zig.CrossTarget;
// *********************************************************
// *                                                       *
// *               DO NOT ADD NEW CASES HERE               *
// *     instead add a file to test/cases/translate_c      *
// *                                                       *
// *********************************************************
pub fn addCases(cases: *tests.TranslateCContext) void {
const default_enum_type = if (builtin.abi == .msvc) "c_int" else "c_uint";
@ -3315,23 +3322,6 @@ pub fn addCases(cases: *tests.TranslateCContext) void {
\\pub const FOO_CHAR = '\x3f';
});
cases.add("enums",
\\enum Foo {
\\ FooA = 2,
\\ FooB = 5,
\\ Foo1,
\\};
, &[_][]const u8{
\\pub const FooA: c_int = 2;
\\pub const FooB: c_int = 5;
\\pub const Foo1: c_int = 6;
\\pub const enum_Foo =
++ " " ++ default_enum_type ++
\\;
,
\\pub const Foo = enum_Foo;
});
cases.add("macro cast",
\\#include <stdint.h>
\\int baz(void *arg) { return 0; }