std-c parser typing improvements

Vexu 2020-01-07 16:05:13 +02:00
parent 3ed6d7d245
commit df12c1328e
3 changed files with 254 additions and 57 deletions


@@ -23,6 +23,11 @@ pub const Tree = struct {
arena_allocator.deinit();
// self is destroyed
}
+
+pub fn slice(tree: *Tree, token: TokenIndex) []const u8 {
+const tok = tree.tokens.at(token);
+return tok.source.buffer[tok.start..tok.end];
+}
};
pub const Msg = struct {
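Note: the new Tree.slice helper supersedes the Token.slice method removed from the tokenizer in the last hunk below, so error rendering only needs a TokenIndex plus the tree. A minimal usage sketch; the surrounding variables are assumed, not part of this commit:

    // assumes `tree: *Tree` and a valid `tok: TokenIndex`
    const bytes = tree.slice(tok);
    std.debug.warn("token text: {}\n", .{bytes});
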
@@ -47,19 +52,19 @@ pub const Error = union(enum) {
DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"),
DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"),
-pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void {
+pub fn render(self: *const Error, tree: *Tree, stream: var) !void {
switch (self.*) {
-.InvalidToken => |*x| return x.render(tokens, stream),
-.ExpectedToken => |*x| return x.render(tokens, stream),
-.ExpectedExpr => |*x| return x.render(tokens, stream),
-.ExpectedStmt => |*x| return x.render(tokens, stream),
-.ExpectedTypeName => |*x| return x.render(tokens, stream),
-.ExpectedDeclarator => |*x| return x.render(tokens, stream),
-.ExpectedFnBody => |*x| return x.render(tokens, stream),
-.ExpectedInitializer => |*x| return x.render(tokens, stream),
-.InvalidTypeSpecifier => |*x| return x.render(tokens, stream),
-.DuplicateQualifier => |*x| return x.render(tokens, stream),
-.DuplicateSpecifier => |*x| return x.render(tokens, stream),
+.InvalidToken => |*x| return x.render(tree, stream),
+.ExpectedToken => |*x| return x.render(tree, stream),
+.ExpectedExpr => |*x| return x.render(tree, stream),
+.ExpectedStmt => |*x| return x.render(tree, stream),
+.ExpectedTypeName => |*x| return x.render(tree, stream),
+.ExpectedDeclarator => |*x| return x.render(tree, stream),
+.ExpectedFnBody => |*x| return x.render(tree, stream),
+.ExpectedInitializer => |*x| return x.render(tree, stream),
+.InvalidTypeSpecifier => |*x| return x.render(tree, stream),
+.DuplicateQualifier => |*x| return x.render(tree, stream),
+.DuplicateSpecifier => |*x| return x.render(tree, stream),
}
}
@@ -83,8 +88,8 @@ pub const Error = union(enum) {
token: TokenIndex,
expected_id: @TagType(Token.Id),
-pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void {
-const found_token = tokens.at(self.token);
+pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void {
+const found_token = tree.tokens.at(self.token);
if (found_token.id == .Invalid) {
return stream.print("expected '{}', found invalid bytes", .{self.expected_id.symbol()});
} else {
@@ -98,10 +103,10 @@ pub const Error = union(enum) {
token: TokenIndex,
type_spec: *Node.TypeSpec,
-pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void {
+pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void {
try stream.write("invalid type specifier '");
-try type_spec.spec.print(tokens, stream);
-const token_name = tokens.at(self.token).id.symbol();
+try type_spec.spec.print(tree, stream);
+const token_name = tree.tokens.at(self.token).id.symbol();
return stream.print("{}'", .{token_name});
}
};
@@ -110,14 +115,59 @@ pub const Error = union(enum) {
return struct {
token: TokenIndex,
-pub fn render(self: *const @This(), tokens: *Tree.TokenList, stream: var) !void {
-const actual_token = tokens.at(self.token);
+pub fn render(self: *const @This(), tree: *Tree, stream: var) !void {
+const actual_token = tree.tokens.at(self.token);
return stream.print(msg, .{actual_token.id.symbol()});
}
};
}
};
+pub const Type = struct {
+pub const TypeList = std.SegmentedList(*Type, 4);
+@"const": bool,
+atomic: bool,
+@"volatile": bool,
+restrict: bool,
+id: union(enum) {
+Int: struct {
+quals: Qualifiers,
+id: Id,
+is_signed: bool,
+pub const Id = enum {
+Char,
+Short,
+Int,
+Long,
+LongLong,
+};
+},
+Float: struct {
+quals: Qualifiers,
+id: Id,
+pub const Id = enum {
+Float,
+Double,
+LongDouble,
+};
+},
+Pointer: struct {
+quals: Qualifiers,
+child_type: *Type,
+},
+Function: struct {
+return_type: *Type,
+param_types: TypeList,
+},
+Typedef: *Type,
+Record: *Node.RecordType,
+Enum: *Node.EnumType,
+},
+};
pub const Node = struct {
id: Id,
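A hedged sketch of building a value of the new Type union, e.g. for a C declaration like 'const int *p'. The allocator is assumed, and Qualifiers is referenced but not defined in this diff, so its initializer is left undefined here:

    // illustrative only: pointer-to-const-int under the new Type union
    const int_ty = try allocator.create(Type);
    int_ty.* = .{
        .@"const" = true, // pointee is const-qualified
        .atomic = false,
        .@"volatile" = false,
        .restrict = false,
        .id = .{ .Int = .{ .quals = undefined, .id = .Int, .is_signed = true } },
    };
    const ptr_ty = try allocator.create(Type);
    ptr_ty.* = .{
        .@"const" = false,
        .atomic = false,
        .@"volatile" = false,
        .restrict = false,
        .id = .{ .Pointer = .{ .quals = undefined, .child_type = int_ty } },
    };
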
@@ -205,22 +255,128 @@ pub const Node = struct {
typename: *Node,
rparen: TokenIndex,
},
-//todo
-// @"enum",
-// record,
-Typedef: TokenIndex,
+Enum: *EnumType,
+Record: *RecordType,
+Typedef: struct {
+sym: TokenIndex,
+sym_type: *Type,
+},
-pub fn print(self: *@This(), tokens: *Tree.TokenList, stream: var) !void {
-switch (self) {
+pub fn print(self: *const @This(), tree: *Tree, stream: var) !void {
+switch (self.spec) {
.None => unreachable,
-else => @panic("TODO print type specifier"),
+.Void => |index| try stream.write(tree.slice(index)),
+.Char => |char| {
+if (char.sign) |s| {
+try stream.write(tree.slice(s));
+try stream.writeByte(' ');
+}
+try stream.write(tree.slice(char.char));
+},
+.Short => |short| {
+if (short.sign) |s| {
+try stream.write(tree.slice(s));
+try stream.writeByte(' ');
+}
+try stream.write(tree.slice(short.short));
+if (short.int) |i| {
+try stream.writeByte(' ');
+try stream.write(tree.slice(i));
+}
+},
+.Int => |int| {
+if (int.sign) |s| {
+try stream.write(tree.slice(s));
+try stream.writeByte(' ');
+}
+if (int.int) |i| {
+try stream.writeByte(' ');
+try stream.write(tree.slice(i));
+}
+},
+.Long => |long| {
+if (long.sign) |s| {
+try stream.write(tree.slice(s));
+try stream.writeByte(' ');
+}
+try stream.write(tree.slice(long.long));
+if (long.longlong) |l| {
+try stream.writeByte(' ');
+try stream.write(tree.slice(l));
+}
+if (long.int) |i| {
+try stream.writeByte(' ');
+try stream.write(tree.slice(i));
+}
+},
+.Float => |float| {
+try stream.write(tree.slice(float.float));
+if (float.complex) |c| {
+try stream.writeByte(' ');
+try stream.write(tree.slice(c));
+}
+},
+.Double => |double| {
+if (double.long) |l| {
+try stream.write(tree.slice(l));
+try stream.writeByte(' ');
+}
+try stream.write(tree.slice(double.double));
+if (double.complex) |c| {
+try stream.writeByte(' ');
+try stream.write(tree.slice(c));
+}
+},
+.Bool => |index| try stream.write(tree.slice(index)),
+.Typedef => |typedef| try stream.write(tree.slice(typedef.sym)),
+else => try stream.print("TODO print {}", .{self.spec}),
}
}
} = .None,
};
+pub const EnumType = struct {
+tok: TokenIndex,
+name: ?TokenIndex,
+body: ?struct {
+lbrace: TokenIndex,
+/// always EnumField
+fields: FieldList,
+rbrace: TokenIndex,
+},
+pub const FieldList = Root.DeclList;
+};
+pub const EnumField = struct {
+base: Node = Node{ .id = EnumField },
+name: TokenIndex,
+value: ?*Node,
+};
+pub const RecordType = struct {
+kind: union(enum) {
+Struct: TokenIndex,
+Union: TokenIndex,
+},
+name: ?TokenIndex,
+body: ?struct {
+lbrace: TokenIndex,
+/// RecordField or StaticAssert
+fields: FieldList,
+rbrace: TokenIndex,
+},
+pub const FieldList = Root.DeclList;
+};
+pub const RecordField = struct {
+base: Node = Node{ .id = RecordField },
+// TODO
+};
pub const TypeQual = struct {
@"const": ?TokenIndex = null,
atomic: ?TokenIndex = null,


@@ -3,6 +3,7 @@ const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ast = std.c.ast;
const Node = ast.Node;
+const Type = ast.Type;
const Tree = ast.Tree;
const TokenIndex = ast.TokenIndex;
const Token = std.c.Token;
@@ -57,10 +58,12 @@ pub fn parse(allocator: *Allocator, source: []const u8) !*Tree {
}
var parser = Parser{
+.symbols = Parser.SymbolList.init(allocator),
.arena = arena,
.it = &it,
.tree = tree,
};
+defer parser.symbols.deinit();
tree.root_node = try parser.root();
return tree;
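For context, a minimal sketch of driving the parser as wired up here (assuming Tree.deinit releases the tree, as suggested by the deinit tail in the ast.zig hunk above):

    const tree = try parse(allocator, "typedef int my_int; my_int x;");
    defer tree.deinit();
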
@@ -72,19 +75,35 @@ const Parser = struct {
tree: *Tree,
/// only used for scopes
arena_allocator: std.heap.ArenaAllocator,
-// scopes: std.SegmentedLists(Scope),
+symbols: SymbolList,
warnings: bool = true,
-// const Scope = struct {
-// types:
-// syms:
-// };
-fn getTypeDef(parser: *Parser, tok: TokenIndex) bool {
-return false; // TODO
-// const token = parser.it.list.at(tok);
-// return parser.typedefs.contains(token.slice());
-}
+const SymbolList = std.ArrayList(Symbol);
+const Symbol = struct {
+name: []const u8,
+ty: *Type,
+};
+fn pushScope(parser: *Parser) usize {
+return parser.symbols.len;
+}
+fn popScope(parser: *Parser, len: usize) void {
+parser.symbols.resize(len) catch unreachable;
+}
+fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Type {
+const name = parser.tree.slice(tok);
+const syms = parser.symbols.toSliceConst();
+var i = syms.len;
+while (i > 0) : (i -= 1) {
+if (mem.eql(u8, name, syms[i - 1].name)) {
+return syms[i - 1].ty;
+}
+}
+return null;
+}
/// Root <- ExternalDeclaration* eof
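pushScope and popScope treat the flat symbols list as a stack: a scope is just a watermark into it. A sketch of the intended call pattern (the block-parsing context is assumed, not shown in this diff):

    // on entering a block scope:
    const scope = parser.pushScope();
    defer parser.popScope(scope);
    // symbols declared while the block is parsed are appended to
    // parser.symbols and trimmed back to the watermark on exit
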
@@ -264,8 +283,8 @@ const Parser = struct {
/// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double
/// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary /
/// / Keyword_atomic LPAREN TypeName RPAREN
-/// / EnumSpecifier
-/// / RecordSpecifier
+/// / EnumSpec
+/// / RecordSpec
/// / IDENTIFIER // typedef name
/// / TypeQual
fn typeSpec(parser: *Parser, type_spec: *Node.TypeSpec) !bool {
@@ -473,22 +492,48 @@ const Parser = struct {
} else if (parser.eatToken(.Keyword_enum)) |tok| {
if (type_spec.spec != .None)
break :blk;
-@panic("TODO enum type");
-// return true;
+type_spec.Enum = try parser.enumSpec(tok);
+return true;
} else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| {
if (type_spec.spec != .None)
break :blk;
@panic("TODO record type");
// return true;
type_spec.Record = try parser.recordSpec();
return true;
} else if (parser.eatToken(.Identifier)) |tok| {
-if (!parser.getTypeDef(tok)) {
+const ty = parser.getSymbol(tok) orelse {
parser.putBackToken(tok);
return false;
-}
-type_spec.spec = .{
-.Typedef = tok,
-};
-return true;
+};
+switch (ty) {
+.Enum => |e| {
+return parser.err(.{
+.MustUseKwToRefer = .{ .kw = e.identifier, .sym = tok },
+});
+},
+.Record => |r| {
+return parser.err(.{
+.MustUseKwToRefer = .{
+.kw = switch (r.kind) {
+.Struct, .Union => |kw| kw,
+},
+.sym = tok,
+},
+});
+},
+.Typedef => {
+type_spec.spec = .{
+.Typedef = .{
+.sym = tok,
+.sym_type = ty,
+},
+};
+return true;
+},
+else => {
+parser.putBackToken(tok);
+return false;
+},
+}
}
}
return parser.err(.{
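The practical effect of the new IDENTIFIER branch above, sketched as C input in comments (illustrative; the MustUseKwToRefer error variant is referenced here but defined outside this diff):

    // typedef int my_int;
    // my_int x;     // lookup yields .Typedef: spec is set, parsing continues
    // struct Foo { int x; };
    // Foo f;        // lookup yields .Record: rejected with MustUseKwToRefer
    // unknown y;    // no symbol: token is put back, typeSpec returns false
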
@@ -567,13 +612,13 @@ const Parser = struct {
return false;
}
-/// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)?
+/// EnumSpec <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)?
fn enumSpecifier(parser: *Parser) !*Node {}
/// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA?
fn enumField(parser: *Parser) !*Node {}
-/// RecordSpecifier <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)?
+/// RecordSpec <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)?
fn recordSpecifier(parser: *Parser) !*Node {}
/// RecordField
@@ -581,8 +626,7 @@ const Parser = struct {
/// \ StaticAssert
fn recordField(parser: *Parser) !*Node {}
-/// TypeName
-/// <- TypeSpec* AbstractDeclarator?
+/// TypeName <- TypeSpec* AbstractDeclarator?
fn typeName(parser: *Parser) !*Node {
/// RecordDeclarator <- Declarator? (COLON ConstExpr)?


@@ -327,6 +327,7 @@ pub const Token = struct {
};
// TODO perfect hash at comptime
+// TODO do this in the preprocessor
pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id {
var hash = std.hash_map.hashString(bytes);
for (keywords) |kw| {
@@ -347,10 +348,6 @@ pub const Token = struct {
return null;
}
-pub fn slice(tok: Token) []const u8 {
-return tok.source.buffer[tok.start..tok.end];
-}
pub const NumSuffix = enum {
None,
F,