stage2: character literals and multiline strings

This commit is contained in:
Vexu 2020-08-18 15:33:11 +03:00
parent e0b01bd4a9
commit 2b45e23477
No known key found for this signature in database
GPG Key ID: 59AEB8936E16A6AC
4 changed files with 185 additions and 2 deletions

View File

@ -80,6 +80,107 @@ pub fn binNameAlloc(
}
}
/// Parses the source text of a character literal and returns its codepoint.
///
/// Only validates escape sequence characters.
/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between.
///
/// Recognized escapes: `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\xNN` (exactly two hex
/// digits) and `\u{N...}` (one or more hex digits, value at most 0x10ffff).
///
/// On `error.InvalidCharacter`, `bad_index` is set to the offset *within `slice`*
/// of the offending byte, so callers can index `slice` with it directly.
pub fn parseCharLiteral(
    slice: []const u8,
    bad_index: *usize, // populated if error.InvalidCharacter is returned
) error{InvalidCharacter}!u32 {
    std.debug.assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');

    if (slice[1] == '\\') {
        switch (slice[2]) {
            'n' => return '\n',
            'r' => return '\r',
            '\\' => return '\\',
            't' => return '\t',
            '\'' => return '\'',
            '"' => return '"',
            'x' => {
                // `'\xNN'` is always exactly six bytes long.
                if (slice.len != 6) {
                    bad_index.* = slice.len - 2;
                    return error.InvalidCharacter;
                }

                var value: u32 = 0;
                // Walk the two digit positions by absolute index so that
                // `bad_index` is relative to `slice`, not to a sub-slice.
                var i: usize = 3;
                while (i < 5) : (i += 1) {
                    const digit = std.fmt.charToDigit(slice[i], 16) catch {
                        bad_index.* = i;
                        return error.InvalidCharacter;
                    };
                    value = value * 16 + digit;
                }
                return value;
            },
            'u' => {
                if (slice.len < 6 or slice[3] != '{') {
                    bad_index.* = 2;
                    return error.InvalidCharacter;
                }

                var value: u32 = 0;
                var i: usize = 4;
                while (i < slice.len and slice[i] != '}') : (i += 1) {
                    const digit = std.fmt.charToDigit(slice[i], 16) catch {
                        bad_index.* = i;
                        return error.InvalidCharacter;
                    };
                    value = value * 16 + digit;
                    // Checking after every digit keeps `value` small enough
                    // that the next multiplication cannot overflow a u32.
                    if (value > 0x10ffff) {
                        bad_index.* = i;
                        return error.InvalidCharacter;
                    }
                }

                // `\u{}` carries no digits at all.
                if (i == 4) {
                    bad_index.* = i;
                    return error.InvalidCharacter;
                }
                // The closing brace must sit directly before the final quote.
                if (i != slice.len - 2) {
                    bad_index.* = i + 1;
                    return error.InvalidCharacter;
                }
                return value;
            },
            else => {
                bad_index.* = 2;
                return error.InvalidCharacter;
            },
        }
    }

    // Not an escape: the precondition guarantees a single valid codepoint.
    return std.unicode.utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
}
test "parseCharLiteral" {
    var bad_index: usize = undefined;

    // `expectEqual` takes the expected value first; the explicit `u32` keeps the
    // comparison in the function's return type instead of `comptime_int`.

    // Plain codepoints, including multi-byte UTF-8.
    std.testing.expectEqual(@as(u32, 'a'), try parseCharLiteral("'a'", &bad_index));
    std.testing.expectEqual(@as(u32, 'ä'), try parseCharLiteral("'ä'", &bad_index));
    std.testing.expectEqual(@as(u32, 0x3041), try parseCharLiteral("'ぁ'", &bad_index));

    // Simple escapes.
    std.testing.expectEqual(@as(u32, '\n'), try parseCharLiteral("'\\n'", &bad_index));
    std.testing.expectEqual(@as(u32, '\\'), try parseCharLiteral("'\\\\'", &bad_index));

    // Numeric escapes.
    std.testing.expectEqual(@as(u32, 0), try parseCharLiteral("'\\x00'", &bad_index));
    std.testing.expectEqual(@as(u32, 0x3041), try parseCharLiteral("'\\u{3041}'", &bad_index));

    // Malformed escapes.
    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\x0'", &bad_index));
    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\y'", &bad_index));
    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u'", &bad_index));
    std.testing.expectError(error.InvalidCharacter, parseCharLiteral("'\\u{FFFFFF}'", &bad_index));
}
test "" {
    // Reference every declaration of this file through std.meta.refAllDecls.
    const meta = @import("std").meta;
    meta.refAllDecls(@This());
}

View File

@ -131,6 +131,8 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr
.ArrayType => return rlWrap(mod, scope, rl, try arrayType(mod, scope, node.castTag(.ArrayType).?)),
.ArrayTypeSentinel => return rlWrap(mod, scope, rl, try arrayTypeSentinel(mod, scope, node.castTag(.ArrayTypeSentinel).?)),
.EnumLiteral => return rlWrap(mod, scope, rl, try enumLiteral(mod, scope, node.castTag(.EnumLiteral).?)),
.MultilineStringLiteral => return rlWrap(mod, scope, rl, try multilineStrLiteral(mod, scope, node.castTag(.MultilineStringLiteral).?)),
.CharLiteral => return rlWrap(mod, scope, rl, try charLiteral(mod, scope, node.castTag(.CharLiteral).?)),
.Defer => return mod.failNode(scope, node, "TODO implement astgen.expr for .Defer", .{}),
.Catch => return mod.failNode(scope, node, "TODO implement astgen.expr for .Catch", .{}),
@ -159,8 +161,6 @@ pub fn expr(mod: *Module, scope: *Scope, rl: ResultLoc, node: *ast.Node) InnerEr
.ErrorType => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorType", .{}),
.FnProto => return mod.failNode(scope, node, "TODO implement astgen.expr for .FnProto", .{}),
.AnyFrameType => return mod.failNode(scope, node, "TODO implement astgen.expr for .AnyFrameType", .{}),
.MultilineStringLiteral => return mod.failNode(scope, node, "TODO implement astgen.expr for .MultilineStringLiteral", .{}),
.CharLiteral => return mod.failNode(scope, node, "TODO implement astgen.expr for .CharLiteral", .{}),
.ErrorSetDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ErrorSetDecl", .{}),
.ContainerDecl => return mod.failNode(scope, node, "TODO implement astgen.expr for .ContainerDecl", .{}),
.Comptime => return mod.failNode(scope, node, "TODO implement astgen.expr for .Comptime", .{}),
@ -497,6 +497,7 @@ fn arrayType(mod: *Module, scope: *Scope, node: *ast.Node.ArrayType) !*zir.Inst
.val = Value.initTag(.usize_type),
});
// TODO check for [_]T
const len = try expr(mod, scope, .{ .ty = usize_type }, node.len_expr);
const child_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs);
@ -515,6 +516,7 @@ fn arrayTypeSentinel(mod: *Module, scope: *Scope, node: *ast.Node.ArrayTypeSenti
.val = Value.initTag(.usize_type),
});
// TODO check for [_]T
const len = try expr(mod, scope, .{ .ty = usize_type }, node.len_expr);
const sentinel_uncasted = try expr(mod, scope, .none, node.sentinel);
const elem_type = try expr(mod, scope, .{ .ty = meta_type }, node.rhs);
@ -1120,6 +1122,53 @@ fn stringLiteral(mod: *Module, scope: *Scope, str_lit: *ast.Node.OneToken) Inner
return addZIRInst(mod, scope, src, zir.Inst.Str, .{ .bytes = bytes }, .{});
}
/// Lowers a multiline string literal node into a single `zir.Inst.Str`.
///
/// Every line token contributes its text with the first two bytes removed
/// (presumably the leading `\\` marker), and consecutive lines are joined with
/// a '\n'. The resulting buffer is allocated from `scope.arena()`.
///
/// NOTE(review): if the tokenizer includes the trailing newline in each line
/// token, the output would contain doubled newlines — confirm against the tokenizer.
fn multilineStrLiteral(mod: *Module, scope: *Scope, node: *ast.Node.MultilineStringLiteral) !*zir.Inst {
    const tree = scope.tree();
    const line_tokens = node.linesConst();
    const src = tree.token_locs[line_tokens[0]].start;

    // One separator between each pair of lines, plus each line minus its two-byte prefix.
    var total_len = line_tokens.len - 1;
    for (line_tokens) |tok| {
        total_len += tree.tokenSlice(tok).len - 2;
    }

    const bytes = try scope.arena().alloc(u8, total_len);

    var cursor: usize = 0;
    var line_i: usize = 0;
    while (line_i < line_tokens.len) : (line_i += 1) {
        // Separator goes before every line except the first.
        if (line_i > 0) {
            bytes[cursor] = '\n';
            cursor += 1;
        }
        const content = tree.tokenSlice(line_tokens[line_i])[2..];
        mem.copy(u8, bytes[cursor..], content);
        cursor += content.len;
    }

    return addZIRInst(mod, scope, src, zir.Inst.Str, .{ .bytes = bytes }, .{});
}
/// Lowers a character literal token into a `comptime_int` constant holding the
/// value returned by `std.zig.parseCharLiteral`.
///
/// When parsing fails with `error.InvalidCharacter`, a compile error is reported
/// at `src + bad_index`, naming the byte found at that offset in the token text.
fn charLiteral(mod: *Module, scope: *Scope, node: *ast.Node.OneToken) !*zir.Inst {
    const tree = scope.tree();
    const src = tree.token_locs[node.token].start;
    const literal = tree.tokenSlice(node.token);

    // Filled in by parseCharLiteral only on the error path.
    var bad_index: usize = undefined;
    const codepoint = if (std.zig.parseCharLiteral(literal, &bad_index)) |parsed| parsed else |err| switch (err) {
        error.InvalidCharacter => {
            const offending = literal[bad_index];
            return mod.fail(scope, src + bad_index, "invalid character: '{c}'\n", .{offending});
        },
    };

    const payload = try scope.arena().create(Value.Payload.Int_u64);
    payload.* = .{ .int = codepoint };

    return addZIRInstConst(mod, scope, src, .{
        .ty = Type.initTag(.comptime_int),
        .val = Value.initPayload(&payload.base),
    });
}
fn integerLiteral(mod: *Module, scope: *Scope, int_lit: *ast.Node.OneToken) InnerError!*zir.Inst {
const arena = scope.arena();
const tree = scope.tree();

View File

@ -365,6 +365,7 @@ fn analyzeInstEnsureResultNonError(mod: *Module, scope: *Scope, inst: *zir.Inst.
fn analyzeInstAlloc(mod: *Module, scope: *Scope, inst: *zir.Inst.UnOp) InnerError!*Inst {
const var_type = try resolveType(mod, scope, inst.positionals.operand);
// TODO this should happen only for var allocs
if (!var_type.isValidVarType()) {
return mod.fail(scope, inst.base.src, "variable of type '{}' must be const or comptime", .{var_type});
}

View File

@ -543,6 +543,38 @@ pub fn addCases(ctx: *TestContext) !void {
,
"",
);
case.addCompareOutput(
\\export fn _start() noreturn {
\\ const ignore =
\\ \\ cool thx
\\ \\
\\ ;
\\ add('ぁ', '\x03');
\\
\\ exit();
\\}
\\
\\fn add(a: u32, b: u32) void {
\\ assert(a + b == 12356);
\\}
\\
\\pub fn assert(ok: bool) void {
\\ if (!ok) unreachable; // assertion failure
\\}
\\
\\fn exit() noreturn {
\\ asm volatile ("syscall"
\\ :
\\ : [number] "{rax}" (231),
\\ [arg1] "{rdi}" (0)
\\ : "rcx", "r11", "memory"
\\ );
\\ unreachable;
\\}
,
"",
);
}
{