From 8d38a91ca8b52d8e209db5041bd6f351da9cac22 Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 22 Sep 2020 15:15:41 +0300 Subject: [PATCH 1/3] std.fmt: add specifier for Zig identifiers --- lib/std/fmt.zig | 88 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 7 deletions(-) diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index a3a97020bf..5b673eec1b 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -65,6 +65,8 @@ fn peekIsAlign(comptime fmt: []const u8) bool { /// - format the non-numeric value as a string of bytes in hexadecimal notation ("binary dump") in either lower case or upper case /// - output numeric value in hexadecimal notation /// - `s`: print a pointer-to-many as a c-string, use zero-termination +/// - `z`: escape the string with @"" syntax if it is not a valid Zig identifier. +/// - `Z`: print the string escaping non-printable characters using Zig escape sequences. /// - `B` and `Bi`: output a memory size in either metric (1000) or power-of-two (1024) based notation. works for both float and integer values. 
/// - `e` and `E`: if printing a string, escape non-printable characters /// - `e`: output floating point value in scientific notation @@ -543,7 +545,14 @@ pub fn formatIntValue( } else { @compileError("Cannot print integer that is larger than 8 bits as a ascii"); } - } else if (comptime std.mem.eql(u8, fmt, "b")) { + } else if (comptime std.mem.eql(u8, fmt, "Z")) { + if (@typeInfo(@TypeOf(int_value)).Int.bits <= 8) { + const c: u8 = int_value; + return formatZigEscapes(@as(*const [1]u8, &c), options, writer); + } else { + @compileError("Cannot escape character with more than 8 bits"); + } + }else if (comptime std.mem.eql(u8, fmt, "b")) { radix = 2; uppercase = false; } else if (comptime std.mem.eql(u8, fmt, "x")) { @@ -612,6 +621,10 @@ pub fn formatText( } } return; + } else if (comptime std.mem.eql(u8, fmt, "z")) { + return formatZigIdentifier(bytes, options, writer); + } else if (comptime std.mem.eql(u8, fmt, "Z")) { + return formatZigEscapes(bytes, options, writer); } else { @compileError("Unknown format string: '" ++ fmt ++ "'"); } @@ -652,9 +665,62 @@ pub fn formatBuf( } } -// Print a float in scientific notation to the specified precision. Null uses full precision. -// It should be the case that every full precision, printed value can be re-parsed back to the -// same type unambiguously. +/// Print the string as a Zig identifier escaping it with @"" syntax if needed. 
+pub fn formatZigIdentifier( + bytes: []const u8, + options: FormatOptions, + writer: anytype, +) !void { + if (isValidZigIdentifier(bytes)) { + return writer.writeAll(bytes); + } + try writer.writeAll("@\""); + try formatZigEscapes(bytes, options, writer); + try writer.writeByte('"'); +} + +fn isValidZigIdentifier(bytes: []const u8) bool { + for (bytes) |c, i| { + switch (c) { + '_', 'a'...'z', 'A'...'Z' => {}, + '0'...'9' => if (i == 0) return false, + else => return false, + } + } + return bytes.len != 0 and std.zig.Token.getKeyword(bytes) == null; +} + +pub fn formatZigEscapes( + bytes: []const u8, + options: FormatOptions, + writer: anytype, +) !void { + for (bytes) |c| { + const s: []const u8 = switch (c) { + '\"' => "\\\"", + '\'' => "\\'", + '\\' => "\\\\", + '\n' => "\\n", + '\r' => "\\r", + '\t' => "\\t", + // Handle the remaining escapes Zig doesn't support by turning them + // into their respective hex representation + else => if (std.ascii.isCntrl(c)) { + try writer.writeAll("\\x"); + try formatInt(c, 16, false, .{ .width = 2, .fill = '0' }, writer); + continue; + } else { + try writer.writeByte(c); + continue; + }, + }; + try writer.writeAll(s); + } +} + +/// Print a float in scientific notation to the specified precision. Null uses full precision. +/// It should be the case that every full precision, printed value can be re-parsed back to the +/// same type unambiguously. pub fn formatFloatScientific( value: anytype, options: FormatOptions, @@ -746,8 +812,8 @@ pub fn formatFloatScientific( } } -// Print a float of the format x.yyyyy where the number of y is specified by the precision argument. -// By default floats are printed at full precision (no rounding). +/// Print a float of the format x.yyyyy where the number of y is specified by the precision argument. +/// By default floats are printed at full precision (no rounding). 
pub fn formatFloatDecimal( value: anytype, options: FormatOptions, @@ -1136,7 +1202,7 @@ pub fn bufPrintZ(buf: []u8, comptime fmt: []const u8, args: anytype) BufPrintErr return result[0 .. result.len - 1 :0]; } -// Count the characters needed for format. Useful for preallocating memory +/// Count the characters needed for format. Useful for preallocating memory pub fn count(comptime fmt: []const u8, args: anytype) u64 { var counting_writer = std.io.countingWriter(std.io.null_writer); format(counting_writer.writer(), fmt, args) catch |err| switch (err) {}; @@ -1334,6 +1400,14 @@ test "escape non-printable" { try testFmt("ab\\xFFc", "{E}", .{"ab\xffc"}); } +test "escape invalid identifiers" { + try testFmt("@\"while\"", "{z}", .{"while"}); + try testFmt("hello", "{z}", .{"hello"}); + try testFmt("@\"11\\\"23\"", "{z}", .{"11\"23"}); + try testFmt("@\"11\\x0f23\"", "{z}", .{"11\x0F23"}); + try testFmt("\\x0f", "{Z}", .{0x0f}); +} + test "pointer" { { const value = @intToPtr(*align(1) i32, 0xdeadbeef); From 2c294676b52f2ba62172fa778a198f92d6a969f0 Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 22 Sep 2020 15:28:37 +0300 Subject: [PATCH 2/3] use new format specifier in translate-c and std lib --- lib/std/build.zig | 12 ++++----- src/translate_c.zig | 66 +++------------------------------------------ 2 files changed, 9 insertions(+), 69 deletions(-) diff --git a/lib/std/build.zig b/lib/std/build.zig index 0a12b10db4..4e082ff9ad 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -1767,7 +1767,7 @@ pub const LibExeObjStep = struct { const out = self.build_options_contents.outStream(); switch (T) { []const []const u8 => { - out.print("pub const {}: []const []const u8 = &[_][]const u8{{\n", .{name}) catch unreachable; + out.print("pub const {z}: []const []const u8 = &[_][]const u8{{\n", .{name}) catch unreachable; for (value) |slice| { out.writeAll(" ") catch unreachable; std.zig.renderStringLiteral(slice, out) catch unreachable; @@ -1777,13 +1777,13 @@ pub const 
LibExeObjStep = struct { return; }, []const u8 => { - out.print("pub const {}: []const u8 = ", .{name}) catch unreachable; + out.print("pub const {z}: []const u8 = ", .{name}) catch unreachable; std.zig.renderStringLiteral(value, out) catch unreachable; out.writeAll(";\n") catch unreachable; return; }, ?[]const u8 => { - out.print("pub const {}: ?[]const u8 = ", .{name}) catch unreachable; + out.print("pub const {z}: ?[]const u8 = ", .{name}) catch unreachable; if (value) |payload| { std.zig.renderStringLiteral(payload, out) catch unreachable; out.writeAll(";\n") catch unreachable; @@ -1796,15 +1796,15 @@ pub const LibExeObjStep = struct { } switch (@typeInfo(T)) { .Enum => |enum_info| { - out.print("pub const {} = enum {{\n", .{@typeName(T)}) catch unreachable; + out.print("pub const {z} = enum {{\n", .{@typeName(T)}) catch unreachable; inline for (enum_info.fields) |field| { - out.print(" {},\n", .{field.name}) catch unreachable; + out.print(" {z},\n", .{field.name}) catch unreachable; } out.writeAll("};\n") catch unreachable; }, else => {}, } - out.print("pub const {} = {};\n", .{ name, value }) catch unreachable; + out.print("pub const {z} = {};\n", .{ name, value }) catch unreachable; } /// The value is the path in the cache dir. 
diff --git a/src/translate_c.zig b/src/translate_c.zig index 982467aa8f..1efa50d9fa 100644 --- a/src/translate_c.zig +++ b/src/translate_c.zig @@ -1972,16 +1972,7 @@ fn transStringLiteral( const bytes_ptr = stmt.getString_bytes_begin_size(&len); const str = bytes_ptr[0..len]; - var char_buf: [4]u8 = undefined; - len = 0; - for (str) |c| len += escapeChar(c, &char_buf).len; - - const buf = try rp.c.arena.alloc(u8, len + "\"\"".len); - buf[0] = '"'; - writeEscapedString(buf[1..], str); - buf[buf.len - 1] = '"'; - - const token = try appendToken(rp.c, .StringLiteral, buf); + const token = try appendTokenFmt(rp.c, .StringLiteral, "\"{Z}\"", .{str}); const node = try rp.c.arena.create(ast.Node.OneToken); node.* = .{ .base = .{ .tag = .StringLiteral }, @@ -1999,41 +1990,6 @@ fn transStringLiteral( } } -fn escapedStringLen(s: []const u8) usize { - var len: usize = 0; - var char_buf: [4]u8 = undefined; - for (s) |c| len += escapeChar(c, &char_buf).len; - return len; -} - -fn writeEscapedString(buf: []u8, s: []const u8) void { - var char_buf: [4]u8 = undefined; - var i: usize = 0; - for (s) |c| { - const escaped = escapeChar(c, &char_buf); - mem.copy(u8, buf[i..], escaped); - i += escaped.len; - } -} - -// Returns either a string literal or a slice of `buf`. 
-fn escapeChar(c: u8, char_buf: *[4]u8) []const u8 { - return switch (c) { - '\"' => "\\\"", - '\'' => "\\'", - '\\' => "\\\\", - '\n' => "\\n", - '\r' => "\\r", - '\t' => "\\t", - // Handle the remaining escapes Zig doesn't support by turning them - // into their respective hex representation - else => if (std.ascii.isCntrl(c)) - std.fmt.bufPrint(char_buf, "\\x{x:0>2}", .{c}) catch unreachable - else - std.fmt.bufPrint(char_buf, "{c}", .{c}) catch unreachable, - }; -} - fn transCCast( rp: RestorePoint, scope: *Scope, @@ -2922,8 +2878,7 @@ fn transCharLiteral( if (val > 255) break :blk try transCreateNodeInt(rp.c, val); } - var char_buf: [4]u8 = undefined; - const token = try appendTokenFmt(rp.c, .CharLiteral, "'{}'", .{escapeChar(@intCast(u8, val), &char_buf)}); + const token = try appendTokenFmt(rp.c, .CharLiteral, "'{Z}'", .{@intCast(u8, val)}); const node = try rp.c.arena.create(ast.Node.OneToken); node.* = .{ .base = .{ .tag = .CharLiteral }, @@ -5247,23 +5202,8 @@ fn isZigPrimitiveType(name: []const u8) bool { mem.eql(u8, name, "c_ulonglong"); } -fn isValidZigIdentifier(name: []const u8) bool { - for (name) |c, i| { - switch (c) { - '_', 'a'...'z', 'A'...'Z' => {}, - '0'...'9' => if (i == 0) return false, - else => return false, - } - } - return true; -} - fn appendIdentifier(c: *Context, name: []const u8) !ast.TokenIndex { - if (!isValidZigIdentifier(name) or std.zig.Token.getKeyword(name) != null) { - return appendTokenFmt(c, .Identifier, "@\"{}\"", .{name}); - } else { - return appendTokenFmt(c, .Identifier, "{}", .{name}); - } + return appendTokenFmt(c, .Identifier, "{z}", .{name}); } fn transCreateNodeIdentifier(c: *Context, name: []const u8) !*ast.Node { From e8ca1b254d41d5711dc5294d99b8d81c74f36add Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 17 Oct 2020 17:12:58 +0300 Subject: [PATCH 3/3] std: remove renderStringLiteral in favor of std.fmt specifier --- lib/std/build.zig | 15 ++++--------- lib/std/fmt.zig | 40 +++++++++++++++------------------- 
lib/std/zig.zig | 1 - lib/std/zig/string_literal.zig | 30 ------------------------- src/value.zig | 3 ++- src/zir.zig | 8 +++---- 6 files changed, 28 insertions(+), 69 deletions(-) diff --git a/lib/std/build.zig b/lib/std/build.zig index 4e082ff9ad..f429efa602 100644 --- a/lib/std/build.zig +++ b/lib/std/build.zig @@ -1769,24 +1769,19 @@ pub const LibExeObjStep = struct { []const []const u8 => { out.print("pub const {z}: []const []const u8 = &[_][]const u8{{\n", .{name}) catch unreachable; for (value) |slice| { - out.writeAll(" ") catch unreachable; - std.zig.renderStringLiteral(slice, out) catch unreachable; - out.writeAll(",\n") catch unreachable; + out.print(" \"{Z}\",\n", .{slice}) catch unreachable; } out.writeAll("};\n") catch unreachable; return; }, []const u8 => { - out.print("pub const {z}: []const u8 = ", .{name}) catch unreachable; - std.zig.renderStringLiteral(value, out) catch unreachable; - out.writeAll(";\n") catch unreachable; + out.print("pub const {z}: []const u8 = \"{Z}\";\n", .{ name, value }) catch unreachable; return; }, ?[]const u8 => { out.print("pub const {z}: ?[]const u8 = ", .{name}) catch unreachable; if (value) |payload| { - std.zig.renderStringLiteral(payload, out) catch unreachable; - out.writeAll(";\n") catch unreachable; + out.print("\"{Z}\";\n", .{payload}) catch unreachable; } else { out.writeAll("null;\n") catch unreachable; } @@ -2017,9 +2012,7 @@ pub const LibExeObjStep = struct { // Render build artifact options at the last minute, now that the path is known. 
for (self.build_options_artifact_args.items) |item| { const out = self.build_options_contents.writer(); - out.print("pub const {}: []const u8 = ", .{item.name}) catch unreachable; - std.zig.renderStringLiteral(item.artifact.getOutputPath(), out) catch unreachable; - out.writeAll(";\n") catch unreachable; + out.print("pub const {z}: []const u8 = \"{Z}\";\n", .{ item.name, item.artifact.getOutputPath() }) catch unreachable; } const build_options_file = try fs.path.join( diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index 5b673eec1b..acb8a3d586 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -552,7 +552,7 @@ pub fn formatIntValue( } else { @compileError("Cannot escape character with more than 8 bits"); } - }else if (comptime std.mem.eql(u8, fmt, "b")) { + } else if (comptime std.mem.eql(u8, fmt, "b")) { radix = 2; uppercase = false; } else if (comptime std.mem.eql(u8, fmt, "x")) { @@ -695,27 +695,20 @@ pub fn formatZigEscapes( options: FormatOptions, writer: anytype, ) !void { - for (bytes) |c| { - const s: []const u8 = switch (c) { - '\"' => "\\\"", - '\'' => "\\'", - '\\' => "\\\\", - '\n' => "\\n", - '\r' => "\\r", - '\t' => "\\t", - // Handle the remaining escapes Zig doesn't support by turning them - // into their respective hex representation - else => if (std.ascii.isCntrl(c)) { - try writer.writeAll("\\x"); - try formatInt(c, 16, false, .{ .width = 2, .fill = '0' }, writer); - continue; - } else { - try writer.writeByte(c); - continue; - }, - }; - try writer.writeAll(s); - } + for (bytes) |byte| switch (byte) { + '\n' => try writer.writeAll("\\n"), + '\r' => try writer.writeAll("\\r"), + '\t' => try writer.writeAll("\\t"), + '\\' => try writer.writeAll("\\\\"), + '"' => try writer.writeAll("\\\""), + '\'' => try writer.writeAll("\\'"), + ' ', '!', '#'...'&', '('...'[', ']'...'~' => try writer.writeByte(byte), + // Use hex escapes for any remaining unprintable characters. 
+ else => { + try writer.writeAll("\\x"); + try formatInt(byte, 16, false, .{ .width = 2, .fill = '0' }, writer); + }, + }; } /// Print a float in scientific notation to the specified precision. Null uses full precision. @@ -1406,6 +1399,9 @@ test "escape invalid identifiers" { try testFmt("@\"11\\\"23\"", "{z}", .{"11\"23"}); try testFmt("@\"11\\x0f23\"", "{z}", .{"11\x0F23"}); try testFmt("\\x0f", "{Z}", .{0x0f}); + try testFmt( + \\" \\ hi \x07 \x11 \" derp \'" + , "\"{Z}\"", .{" \\ hi \x07 \x11 \" derp '"}); } test "pointer" { diff --git a/lib/std/zig.zig b/lib/std/zig.zig index c28b9b3e35..06a74a9786 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -11,7 +11,6 @@ pub const Tokenizer = tokenizer.Tokenizer; pub const parse = @import("zig/parse.zig").parse; pub const parseStringLiteral = @import("zig/string_literal.zig").parse; pub const render = @import("zig/render.zig").render; -pub const renderStringLiteral = @import("zig/string_literal.zig").render; pub const ast = @import("zig/ast.zig"); pub const system = @import("zig/system.zig"); pub const CrossTarget = @import("zig/cross_target.zig").CrossTarget; diff --git a/lib/std/zig/string_literal.zig b/lib/std/zig/string_literal.zig index ca3585944e..b92d795eee 100644 --- a/lib/std/zig/string_literal.zig +++ b/lib/std/zig/string_literal.zig @@ -127,33 +127,3 @@ test "parse" { expect(eql(u8, "foo", try parse(alloc, "\"f\x6f\x6f\"", &bad_index))); expect(eql(u8, "f💯", try parse(alloc, "\"f\u{1f4af}\"", &bad_index))); } - -/// Writes a Zig-syntax escaped string literal to the stream. Includes the double quotes. 
-pub fn render(utf8: []const u8, out_stream: anytype) !void { - try out_stream.writeByte('"'); - for (utf8) |byte| switch (byte) { - '\n' => try out_stream.writeAll("\\n"), - '\r' => try out_stream.writeAll("\\r"), - '\t' => try out_stream.writeAll("\\t"), - '\\' => try out_stream.writeAll("\\\\"), - '"' => try out_stream.writeAll("\\\""), - ' ', '!', '#'...'[', ']'...'~' => try out_stream.writeByte(byte), - else => try out_stream.print("\\x{x:0>2}", .{byte}), - }; - try out_stream.writeByte('"'); -} - -test "render" { - const expect = std.testing.expect; - const eql = std.mem.eql; - - var fixed_buf_mem: [32]u8 = undefined; - - { - var fbs = std.io.fixedBufferStream(&fixed_buf_mem); - try render(" \\ hi \x07 \x11 \" derp", fbs.outStream()); - expect(eql(u8, - \\" \\ hi \x07 \x11 \" derp" - , fbs.getWritten())); - } -} diff --git a/src/value.zig b/src/value.zig index a99ea4c04b..e8fe848d5d 100644 --- a/src/value.zig +++ b/src/value.zig @@ -350,7 +350,8 @@ pub const Value = extern union { val = elem_ptr.array_ptr; }, .empty_array => return out_stream.writeAll(".{}"), - .enum_literal, .bytes => return std.zig.renderStringLiteral(self.cast(Payload.Bytes).?.data, out_stream), + .enum_literal => return out_stream.print(".{z}", .{self.cast(Payload.Bytes).?.data}), + .bytes => return out_stream.print("\"{Z}\"", .{self.cast(Payload.Bytes).?.data}), .repeated => { try out_stream.writeAll("(repeated) "); val = val.cast(Payload.Repeated).?.val; diff --git a/src/zir.zig b/src/zir.zig index 96081afc55..76ba89e9c7 100644 --- a/src/zir.zig +++ b/src/zir.zig @@ -1216,17 +1216,17 @@ const Writer = struct { try stream.writeByte('}'); }, bool => return stream.writeByte("01"[@boolToInt(param)]), - []u8, []const u8 => return std.zig.renderStringLiteral(param, stream), + []u8, []const u8 => return stream.print("\"{Z}\"", .{param}), BigIntConst, usize => return stream.print("{}", .{param}), TypedValue => unreachable, // this is a special case *IrModule.Decl => unreachable, // this is a 
special case *Inst.Block => { const name = self.block_table.get(param).?; - return std.zig.renderStringLiteral(name, stream); + return stream.print("\"{Z}\"", .{name}); }, *Inst.Loop => { const name = self.loop_table.get(param).?; - return std.zig.renderStringLiteral(name, stream); + return stream.print("\"{Z}\"", .{name}); }, [][]const u8 => { try stream.writeByte('['); @@ -1234,7 +1234,7 @@ const Writer = struct { if (i != 0) { try stream.writeAll(", "); } - try std.zig.renderStringLiteral(str, stream); + try stream.print("\"{Z}\"", .{str}); } try stream.writeByte(']'); },