From aa0b93e87579e0a732fc6ef9cfa3d1cc071e8559 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:03:57 +0200 Subject: [PATCH 01/13] feat: new `control_code` It lowercases all constants and documents them. --- lib/std/ascii.zig | 106 ++++++++++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 8174361800..4a4564d3d7 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -12,43 +12,77 @@ const std = @import("std"); /// Contains constants for the C0 control codes of the ASCII encoding. /// https://en.wikipedia.org/wiki/C0_and_C1_control_codes pub const control_code = struct { - pub const NUL = 0x00; - pub const SOH = 0x01; - pub const STX = 0x02; - pub const ETX = 0x03; - pub const EOT = 0x04; - pub const ENQ = 0x05; - pub const ACK = 0x06; - pub const BEL = 0x07; - pub const BS = 0x08; - pub const TAB = 0x09; - pub const LF = 0x0A; - pub const VT = 0x0B; - pub const FF = 0x0C; - pub const CR = 0x0D; - pub const SO = 0x0E; - pub const SI = 0x0F; - pub const DLE = 0x10; - pub const DC1 = 0x11; - pub const DC2 = 0x12; - pub const DC3 = 0x13; - pub const DC4 = 0x14; - pub const NAK = 0x15; - pub const SYN = 0x16; - pub const ETB = 0x17; - pub const CAN = 0x18; - pub const EM = 0x19; - pub const SUB = 0x1A; - pub const ESC = 0x1B; - pub const FS = 0x1C; - pub const GS = 0x1D; - pub const RS = 0x1E; - pub const US = 0x1F; + /// Null. + pub const nul = 0x00; + /// Start of Heading. + pub const soh = 0x01; + /// Start of Text. + pub const stx = 0x02; + /// End of Text. + pub const etx = 0x03; + /// End of Transmission. + pub const eot = 0x04; + /// Enquiry. + pub const enq = 0x05; + /// Acknowledge. + pub const ack = 0x06; + /// Bell, Alert. + pub const bel = 0x07; + /// Backspace. + pub const bs = 0x08; + /// Horizontal Tab, Tab ('\t'). + pub const ht = 0x09; + /// Line Feed, Newline ('\n'). + pub const lf = 0x0A; + /// Vertical Tab. 
+ pub const vt = 0x0B; + /// Form Feed. + pub const ff = 0x0C; + /// Carriage Return ('\r'). + pub const cr = 0x0D; + /// Shift Out. + pub const so = 0x0E; + /// Shift In. + pub const si = 0x0F; + /// Data Link Escape. + pub const dle = 0x10; + /// Device Control One (XON). + pub const dc1 = 0x11; + /// Device Control Two. + pub const dc2 = 0x12; + /// Device Control Three (XOFF). + pub const dc3 = 0x13; + /// Device Control Four. + pub const dc4 = 0x14; + /// Negative Acknowledge. + pub const nak = 0x15; + /// Synchronous Idle. + pub const syn = 0x16; + /// End of Transmission Block + pub const etb = 0x17; + /// Cancel. + pub const can = 0x18; + /// End of Medium. + pub const em = 0x19; + /// Substitute. + pub const sub = 0x1A; + /// Escape. + pub const esc = 0x1B; + /// File separator. + pub const fs = 0x1C; + /// Group Separator. + pub const gs = 0x1D; + /// Record Separator. + pub const rs = 0x1E; + /// Unit separator. + pub const us = 0x1F; + /// Delete. + pub const del = 0x7F; - pub const DEL = 0x7F; - - pub const XON = 0x11; - pub const XOFF = 0x13; + /// An alias to `dc1`. + pub const xon = dc1; + /// An alias to `dc3`. + pub const xff = dc3; }; const tIndex = enum(u3) { From 93ca0c4a5e040baacfdda495439c4a21467e0e51 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:05:37 +0200 Subject: [PATCH 02/13] fix: off-by-one in `isCntrl` 0x1F (`control_code.us`) itself is also a control code. 
--- lib/std/ascii.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 4a4564d3d7..663f7df94f 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -232,7 +232,7 @@ pub fn isAlpha(c: u8) bool { } pub fn isCntrl(c: u8) bool { - return c < 0x20 or c == 127; //DEL + return c <= control_code.us or c == control_code.del; } pub fn isDigit(c: u8) bool { From d178df773a6a11eff0b7f06f41fe26930c228b91 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:13:46 +0200 Subject: [PATCH 03/13] api: deprecate `isBlank` and `isGraph`. I think `isBlank` and `isWhitespace` are quite confusable. What `isBlank` does is so simple that you can just do the `c == ' ' or c == '\t'` check yourself but in a lot of cases you don't even want that. `std.ascii` can't really know what you think "blank" means. That's why I think it's better to remove it. And again, it seems ambiguous considering that we have `isWhitespace`. Next, it also deprecates `isGraph`. It's the same as `isPrint(c) and c != ' '`, which I find confusing. When something is printable, you can say it also has a *graph*ical representation. Removing `isGraph` solves this possible confusion. 
--- lib/std/ascii.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 663f7df94f..a795ac9913 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -239,6 +239,7 @@ pub fn isDigit(c: u8) bool { return inTable(c, tIndex.Digit); } +/// DEPRECATED: use `isPrint(c) and c != ' '` instead pub fn isGraph(c: u8) bool { return inTable(c, tIndex.Graph); } @@ -285,6 +286,7 @@ pub fn isASCII(c: u8) bool { return c < 128; } +/// DEPRECATED: use `c == ' ' or c == '\x09'` or try `isWhitespace` pub fn isBlank(c: u8) bool { return (c == ' ') or (c == '\x09'); } From 02893d80cf013b14708d2262e9ecf0037cf6b3f2 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:31:12 +0200 Subject: [PATCH 04/13] api: rename and deprecate a bunch of functions `isAlNum` and `isAlpha`: 1. I think these names are a bit cryptic. 2. `isAlpha` is a bit ambiguous: is it alpha*numeric* or alpha*betic*? This is why I renamed `isAlpha` to `isAlphabetic`. 3. For consistency and because `isAlNum` looks weird, I renamed it to `isAlphanumeric`. `isCntrl`: 1. It's cryptic and hard to find when you look for it. 2. We don't save a lot of space writing it this way. 3. It's closer to the name of the `control_code` struct. `isSpace`: 1. The name is ambiguous and misleading. `spaces`: 1. Ditto `isXDigit`: 1. The name is extremely cryptic. 2. The function is very hard to find by its name. 
--- lib/std/ascii.zig | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index a795ac9913..7b9a2d4281 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -222,16 +222,31 @@ fn inTable(c: u8, t: tIndex) bool { return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0; } -pub fn isAlNum(c: u8) bool { +// remove all decls marked as DEPRECATED after 0.10.0 + +/// DEPRECATED: use `isAlphanumeric` +pub const isAlNum = isAlphanumeric; +/// DEPRECATED: use `isAlphabetic` +pub const isAlpha = isAlphabetic; +/// DEPRECATED: use `isControl` +pub const isCntrl = isControl; +/// DEPRECATED: use `isWhitespace`. +pub const isSpace = isWhitespace; +/// DEPRECATED: use `whitespace`. +pub const spaces = whitespace; +/// DEPRECATED: use `isHex`. +pub const isXDigit = isHex; + +pub fn isAlphanumeric(c: u8) bool { return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) | @as(u8, 1) << @enumToInt(tIndex.Digit))) != 0; } -pub fn isAlpha(c: u8) bool { +pub fn isAlphabetic(c: u8) bool { return inTable(c, tIndex.Alpha); } -pub fn isCntrl(c: u8) bool { +pub fn isControl(c: u8) bool { return c <= control_code.us or c == control_code.del; } @@ -256,21 +271,21 @@ pub fn isPunct(c: u8) bool { return inTable(c, tIndex.Punct); } -pub fn isSpace(c: u8) bool { +pub fn isWhitespace(c: u8) bool { return inTable(c, tIndex.Space); } -/// All the values for which isSpace() returns true. This may be used with -/// e.g. std.mem.trim() to trim whiteSpace. -pub const spaces = [_]u8{ ' ', '\t', '\n', '\r', control_code.VT, control_code.FF }; +/// Whitespace for general use. +/// This may be used with e.g. `std.mem.trim` to trim whitespace. +/// See also: `isSpace`.
+pub const whitespace = [_]u8{ ' ', '\t', '\n', '\r', control_code.vt, control_code.ff }; -test "spaces" { - const testing = std.testing; - for (spaces) |space| try testing.expect(isSpace(space)); +test "whitespace" { + for (whitespace) |char| try std.testing.expect(isWhitespace(char)); var i: u8 = 0; while (isASCII(i)) : (i += 1) { - if (isSpace(i)) try testing.expect(std.mem.indexOfScalar(u8, &spaces, i) != null); + if (isWhitespace(i)) try std.testing.expect(std.mem.indexOfScalar(u8, &whitespace, i) != null); } } @@ -278,7 +293,7 @@ pub fn isUpper(c: u8) bool { return inTable(c, tIndex.Upper); } -pub fn isXDigit(c: u8) bool { +pub fn isHex(c: u8) bool { return inTable(c, tIndex.Hex); } @@ -286,7 +301,7 @@ pub fn isASCII(c: u8) bool { return c < 128; } -/// DEPRECATED: use `c == ' ' or c == '\x09'` or try `isWhitespace` +/// DEPRECATED: use `c == ' ' or c == '\t'` or try `isWhitespace` pub fn isBlank(c: u8) bool { return (c == ' ') or (c == '\x09'); } From d130d09e2b9149380609fe43bd83553650d50c8c Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:39:53 +0200 Subject: [PATCH 05/13] docs: add more docs And improve some existing docs. --- lib/std/ascii.zig | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 7b9a2d4281..bc7f792854 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -1,16 +1,18 @@ -// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera does. -// I could have taken only a u7 to make this clear, but it would be slower -// It is my opinion that encodings other than UTF-8 should not be supported. -// -// (and 128 bytes is not much to pay). -// Also does not handle Unicode character classes. -// -// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png +//! The 7-bit [ASCII](https://en.wikipedia.org/wiki/ASCII) character encoding standard. +//! +//! 
This is not to be confused with the 8-bit [extended ASCII](https://en.wikipedia.org/wiki/Extended_ASCII) character encoding. +//! +//! Even though this module concerns itself with 7-bit ASCII, +//! functions use `u8` as the type instead of `u7` for convenience and compatibility. +//! Characters outside of the 7-bit range are gracefully handled (e.g. by returning `false`). +//! +//! See also: https://en.wikipedia.org/wiki/ASCII#Character_set const std = @import("std"); -/// Contains constants for the C0 control codes of the ASCII encoding. -/// https://en.wikipedia.org/wiki/C0_and_C1_control_codes +/// The C0 control codes of the ASCII encoding. +/// +/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `is_control`. pub const control_code = struct { /// Null. pub const nul = 0x00; @@ -237,15 +239,20 @@ pub const spaces = whitespace; /// DEPRECATED: use `isHex`. pub const isXDigit = isHex; +/// Returns whether the character is alphanumeric. This is case-insensitive. pub fn isAlphanumeric(c: u8) bool { return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) | @as(u8, 1) << @enumToInt(tIndex.Digit))) != 0; } +/// Returns whether the character is alphabetic. This is case-insensitive. pub fn isAlphabetic(c: u8) bool { return inTable(c, tIndex.Alpha); } +/// Returns whether the character is a control character. +/// +/// See also: `control_code`. pub fn isControl(c: u8) bool { return c <= control_code.us or c == control_code.del; } @@ -259,10 +266,13 @@ pub fn isGraph(c: u8) bool { return inTable(c, tIndex.Graph); } +/// Returns whether the character is lowercased. pub fn isLower(c: u8) bool { return inTable(c, tIndex.Lower); } +/// Returns whether the character has some graphical representation and can be printed. +/// This also returns `true` for the space character. 
pub fn isPrint(c: u8) bool { return inTable(c, tIndex.Graph) or c == ' '; } @@ -271,6 +281,7 @@ pub fn isPunct(c: u8) bool { return inTable(c, tIndex.Punct); } +/// Returns whether this character is included in `whitespace`. pub fn isWhitespace(c: u8) bool { return inTable(c, tIndex.Space); } @@ -289,10 +300,12 @@ test "whitespace" { } } +/// Returns whether the character is uppercased. pub fn isUpper(c: u8) bool { return inTable(c, tIndex.Upper); } +/// Returns whether the character is a hexadecimal digit. This is case-insensitive. pub fn isHex(c: u8) bool { return inTable(c, tIndex.Hex); } @@ -306,6 +319,7 @@ pub fn isBlank(c: u8) bool { return (c == ' ') or (c == '\x09'); } +/// Upper-cases the character and returns it as-is if it's already upper-cased. pub fn toUpper(c: u8) u8 { if (isLower(c)) { return c & 0b11011111; @@ -314,6 +328,7 @@ pub fn toUpper(c: u8) u8 { } } +/// Lower-cases the character and returns it as-is if it's already lower-cased. pub fn toLower(c: u8) u8 { if (isUpper(c)) { return c | 0b00100000; @@ -392,7 +407,7 @@ test "allocUpperString" { try std.testing.expectEqualStrings("ABCDEFGHIJKLMNOPQRST0234+💩!", result); } -/// Compares strings `a` and `b` case insensitively and returns whether they are equal. +/// Compares strings `a` and `b` case-insensitively and returns whether they are equal. 
pub fn eqlIgnoreCase(a: []const u8, b: []const u8) bool { if (a.len != b.len) return false; for (a) |a_c, i| { From 9b79c6ae522f46c48352897dba0bdc9376b0cd78 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:42:24 +0200 Subject: [PATCH 06/13] test: update and add more tests --- lib/std/ascii.zig | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index bc7f792854..a96ccbc9c2 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -340,13 +340,50 @@ pub fn toLower(c: u8) u8 { test "ascii character classes" { const testing = std.testing; + try testing.expect(!isControl('a')); + try testing.expect(!isControl('z')); + try testing.expect(isControl(control_code.nul)); + try testing.expect(isControl(control_code.ff)); + try testing.expect(isControl(control_code.us)); + try testing.expect('C' == toUpper('c')); try testing.expect(':' == toUpper(':')); try testing.expect('\xab' == toUpper('\xab')); + try testing.expect(!isUpper('z')); + try testing.expect('c' == toLower('C')); + try testing.expect(':' == toLower(':')); + try testing.expect('\xab' == toLower('\xab')); + try testing.expect(!isLower('Z')); + + try testing.expect(isAlphanumeric('Z')); + try testing.expect(isAlphanumeric('z')); + try testing.expect(isAlphanumeric('5')); + try testing.expect(isAlphanumeric('5')); + try testing.expect(!isAlphanumeric('!')); + + try testing.expect(!isAlpha('5')); try testing.expect(isAlpha('c')); try testing.expect(!isAlpha('5')); - try testing.expect(isSpace(' ')); + + try testing.expect(isWhitespace(' ')); + try testing.expect(isWhitespace('\t')); + try testing.expect(isWhitespace('\r')); + try testing.expect(isWhitespace('\n')); + try testing.expect(!isWhitespace('.')); + + try testing.expect(!isHex('g')); + try testing.expect(isHex('b')); + try testing.expect(isHex('9')); + + try testing.expect(!isDigit('~')); + try testing.expect(isDigit('0')); + try 
testing.expect(isDigit('9')); + + try testing.expect(isPrint(' ')); + try testing.expect(isPrint('@')); + try testing.expect(isPrint('~')); + try testing.expect(!isPrint(control_code.esc)); } /// Writes a lower case copy of `ascii_string` to `output`. @@ -463,7 +500,6 @@ test "indexOfIgnoreCase" { try std.testing.expect(indexOfIgnoreCase("one two three FouR", "gOur") == null); try std.testing.expect(indexOfIgnoreCase("foO", "Foo").? == 0); try std.testing.expect(indexOfIgnoreCase("foo", "fool") == null); - try std.testing.expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0); } From 028134055cca7c8ec9799a0fea4bccf285e5b722 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:43:40 +0200 Subject: [PATCH 07/13] fix: remove outdated TODO It was supposed to be removed: https://github.com/ziglang/zig/commit/490654c332f2d8eaf7edffa35ea0523800df998d#r47643278 --- lib/std/ascii.zig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index a96ccbc9c2..3d82b47df2 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -517,8 +517,7 @@ pub fn orderIgnoreCase(lhs: []const u8, rhs: []const u8) std.math.Order { return std.math.order(lhs.len, rhs.len); } -/// Returns true if lhs < rhs, false otherwise -/// TODO rename "IgnoreCase" to "Insensitive" in this entire file. +/// Returns whether `lhs` < `rhs`. pub fn lessThanIgnoreCase(lhs: []const u8, rhs: []const u8) bool { return orderIgnoreCase(lhs, rhs) == .lt; } From 8890190857524de10026d3ab0edaca86e8432a5c Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:46:26 +0200 Subject: [PATCH 08/13] fix: add missing 'o' --- lib/std/ascii.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 3d82b47df2..50715a9ca2 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -78,13 +78,14 @@ pub const control_code = struct { pub const rs = 0x1E; /// Unit separator. pub const us = 0x1F; + /// Delete. 
pub const del = 0x7F; /// An alias to `dc1`. pub const xon = dc1; /// An alias to `dc3`. - pub const xff = dc3; + pub const xoff = dc3; }; const tIndex = enum(u3) { From 846dbf2745561d8c264527973860b16ae698cb9a Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 21:49:21 +0200 Subject: [PATCH 09/13] docs: remove some hyphens --- lib/std/ascii.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 50715a9ca2..07d2e065bf 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -320,7 +320,7 @@ pub fn isBlank(c: u8) bool { return (c == ' ') or (c == '\x09'); } -/// Upper-cases the character and returns it as-is if it's already upper-cased. +/// Uppercases the character and returns it as-is if it's already uppercased. pub fn toUpper(c: u8) u8 { if (isLower(c)) { return c & 0b11011111; @@ -329,7 +329,7 @@ pub fn toUpper(c: u8) u8 { } } -/// Lower-cases the character and returns it as-is if it's already lower-cased. +/// Lowercases the character and returns it as-is if it's already lowercased. pub fn toLower(c: u8) u8 { if (isUpper(c)) { return c | 0b00100000; From ee97fbc199a6b5cf04637d044650d407ce44f883 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Sun, 14 Aug 2022 22:01:52 +0200 Subject: [PATCH 10/13] api: deprecate `isPunct` too See https://github.com/ziglang/zig/issues/8419#issuecomment-843719898 --- lib/std/ascii.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 07d2e065bf..55946cfc33 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -278,6 +278,7 @@ pub fn isPrint(c: u8) bool { return inTable(c, tIndex.Graph) or c == ' '; } +/// DEPRECATED: create your own function based on your needs and what you want to do. 
pub fn isPunct(c: u8) bool { return inTable(c, tIndex.Punct); } From 298062897da5de0766f180ebb7a66a6f1e91af88 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Tue, 16 Aug 2022 21:37:02 +0200 Subject: [PATCH 11/13] docs: fixes and improvements --- lib/std/ascii.zig | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 55946cfc33..06cbe71925 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -12,7 +12,7 @@ const std = @import("std"); /// The C0 control codes of the ASCII encoding. /// -/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `is_control`. +/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`. pub const control_code = struct { /// Null. pub const nul = 0x00; @@ -240,24 +240,26 @@ pub const spaces = whitespace; /// DEPRECATED: use `isHex`. pub const isXDigit = isHex; -/// Returns whether the character is alphanumeric. This is case-insensitive. +/// Returns whether the character is alphanumeric. pub fn isAlphanumeric(c: u8) bool { return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) | @as(u8, 1) << @enumToInt(tIndex.Digit))) != 0; } -/// Returns whether the character is alphabetic. This is case-insensitive. +/// Returns whether the character is alphabetic. pub fn isAlphabetic(c: u8) bool { return inTable(c, tIndex.Alpha); } /// Returns whether the character is a control character. +/// For 7-bit ASCII characters, this is the same as `!isPrint(c)`. /// /// See also: `control_code`. pub fn isControl(c: u8) bool { return c <= control_code.us or c == control_code.del; } +/// Returns whether the character is a digit. pub fn isDigit(c: u8) bool { return inTable(c, tIndex.Digit); } @@ -267,13 +269,14 @@ pub fn isGraph(c: u8) bool { return inTable(c, tIndex.Graph); } -/// Returns whether the character is lowercased. +/// Returns whether the character is a lowercased letter.
pub fn isLower(c: u8) bool { return inTable(c, tIndex.Lower); } /// Returns whether the character has some graphical representation and can be printed. /// This also returns `true` for the space character. +/// For 7-bit ASCII characters, this is the same as `!isControl(c)`. pub fn isPrint(c: u8) bool { return inTable(c, tIndex.Graph) or c == ' '; } @@ -290,7 +293,8 @@ pub fn isWhitespace(c: u8) bool { /// Whitespace for general use. /// This may be used with e.g. `std.mem.trim` to trim whitespace. -/// See also: `isSpace`. +/// +/// See also: `isWhitespace`. pub const whitespace = [_]u8{ ' ', '\t', '\n', '\r', control_code.vt, control_code.ff }; test "whitespace" { @@ -302,7 +306,7 @@ test "whitespace" { } } -/// Returns whether the character is uppercased. +/// Returns whether the character is an uppercased letter. pub fn isUpper(c: u8) bool { return inTable(c, tIndex.Upper); } @@ -312,6 +316,7 @@ pub fn isHex(c: u8) bool { return inTable(c, tIndex.Hex); } +/// Returns whether the character is a 7-bit ASCII character. pub fn isASCII(c: u8) bool { return c < 128; } @@ -321,7 +326,7 @@ pub fn isBlank(c: u8) bool { return (c == ' ') or (c == '\x09'); } -/// Uppercases the character and returns it as-is if it's already uppercased. +/// Uppercases the character and returns it as-is if it's already uppercased or not a letter. pub fn toUpper(c: u8) u8 { if (isLower(c)) { return c & 0b11011111; @@ -330,7 +335,7 @@ pub fn toUpper(c: u8) u8 { } } -/// Lowercases the character and returns it as-is if it's already lowercased. +/// Lowercases the character and returns it as-is if it's already lowercased or not a letter. pub fn toLower(c: u8) u8 { if (isUpper(c)) { return c | 0b00100000; @@ -505,7 +510,7 @@ test "indexOfIgnoreCase" { try std.testing.expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0); } -/// Compares two slices of numbers lexicographically. O(n). +/// Returns the lexicographical order of two slices. O(n).
pub fn orderIgnoreCase(lhs: []const u8, rhs: []const u8) std.math.Order { const n = std.math.min(lhs.len, rhs.len); var i: usize = 0; @@ -519,7 +524,7 @@ pub fn orderIgnoreCase(lhs: []const u8, rhs: []const u8) std.math.Order { return std.math.order(lhs.len, rhs.len); } -/// Returns whether `lhs` < `rhs`. +/// Returns whether the lexicographical order of `lhs` is lower than `rhs`. pub fn lessThanIgnoreCase(lhs: []const u8, rhs: []const u8) bool { return orderIgnoreCase(lhs, rhs) == .lt; } From 37880f2385fee17a2d2ff4388cbe2b3395ed47fa Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Wed, 17 Aug 2022 20:07:18 +0200 Subject: [PATCH 12/13] api: deprecate uppercase control_code consts --- lib/std/ascii.zig | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 06cbe71925..7087fa287f 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -14,6 +14,77 @@ const std = @import("std"); /// /// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`.
pub const control_code = struct { + // DEPRECATED: use the lowercase variant + pub const NUL = 0x00; + // DEPRECATED: use the lowercase variant + pub const SOH = 0x01; + // DEPRECATED: use the lowercase variant + pub const STX = 0x02; + // DEPRECATED: use the lowercase variant + pub const ETX = 0x03; + // DEPRECATED: use the lowercase variant + pub const EOT = 0x04; + // DEPRECATED: use the lowercase variant + pub const ENQ = 0x05; + // DEPRECATED: use the lowercase variant + pub const ACK = 0x06; + // DEPRECATED: use the lowercase variant + pub const BEL = 0x07; + // DEPRECATED: use the lowercase variant + pub const BS = 0x08; + // DEPRECATED: use `ht` + pub const TAB = 0x09; + // DEPRECATED: use the lowercase variant + pub const LF = 0x0A; + // DEPRECATED: use the lowercase variant + pub const VT = 0x0B; + // DEPRECATED: use the lowercase variant + pub const FF = 0x0C; + // DEPRECATED: use the lowercase variant + pub const CR = 0x0D; + // DEPRECATED: use the lowercase variant + pub const SO = 0x0E; + // DEPRECATED: use the lowercase variant + pub const SI = 0x0F; + // DEPRECATED: use the lowercase variant + pub const DLE = 0x10; + // DEPRECATED: use the lowercase variant + pub const DC1 = 0x11; + // DEPRECATED: use the lowercase variant + pub const DC2 = 0x12; + // DEPRECATED: use the lowercase variant + pub const DC3 = 0x13; + // DEPRECATED: use the lowercase variant + pub const DC4 = 0x14; + // DEPRECATED: use the lowercase variant + pub const NAK = 0x15; + // DEPRECATED: use the lowercase variant + pub const SYN = 0x16; + // DEPRECATED: use the lowercase variant + pub const ETB = 0x17; + // DEPRECATED: use the lowercase variant + pub const CAN = 0x18; + // DEPRECATED: use the lowercase variant + pub const EM = 0x19; + // DEPRECATED: use the lowercase variant + pub const SUB = 0x1A; + // DEPRECATED: use the lowercase variant + pub const ESC = 0x1B; + // DEPRECATED: use the lowercase variant + pub const FS = 0x1C; + // DEPRECATED: use the lowercase variant + pub 
const GS = 0x1D; + // DEPRECATED: use the lowercase variant + pub const RS = 0x1E; + // DEPRECATED: use the lowercase variant + pub const US = 0x1F; + // DEPRECATED: use the lowercase variant + pub const DEL = 0x7F; + // DEPRECATED: use the lowercase variant + pub const XON = 0x11; + // DEPRECATED: use the lowercase variant + pub const XOFF = 0x13; + /// Null. pub const nul = 0x00; /// Start of Heading. From 4ea3a9ba9f867a74f3f6c8908c3ebba0876771f1 Mon Sep 17 00:00:00 2001 From: r00ster91 Date: Wed, 17 Aug 2022 20:08:13 +0200 Subject: [PATCH 13/13] fix: comments --- lib/std/ascii.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig index 7087fa287f..cd8b14e98f 100644 --- a/lib/std/ascii.zig +++ b/lib/std/ascii.zig @@ -10,6 +10,8 @@ const std = @import("std"); +// TODO: remove all decls marked as DEPRECATED after 0.10.0's release + /// The C0 control codes of the ASCII encoding. /// /// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`. @@ -141,13 +143,13 @@ pub const control_code = struct { pub const sub = 0x1A; /// Escape. pub const esc = 0x1B; - /// File separator. + /// File Separator. pub const fs = 0x1C; /// Group Separator. pub const gs = 0x1D; /// Record Separator. pub const rs = 0x1E; - /// Unit separator. + /// Unit Separator. pub const us = 0x1F; /// Delete. @@ -296,8 +298,6 @@ fn inTable(c: u8, t: tIndex) bool { return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0; } -// remove all decls marked as DEPRECATED after 0.10.0 - /// DEPRECATED: use `isAlphanumeric` pub const isAlNum = isAlphanumeric; /// DEPRECATED: use `isAlpha`