Simple cli and ziql tokenizer

This commit is contained in:
Adrien Bouvais 2024-09-02 20:22:46 +02:00
parent e3f0ff1d84
commit be84a8e481
7 changed files with 292 additions and 53 deletions

View File

@ -10,19 +10,68 @@ Zig is fast, blablabla
Meme "That's the neat part..."
Zippon is a strutural relational potentially in memory written entirely in Zig from stractch.
Zippon is a structural, relational, potentially in-memory database written entirely in Zig from scratch.
You build a binary according to your schema, you can just run it with some arguments and it will create and manage a folder 'zipponDB_DATA'.
You build a binary according to your schema, you can just run it to access a CLI and it will create and manage a folder 'zipponDB_DATA'.
Then you do what you want with it, including:
- Run it with your app as a file and folder
- Run it with your app as a separate process and folder
- Create a Docker and open some port
- Create a Docker with a small API
- Create a Docker with a small API like flask
- Other stuff — I'm sure some will find something nice
Note that you can have multiple binaries that run together. Each binary has a unique id that is used to segregate binaries inside the folder 'zipponDB_DATA'
# Integration
## Python
```python
import zippondb as zdb
client = zdb.newClient('path/to/binary')
print(client.run('describe'))
users = client.run('GRAB User {}')
for user in users:
print(user.name)
client.run('save')
```
# Benchmark
I did a database with random data. The schema is like that:
```
User {
name: str,
email: str,
friends: []User.friends,
posts: []Post.from,
liked_post: []Post.like_by,
comments: []Comment.from,
liked_com: []Comment.like_by,
}
Post {
title: str,
image: str,
at: date,
from: User.posts,
like_by: []User.liked_post,
comments: []Comment.of,
}
Comment {
content: str,
at: date,
from: User.comments,
like_by: User.liked_com,
of: Post.comments,
}
```
As you can see, links need to be defined in both structs. [] means an array of values.
For example `posts: []Post.from,` and `from: User.posts,` mean that a `User` can have multiple posts (an array of `Post`) and a post
has just one author. Both are linked by the fields `posts` and `from`.
# Create a schema
Zippon uses structs as a way of saving data. A struct is a way of storing multiple pieces of data of different types.
@ -83,6 +132,7 @@ ADD User ( name = 'Adrien', email = 'email', age = 40 }
- () Are new or updated data (Not already saved)
- || Are additional options
- Data are in struct format and can have link
- By default, all values other than links are returned per query, to prevent recursive returns (User.friends in User.friends)
# How it really works

View File

@ -16,7 +16,7 @@ pub fn build(b: *std.Build) void {
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
.name = "zippon2",
.name = "zippon",
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,

186
src/cliTokenizer.zig Normal file
View File

@ -0,0 +1,186 @@
// From https://github.com/ziglang/zig/blob/master/lib/std/zig/tokenizer.zig
const std = @import("std");
/// A single token produced by the CLI tokenizer: a tag describing what was
/// matched plus the byte range it occupies in the source buffer.
pub const Token = struct {
    tag: Tag,
    loc: Loc,

    /// Byte offsets of the token inside the source buffer (`end` exclusive).
    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    /// Every kind of token the CLI tokenizer can emit.
    pub const Tag = enum {
        eof,
        invalid,
        keyword_run,
        keyword_help,
        keyword_describe,
        keyword_quit,
        string_literal,
        identifier,
    };

    /// Comptime map from the textual form of a CLI command to its keyword tag.
    pub const keywords = std.StaticStringMap(Tag).initComptime(.{
        .{ "run", .keyword_run },
        .{ "help", .keyword_help },
        .{ "describe", .keyword_describe },
        .{ "quit", .keyword_quit },
    });

    /// Returns the keyword tag for `bytes`, or null when it is not a keyword.
    pub fn getKeyword(bytes: []const u8) ?Tag {
        return keywords.get(bytes);
    }

    /// True when this token is one of the recognized command keywords.
    pub fn isKeyword(self: Token) bool {
        return switch (self.tag) {
            .keyword_run, .keyword_describe, .keyword_help, .keyword_quit => true,
            else => false,
        };
    }
};
/// State-machine tokenizer over a null-terminated buffer.
/// Call `next()` repeatedly; it returns `.eof` once the sentinel 0 at
/// `buffer.len` is reached. After an `.invalid` token the tokenizer is not
/// guaranteed to recover (see the note in the test helper below).
pub const Tokenizer = struct {
    buffer: [:0]const u8,
    index: usize,

    /// For debugging purposes.
    pub fn dump(self: *Tokenizer, token: *const Token) void {
        std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.loc.start..token.loc.end] });
    }

    pub fn init(buffer: [:0]const u8) Tokenizer {
        // Skip the UTF-8 BOM if present.
        return .{
            .buffer = buffer,
            .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0,
        };
    }

    /// Internal scanning states; `start` is re-entered for every new token.
    const State = enum {
        start,
        invalid,
        identifier,
        string_literal,
        string_literal_backslash,
    };

    /// Scans and returns the next token. The loop's continue-expression
    /// advances `index` once per character; `break` leaves `index` at the
    /// first byte NOT belonging to the returned token (except for the
    /// closing quote of a string, which is consumed explicitly).
    pub fn next(self: *Tokenizer) Token {
        var state: State = .start;
        var result: Token = .{
            .tag = undefined,
            .loc = .{
                .start = self.index,
                .end = undefined,
            },
        };
        while (true) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (state) {
                .start => switch (c) {
                    0 => {
                        // The sentinel 0 at buffer.len means end of input;
                        // a 0 byte anywhere earlier is an invalid token.
                        if (self.index == self.buffer.len) return .{
                            .tag = .eof,
                            .loc = .{
                                .start = self.index,
                                .end = self.index,
                            },
                        };
                        state = .invalid;
                    },
                    // Whitespace between tokens: slide the token start forward.
                    ' ', '\n', '\t', '\r' => {
                        result.loc.start = self.index + 1;
                    },
                    'a'...'z', 'A'...'Z', '_' => {
                        state = .identifier;
                        result.tag = .identifier;
                    },
                    '"' => {
                        state = .string_literal;
                        result.tag = .string_literal;
                    },
                    else => {
                        state = .invalid;
                    },
                },
                // One extra iteration after entering .invalid so the bad
                // byte is included in the token span, then give up.
                .invalid => {
                    result.tag = .invalid;
                    break;
                },
                .identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => continue,
                    else => {
                        // Identifier ended: promote to a keyword tag when the
                        // accumulated text matches a CLI command.
                        if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |tag| {
                            result.tag = tag;
                        }
                        break;
                    },
                },
                .string_literal => switch (c) {
                    0 => {
                        // Unterminated string: invalid whether we hit a stray
                        // 0 byte or the real end of the buffer.
                        if (self.index != self.buffer.len) {
                            state = .invalid;
                            continue;
                        }
                        result.tag = .invalid;
                        break;
                    },
                    '\n' => {
                        // Strings may not span lines.
                        result.tag = .invalid;
                        break;
                    },
                    '\\' => {
                        state = .string_literal_backslash;
                    },
                    '"' => {
                        // Consume the closing quote so it is part of the span.
                        self.index += 1;
                        break;
                    },
                    // Control characters (except \n, handled above) are not
                    // allowed inside a string.
                    0x01...0x09, 0x0b...0x1f, 0x7f => {
                        state = .invalid;
                    },
                    else => continue,
                },
                .string_literal_backslash => switch (c) {
                    0, '\n' => {
                        result.tag = .invalid;
                        break;
                    },
                    else => {
                        // Any escaped character is accepted; resume the string.
                        state = .string_literal;
                    },
                },
            }
        }
        result.loc.end = self.index;
        return result;
    }
};
test "Basics" {
    // A single keyword on its own.
    const just_help = [_]Token.Tag{.keyword_help};
    try testTokenize("help", &just_help);

    // A command keyword followed by a quoted query string.
    const run_query = [_]Token.Tag{ .keyword_run, .string_literal };
    try testTokenize("run \"Hello world\"", &run_query);

    std.debug.print("CLI tokenizer OK\n", .{});
}
/// Runs `source` through a fresh Tokenizer and checks that the produced tags
/// match `expected_token_tags`, followed by a terminating eof token.
fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
    var toker = Tokenizer.init(source);
    for (expected_token_tags) |expected_tag| {
        try std.testing.expectEqual(expected_tag, toker.next().tag);
    }
    // The stream must always end with eof, even when the last token was
    // invalid (in that case the tokenizer is in an invalid state that can
    // only be recovered by opinionated means outside this implementation),
    // and the eof token must be the empty span at the end of the buffer.
    const eof_token = toker.next();
    try std.testing.expectEqual(Token.Tag.eof, eof_token.tag);
    try std.testing.expectEqual(source.len, eof_token.loc.start);
    try std.testing.expectEqual(source.len, eof_token.loc.end);
}

View File

@ -1,8 +1,10 @@
const std = @import("std");
const UUID = @import("uuid.zig").UUID;
const dtypes = @import("dtypes.zig");
const Tokenizer = @import("tokenizer.zig").Tokenizer;
const Token = @import("tokenizer.zig").Token;
const ziqlTokenizer = @import("ziqlTokenizer.zig").Tokenizer;
const ziqlToken = @import("ziqlTokenizer.zig").Token;
const cliTokenizer = @import("cliTokenizer.zig").Tokenizer;
const cliToken = @import("cliTokenizer.zig").Token;
const Allocator = std.mem.Allocator;
const print = std.debug.print;
@ -34,38 +36,42 @@ pub fn main() !void {
std.debug.print("{s}\n", .{storage.get("User").?.items[0].user.email});
// Lets get arguments and what the user want to do
var argsIterator = try std.process.ArgIterator.initWithAllocator(allocator);
defer argsIterator.deinit();
while (true) {
std.debug.print("> ", .{});
var line_buf: [1024]u8 = undefined;
const line = try std.io.getStdIn().reader().readUntilDelimiterOrEof(&line_buf, '\n');
if (line) |line_str| {
const null_term_line_str = try allocator.dupeZ(u8, line_str[0..line_str.len]);
// Skip executable
_ = argsIterator.next();
if (argsIterator.next()) |commandStr| {
const command = std.meta.stringToEnum(Commands, commandStr) orelse Commands.unknow;
switch (command) {
.run => {
const query = argsIterator.next();
var tokenizer = Tokenizer.init(query.?);
var token = tokenizer.next();
while (token.tag != Token.Tag.eof) {
std.debug.print("{any}\n", .{token});
token = tokenizer.next();
}
},
.help => {
std.debug.print("Welcome to ZipponDB!.", .{});
},
.describe => {
std.debug.print("Here the current schema:\nUser (\n\tname: str,\n\temail:str,\n\tfriend:User\n)\n", .{});
},
.unknow => {
std.debug.print("Unknow command, available are: run, describe, help.\n", .{});
},
else => {},
var cliToker = cliTokenizer.init(null_term_line_str);
const commandToken = cliToker.next();
switch (commandToken.tag) {
.keyword_run => {
const query_token = cliToker.next();
switch (query_token.tag) {
.string_literal => {
std.debug.print("Running query: {s}\n", .{line_str[query_token.loc.start + 1 .. query_token.loc.end - 1]});
},
else => {
std.debug.print("After command run, need a string of a query, eg: \"GRAB User\"\n", .{});
continue;
},
}
},
.keyword_describe => {
std.debug.print("Current schema: \n\nUser (\n\tid: UUID,\n\tname; str,\n\temail: str,\n\tmessages: []Message\n)\n\nMessage (\n\tid: UUID,\n\tcontent; str,\n\tfrom: User,\n)\n", .{});
},
.keyword_help => {
std.debug.print("Welcome to ZipponDB.\n\nrun\t\tTo run a query. Args: query: str, the query to execute.\ndescribe\tTo print the current schema.\nkill\t\tTo stop the process without saving\nsave\t\tSave the database to the normal files.\ndump\t\tCreate a new folder with all data as copy. Args: foldername: str, the name of the folder.\nbump\t\tReplace current data with a previous dump; Note: Save the current state with the dump command. Args: foldername: str, the name of the folder to use.\n", .{});
},
.keyword_quit => {
break;
},
else => {
std.debug.print("Command need to start with a keyword, including: run, describe, help and quit\n", .{});
},
}
}
} else {
std.debug.print("No args found. Available are: run, help.\n", .{});
}
}
@ -78,6 +84,16 @@ fn getById(array: anytype, id: UUID) !*dtypes.User {
return error.UUIDNotFound;
}
/// Returns true when `s` begins with a double quote.
/// Fixed: the previous version also required a closing quote at the end and
/// a length of at least 2, which contradicted the function name (that
/// combined check is an "is fully quoted" test, not a "starts with" test)
/// and made it disagree with its sibling `endsWithDoubleQuote`.
fn startsWithDoubleQuote(s: []const u8) bool {
    if (s.len == 0) return false;
    return s[0] == '"';
}
/// Returns true when `s` ends with a double quote.
/// Fixed: the previous `s.len < 2` guard wrongly rejected the one-character
/// string `"`, which does end with a double quote; only the empty string
/// has no last character to test.
fn endsWithDoubleQuote(s: []const u8) bool {
    if (s.len == 0) return false;
    return s[s.len - 1] == '"';
}
test "getById" {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();

View File

@ -54,19 +54,6 @@ pub const Token = struct {
period,
bang_equal,
};
pub fn lexeme(tag: Tag) ?[]const u8 {
return switch (tag) {
.invalid,
.identifier,
.string_literal,
.number_literal,
=> null,
.bang => "!",
.pipe => "|",
};
}
};
pub const Tokenizer = struct {
@ -355,7 +342,7 @@ pub const Tokenizer = struct {
test "keywords" {
try testTokenize("GRAB UPDATE ADD DELETE IN", &.{ .keyword_grab, .keyword_update, .keyword_add, .keyword_delete, .keyword_in });
std.debug.print("Keywords OK\n", .{});
std.debug.print("ZiQL keywords OK\n", .{});
}
test "basic query" {
@ -364,7 +351,7 @@ test "basic query" {
try testTokenize("GRAB User [1; name] {}", &.{ .keyword_grab, .identifier, .l_bracket, .number_literal, .semicolon, .identifier, .r_bracket, .l_brace, .r_brace });
try testTokenize("GRAB User{}|ASCENDING name|", &.{ .keyword_grab, .identifier, .l_brace, .r_brace, .pipe, .identifier, .identifier, .pipe });
try testTokenize("DELETE User[1]{name='Adrien'}|ASCENDING name, age|", &.{ .keyword_delete, .identifier, .l_bracket, .number_literal, .r_bracket, .l_brace, .identifier, .equal, .string_literal, .r_brace, .pipe, .identifier, .identifier, .comma, .identifier, .pipe });
std.debug.print("Basic query OK\n", .{});
std.debug.print("ZiQL query OK\n", .{});
}
fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {

BIN
zig-out/bin/zippon Executable file

Binary file not shown.

Binary file not shown.