Simple cli and ziql tokenizer

2024-09-02 20:22:46 +02:00 · 2024-09-02 20:22:46 +02:00 · be84a8e481
commit be84a8e481
parent e3f0ff1d84
7 changed files with 292 additions and 53 deletions
--- a/README.md
+++ b/README.md
@ -10,19 +10,68 @@ Zig is fast, blablabla
 Meme "That's the neat part..."
-Zippon is a strutural relational potentially in memory written entirely in Zig from stractch.
+Zippon is a structural, relational, potentially in-memory database written entirely in Zig from scratch.
-You build a binary according to your schema, you can just run it with some arguments and it will create and manage a folder 'zipponDB_DATA'.
+You build a binary according to your schema; you can then just run it to access a CLI, and it will create and manage a folder 'zipponDB_DATA'.
 Then you do what you want with it, including:
- Run it with your app as a file and folder
+- Run it with your app as a separate process and folder
 - Create a Docker and open some port
- Create a Docker with a small API
+- Create a Docker with a small API like flask
 - Other stuffs, Im sure some will find something nice
-Note that you can have multiple binary that run together. Each binary have a unique id that is use to segregate binary inside the folder 'zipponDB_DATA'
+# Integration
 ## Python
 ```python
 import zippondb as zdb
 client = zdb.newClient('path/to/binary')
 print(client.run('describe'))
 users = client.run('GRAB User {}')
 for user in users:
    print(user.name)
 client.run('save')
 ```
 # Benchmark
 I did a database with random data. The schema is like that:
 ```
 User {
    name: str,
    email: str,
    friends: []User.friends,
    posts: []Post.from,
    liked_post: []Post.like_by,
    comments: []Comment.from,
    liked_com: []Comment.like_by,
 }
 Post {
    title: str,
    image: str,
    at: date,
    from: User.posts,
    like_by: []User.liked_post,
    comments: []Comment.of,
 }
 Comment {
    content: str,
    at: date,
    from: User.comments,
    like_by: User.liked_com,
    of: Post.comments,
 }
 ```
 As you can see, a link needs to be defined in both structs. `[]` means an array of values.
 For example `posts: []Post.from,` and `from: User.posts,` mean that a `User` can have multiple posts (an array of `Post`) and a post
 just one author. Both linked by the value `posts` and `from`.
 # Create a schema
 Zippon uses structs as its way of saving data. A struct is a way of storing multiple pieces of data of different types.
@ -83,6 +132,7 @@ ADD User ( name = 'Adrien', email = 'email', age = 40 }
 - () Are new or updated data (Not already saved)
 - || Are additional options
 - Data are in struct format and can have link
 - By default, all values other than links are returned per query, to prevent recursive returns (User.friends in User.friends)
 # How it's really work
--- a/build.zig
+++ b/build.zig
@ -16,7 +16,7 @@ pub fn build(b: *std.Build) void {
    const optimize = b.standardOptimizeOption(.{});
    const exe = b.addExecutable(.{
-        .name = "zippon2",
+        .name = "zippon",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
--- a/src/cliTokenizer.zig
+++ b/src/cliTokenizer.zig
@ -0,0 +1,186 @@
 // From https://github.com/ziglang/zig/blob/master/lib/std/zig/tokenizer.zig
 const std = @import("std");
 /// A single lexical unit of a CLI command line: what kind of token it is
 /// (`tag`) and which byte offsets it covers (`loc`).
 pub const Token = struct {
    tag: Tag,
    loc: Loc,

    /// Start and end byte offsets of a token.
    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    /// Every kind of token the CLI tokenizer can report.
    pub const Tag = enum {
        eof,
        invalid,
        keyword_run,
        keyword_help,
        keyword_describe,
        keyword_quit,
        string_literal,
        identifier,
    };

    /// Compile-time lookup table from command word to its keyword tag.
    pub const keywords = std.StaticStringMap(Tag).initComptime(.{
        .{ "run", .keyword_run },
        .{ "help", .keyword_help },
        .{ "describe", .keyword_describe },
        .{ "quit", .keyword_quit },
    });

    /// Looks `bytes` up in `keywords`; returns the matching tag, or null when
    /// `bytes` is not a keyword.
    pub fn getKeyword(bytes: []const u8) ?Tag {
        return keywords.get(bytes);
    }

    /// Returns true when this token's tag is one of the `keyword_*` tags.
    pub fn isKeyword(self: Token) bool {
        return switch (self.tag) {
            .keyword_run,
            .keyword_describe,
            .keyword_help,
            .keyword_quit,
            => true,
            else => false,
        };
    }
 };
 /// Splits a NUL-terminated CLI command line into `Token`s, one per call to
 /// `next`.
 pub const Tokenizer = struct {
    buffer: [:0]const u8,
    index: usize,

    /// Debug helper: prints the token's tag name followed by the quoted slice
    /// of `buffer` that the token covers.
    pub fn dump(self: *Tokenizer, token: *const Token) void {
        const text = self.buffer[token.loc.start..token.loc.end];
        std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), text });
    }

    /// Creates a tokenizer over `buffer`. Scanning starts just past a leading
    /// UTF-8 byte-order mark when one is present, otherwise at offset 0.
    pub fn init(buffer: [:0]const u8) Tokenizer {
        const bom = "\xEF\xBB\xBF";
        const first: usize = if (std.mem.startsWith(u8, buffer, bom)) bom.len else 0;
        return .{ .buffer = buffer, .index = first };
    }

    /// Internal scanner states used by `next`.
    const State = enum {
        start,
        invalid,
        identifier,
        string_literal,
        string_literal_backslash,
    };

    /// Scans forward from `index` and returns the next token, leaving `index`
    /// just past it. Returns an `eof` token once the terminating NUL at
    /// `buffer.len` is reached.
    pub fn next(self: *Tokenizer) Token {
        var mode: State = .start;
        var token: Token = .{
            .tag = undefined,
            .loc = .{ .start = self.index, .end = undefined },
        };
        // The continue-expression advances one byte after every iteration
        // that does not `break` or `return`.
        while (true) : (self.index += 1) {
            const byte = self.buffer[self.index];
            switch (mode) {
                .start => switch (byte) {
                    0 => {
                        // A NUL at buffer.len is the sentinel; anywhere else
                        // it is an unexpected byte.
                        if (self.index == self.buffer.len) {
                            return .{
                                .tag = .eof,
                                .loc = .{ .start = self.index, .end = self.index },
                            };
                        }
                        mode = .invalid;
                    },
                    // Leading whitespace is skipped by moving the token start.
                    ' ', '\n', '\t', '\r' => token.loc.start = self.index + 1,
                    'a'...'z', 'A'...'Z', '_' => {
                        mode = .identifier;
                        token.tag = .identifier;
                    },
                    '"' => {
                        mode = .string_literal;
                        token.tag = .string_literal;
                    },
                    else => mode = .invalid,
                },
                .invalid => {
                    token.tag = .invalid;
                    break;
                },
                .identifier => switch (byte) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                    else => {
                        // The word is complete: promote it to a keyword tag
                        // when it matches one.
                        const word = self.buffer[token.loc.start..self.index];
                        if (Token.getKeyword(word)) |tag| {
                            token.tag = tag;
                        }
                        break;
                    },
                },
                .string_literal => switch (byte) {
                    0 => {
                        // Unterminated string at end of input.
                        if (self.index == self.buffer.len) {
                            token.tag = .invalid;
                            break;
                        }
                        // Embedded NUL inside the string.
                        mode = .invalid;
                    },
                    '\n' => {
                        token.tag = .invalid;
                        break;
                    },
                    '\\' => mode = .string_literal_backslash,
                    '"' => {
                        // Include the closing quote in the token.
                        self.index += 1;
                        break;
                    },
                    0x01...0x09, 0x0b...0x1f, 0x7f => mode = .invalid,
                    else => {},
                },
                .string_literal_backslash => switch (byte) {
                    0, '\n' => {
                        token.tag = .invalid;
                        break;
                    },
                    else => mode = .string_literal,
                },
            }
        }
        token.loc.end = self.index;
        return token;
    }
 };
 // Smoke test: each source line must tokenize to exactly the listed tags.
 test "Basics" {
    const Case = struct {
        source: [:0]const u8,
        tags: []const Token.Tag,
    };
    const cases = [_]Case{
        .{ .source = "help", .tags = &.{.keyword_help} },
        .{ .source = "run \"Hello world\"", .tags = &.{ .keyword_run, .string_literal } },
    };
    for (cases) |case| {
        try testTokenize(case.source, case.tags);
    }
    std.debug.print("CLI tokenizer OK\n", .{});
 }
 /// Test helper: tokenizes `source` and checks that the produced tags match
 /// `expected_token_tags` in order, then that the following token is `eof`
 /// positioned at `source.len`.
 fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
    var lexer = Tokenizer.init(source);
    var i: usize = 0;
    while (i < expected_token_tags.len) : (i += 1) {
        const produced = lexer.next();
        try std.testing.expectEqual(expected_token_tags[i], produced.tag);
    }
    // After the expected tags, the very next token must be eof, with both
    // offsets sitting at the end of the source.
    const trailing = lexer.next();
    try std.testing.expectEqual(Token.Tag.eof, trailing.tag);
    try std.testing.expectEqual(source.len, trailing.loc.start);
    try std.testing.expectEqual(source.len, trailing.loc.end);
 }
--- a/src/main.zig
+++ b/src/main.zig
@ -1,8 +1,10 @@
 const std = @import("std");
 const UUID = @import("uuid.zig").UUID;
 const dtypes = @import("dtypes.zig");
-const Tokenizer = @import("tokenizer.zig").Tokenizer;
+const ziqlTokenizer = @import("ziqlTokenizer.zig").Tokenizer;
-const Token = @import("tokenizer.zig").Token;
+const ziqlToken = @import("ziqlTokenizer.zig").Token;
 const cliTokenizer = @import("cliTokenizer.zig").Tokenizer;
 const cliToken = @import("cliTokenizer.zig").Token;
 const Allocator = std.mem.Allocator;
 const print = std.debug.print;
@ -34,38 +36,42 @@ pub fn main() !void {
    std.debug.print("{s}\n", .{storage.get("User").?.items[0].user.email});
-    // Lets get arguments and what the user want to do
+    while (true) {
-    var argsIterator = try std.process.ArgIterator.initWithAllocator(allocator);
+        std.debug.print("> ", .{});
-    defer argsIterator.deinit();
+        var line_buf: [1024]u8 = undefined;
        const line = try std.io.getStdIn().reader().readUntilDelimiterOrEof(&line_buf, '\n');
        if (line) |line_str| {
            const null_term_line_str = try allocator.dupeZ(u8, line_str[0..line_str.len]);
-    // Skip executable
+            var cliToker = cliTokenizer.init(null_term_line_str);
-    _ = argsIterator.next();
+            const commandToken = cliToker.next();
-
+            switch (commandToken.tag) {
-    if (argsIterator.next()) |commandStr| {
+                .keyword_run => {
-        const command = std.meta.stringToEnum(Commands, commandStr) orelse Commands.unknow;
+                    const query_token = cliToker.next();
-        switch (command) {
+                    switch (query_token.tag) {
-            .run => {
+                        .string_literal => {
-                const query = argsIterator.next();
+                            std.debug.print("Running query: {s}\n", .{line_str[query_token.loc.start + 1 .. query_token.loc.end - 1]});
-                var tokenizer = Tokenizer.init(query.?);
+                        },
-                var token = tokenizer.next();
+                        else => {
-                while (token.tag != Token.Tag.eof) {
+                            std.debug.print("After command run, need a string of a query, eg: \"GRAB User\"\n", .{});
-                    std.debug.print("{any}\n", .{token});
+                            continue;
-                    token = tokenizer.next();
+                        },
-                }
+                    }
-            },
+                },
-            .help => {
+                .keyword_describe => {
-                std.debug.print("Welcome to ZipponDB!.", .{});
+                    std.debug.print("Current schema: \n\nUser (\n\tid: UUID,\n\tname; str,\n\temail: str,\n\tmessages: []Message\n)\n\nMessage (\n\tid: UUID,\n\tcontent; str,\n\tfrom: User,\n)\n", .{});
-            },
+                },
-            .describe => {
+                .keyword_help => {
-                std.debug.print("Here the current schema:\nUser (\n\tname: str,\n\temail:str,\n\tfriend:User\n)\n", .{});
+                    std.debug.print("Welcome to ZipponDB.\n\nrun\t\tTo run a query. Args: query: str, the query to execute.\ndescribe\tTo print the current schema.\nkill\t\tTo stop the process without saving\nsave\t\tSave the database to the normal files.\ndump\t\tCreate a new folder with all data as copy. Args: foldername: str, the name of the folder.\nbump\t\tReplace current data with a previous dump; Note: Save the current state with the dump command. Args: foldername: str, the name of the folder to use.\n", .{});
-            },
+                },
-            .unknow => {
+                .keyword_quit => {
-                std.debug.print("Unknow command, available are: run, describe, help.\n", .{});
+                    break;
-            },
+                },
-            else => {},
+                else => {
                    std.debug.print("Command need to start with a keyword, including: run, describe, help and quit\n", .{});
                },
            }
        }
    } else {
        std.debug.print("No args found. Available are: run, help.\n", .{});
    }
 }
@ -78,6 +84,16 @@ fn getById(array: anytype, id: UUID) !*dtypes.User {
    return error.UUIDNotFound;
 }
 /// Returns true when `s` is at least two bytes long and both its first and
 /// its last byte are `"`. Note that, despite the name, the trailing quote is
 /// required as well.
 fn startsWithDoubleQuote(s: []const u8) bool {
    return s.len >= 2 and s[0] == '"' and s[s.len - 1] == '"';
 }
 /// Returns true when `s` is at least two bytes long and its last byte is `"`.
 fn endsWithDoubleQuote(s: []const u8) bool {
    return s.len >= 2 and s[s.len - 1] == '"';
 }
 test "getById" {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const allocator = gpa.allocator();
--- a/src/ziqlTokenizer.zig
+++ b/src/ziqlTokenizer.zig
@ -54,19 +54,6 @@ pub const Token = struct {
        period,
        bang_equal,
    };
    /// Maps `tag` to its literal source text: "!" for `.bang` and "|" for
    /// `.pipe`. Returns null for `.invalid`, `.identifier`, `.string_literal`
    /// and `.number_literal`.
    /// NOTE(review): entries such as `period` and `bang_equal`, listed just
    /// above this function, have no arm here — presumably they are `Tag`
    /// members, in which case this switch is not exhaustive; confirm before
    /// referencing this function.
    pub fn lexeme(tag: Tag) ?[]const u8 {
        return switch (tag) {
            .invalid,
            .identifier,
            .string_literal,
            .number_literal,
            => null,
            .bang => "!",
            .pipe => "|",
        };
    }
 };
 pub const Tokenizer = struct {
@ -355,7 +342,7 @@ pub const Tokenizer = struct {
 test "keywords" {
    try testTokenize("GRAB UPDATE ADD DELETE IN", &.{ .keyword_grab, .keyword_update, .keyword_add, .keyword_delete, .keyword_in });
-    std.debug.print("Keywords OK\n", .{});
+    std.debug.print("ZiQL keywords OK\n", .{});
 }
 test "basic query" {
@ -364,7 +351,7 @@ test "basic query" {
    try testTokenize("GRAB User [1; name] {}", &.{ .keyword_grab, .identifier, .l_bracket, .number_literal, .semicolon, .identifier, .r_bracket, .l_brace, .r_brace });
    try testTokenize("GRAB User{}|ASCENDING name|", &.{ .keyword_grab, .identifier, .l_brace, .r_brace, .pipe, .identifier, .identifier, .pipe });
    try testTokenize("DELETE User[1]{name='Adrien'}|ASCENDING name, age|", &.{ .keyword_delete, .identifier, .l_bracket, .number_literal, .r_bracket, .l_brace, .identifier, .equal, .string_literal, .r_brace, .pipe, .identifier, .identifier, .comma, .identifier, .pipe });
-    std.debug.print("Basic query OK\n", .{});
+    std.debug.print("ZiQL query OK\n", .{});
 }
 fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
--- a/zig-out/bin/zippon
+++ b/zig-out/bin/zippon
--- a/zig-out/bin/zippon2
+++ b/zig-out/bin/zippon2