diff --git a/README.md b/README.md index 3de059d..6e2454b 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,68 @@ Zig is fast, blablabla Meme "That's the neat part..." -Zippon is a strutural relational potentially in memory written entirely in Zig from stractch. +Zippon is a structural, relational, potentially in-memory database written entirely in Zig from scratch. -You build a binary according to your schema, you can just run it with some arguments and it will create and manage a folder 'zipponDB_DATA'. +You build a binary according to your schema, you can just run it to access a CLI and it will create and manage a folder 'zipponDB_DATA'. Then you do what you want with it, including: -- Run it with your app as a file and folder +- Run it with your app as a separate process and folder - Create a Docker and open some port -- Create a Docker with a small API +- Create a Docker with a small API like Flask - Other stuffs, Im sure some will find something nice -Note that you can have multiple binary that run together. Each binary have a unique id that is use to segregate binary inside the folder 'zipponDB_DATA' +# Integration + +## Python + +```python +import zippondb as zdb + +client = zdb.newClient('path/to/binary') +print(client.run('describe')) + +users = client.run('GRAB User {}') +for user in users: + print(user.name) + +client.run('save') +``` # Benchmark +I built a database with random data. The schema looks like this: +``` +User { + name: str, + email: str, + friends: []User.friends, + posts: []Post.from, + liked_post: []Post.like_by, + comments: []Comment.from, + liked_com: []Comment.like_by, +} + +Post { + title: str, + image: str, + at: date, + from: User.posts, + like_by: []User.liked_post, + comments: []Comment.of, +} + +Comment { + content: str, + at: date, + from: User.comments, + like_by: User.liked_com, + of: Post.comments, +} +``` + +As you can see, links need to be defined in both structs. `[]` means an array of values. 
+For example `posts: []Post.from,` and `from: User.posts,` mean that a `User` can have multiple posts (an array of `Post`) and a post +has just one author. Both are linked by the fields `posts` and `from`. + # Create a schema Zippon use struct as way of saving data. A struct is a way of storing multiple data of different type. @@ -83,6 +132,7 @@ ADD User ( name = 'Adrien', email = 'email', age = 40 } - () Are new or updated data (Not already savec) - || Are additional options - Data are in struct format and can have link +- By default, all values other than links are returned by a query, to prevent recursive returns (User.friends in User.friends) # How it's really work diff --git a/build.zig b/build.zig index 516ce9a..8f17023 100644 --- a/build.zig +++ b/build.zig @@ -16,7 +16,7 @@ pub fn build(b: *std.Build) void { const optimize = b.standardOptimizeOption(.{}); const exe = b.addExecutable(.{ - .name = "zippon2", + .name = "zippon", .root_source_file = b.path("src/main.zig"), .target = target, .optimize = optimize, diff --git a/src/cliTokenizer.zig b/src/cliTokenizer.zig new file mode 100644 index 0000000..991d366 --- /dev/null +++ b/src/cliTokenizer.zig @@ -0,0 +1,186 @@ +// From https://github.com/ziglang/zig/blob/master/lib/std/zig/tokenizer.zig +const std = @import("std"); + +pub const Token = struct { + tag: Tag, + loc: Loc, + + pub const Loc = struct { + start: usize, + end: usize, + }; + + pub const keywords = std.StaticStringMap(Tag).initComptime(.{ + .{ "run", .keyword_run }, + .{ "help", .keyword_help }, + .{ "describe", .keyword_describe }, + .{ "quit", .keyword_quit }, + }); + + pub fn isKeyword(self: Token) bool { + switch (self.tag) { + .keyword_run, .keyword_describe, .keyword_help, .keyword_quit => return true, + else => return false, + } + } + + pub fn getKeyword(bytes: []const u8) ?Tag { + return keywords.get(bytes); + } + + pub const Tag = enum { + eof, + invalid, + + keyword_run, + keyword_help, + keyword_describe, + keyword_quit, + + string_literal, 
identifier, + }; +}; + +pub const Tokenizer = struct { + buffer: [:0]const u8, + index: usize, + + /// For debugging purposes. + pub fn dump(self: *Tokenizer, token: *const Token) void { + std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.loc.start..token.loc.end] }); + } + + pub fn init(buffer: [:0]const u8) Tokenizer { + // Skip the UTF-8 BOM if present. + return .{ + .buffer = buffer, + .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0, + }; + } + + const State = enum { + start, + invalid, + identifier, + string_literal, + string_literal_backslash, + }; + + pub fn next(self: *Tokenizer) Token { + var state: State = .start; + var result: Token = .{ + .tag = undefined, + .loc = .{ + .start = self.index, + .end = undefined, + }, + }; + while (true) : (self.index += 1) { + const c = self.buffer[self.index]; + switch (state) { + .start => switch (c) { + 0 => { + if (self.index == self.buffer.len) return .{ + .tag = .eof, + .loc = .{ + .start = self.index, + .end = self.index, + }, + }; + state = .invalid; + }, + ' ', '\n', '\t', '\r' => { + result.loc.start = self.index + 1; + }, + 'a'...'z', 'A'...'Z', '_' => { + state = .identifier; + result.tag = .identifier; + }, + '"' => { + state = .string_literal; + result.tag = .string_literal; + }, + else => { + state = .invalid; + }, + }, + + .invalid => { + result.tag = .invalid; + break; + }, + + .identifier => switch (c) { + 'a'...'z', 'A'...'Z', '_', '0'...'9' => continue, + else => { + if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |tag| { + result.tag = tag; + } + break; + }, + }, + + .string_literal => switch (c) { + 0 => { + if (self.index != self.buffer.len) { + state = .invalid; + continue; + } + result.tag = .invalid; + break; + }, + '\n' => { + result.tag = .invalid; + break; + }, + '\\' => { + state = .string_literal_backslash; + }, + '"' => { + self.index += 1; + break; + }, + 0x01...0x09, 0x0b...0x1f, 0x7f => { + state = .invalid; + }, + else => 
continue, + }, + + .string_literal_backslash => switch (c) { + 0, '\n' => { + result.tag = .invalid; + break; + }, + else => { + state = .string_literal; + }, + }, + } + } + + result.loc.end = self.index; + return result; + } +}; + +test "Basics" { + try testTokenize("help", &.{.keyword_help}); + try testTokenize("run \"Hello world\"", &.{ .keyword_run, .string_literal }); + std.debug.print("CLI tokenizer OK\n", .{}); +} + +fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void { + var tokenizer = Tokenizer.init(source); + for (expected_token_tags) |expected_token_tag| { + const token = tokenizer.next(); + try std.testing.expectEqual(expected_token_tag, token.tag); + } + // Last token should always be eof, even when the last token was invalid, + // in which case the tokenizer is in an invalid state, which can only be + // recovered by opinionated means outside the scope of this implementation. + const last_token = tokenizer.next(); + try std.testing.expectEqual(Token.Tag.eof, last_token.tag); + try std.testing.expectEqual(source.len, last_token.loc.start); + try std.testing.expectEqual(source.len, last_token.loc.end); +} diff --git a/src/main.zig b/src/main.zig index 633e30c..80682c3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,8 +1,10 @@ const std = @import("std"); const UUID = @import("uuid.zig").UUID; const dtypes = @import("dtypes.zig"); -const Tokenizer = @import("tokenizer.zig").Tokenizer; -const Token = @import("tokenizer.zig").Token; +const ziqlTokenizer = @import("ziqlTokenizer.zig").Tokenizer; +const ziqlToken = @import("ziqlTokenizer.zig").Token; +const cliTokenizer = @import("cliTokenizer.zig").Tokenizer; +const cliToken = @import("cliTokenizer.zig").Token; const Allocator = std.mem.Allocator; const print = std.debug.print; @@ -34,38 +36,42 @@ pub fn main() !void { std.debug.print("{s}\n", .{storage.get("User").?.items[0].user.email}); - // Lets get arguments and what the user want to do - var argsIterator = try 
std.process.ArgIterator.initWithAllocator(allocator); - defer argsIterator.deinit(); + while (true) { + std.debug.print("> ", .{}); + var line_buf: [1024]u8 = undefined; + const line = try std.io.getStdIn().reader().readUntilDelimiterOrEof(&line_buf, '\n'); + if (line) |line_str| { + const null_term_line_str = try allocator.dupeZ(u8, line_str[0..line_str.len]); - // Skip executable - _ = argsIterator.next(); - - if (argsIterator.next()) |commandStr| { - const command = std.meta.stringToEnum(Commands, commandStr) orelse Commands.unknow; - switch (command) { - .run => { - const query = argsIterator.next(); - var tokenizer = Tokenizer.init(query.?); - var token = tokenizer.next(); - while (token.tag != Token.Tag.eof) { - std.debug.print("{any}\n", .{token}); - token = tokenizer.next(); - } - }, - .help => { - std.debug.print("Welcome to ZipponDB!.", .{}); - }, - .describe => { - std.debug.print("Here the current schema:\nUser (\n\tname: str,\n\temail:str,\n\tfriend:User\n)\n", .{}); - }, - .unknow => { - std.debug.print("Unknow command, available are: run, describe, help.\n", .{}); - }, - else => {}, + var cliToker = cliTokenizer.init(null_term_line_str); + const commandToken = cliToker.next(); + switch (commandToken.tag) { + .keyword_run => { + const query_token = cliToker.next(); + switch (query_token.tag) { + .string_literal => { + std.debug.print("Running query: {s}\n", .{line_str[query_token.loc.start + 1 .. query_token.loc.end - 1]}); + }, + else => { + std.debug.print("After command run, need a string of a query, eg: \"GRAB User\"\n", .{}); + continue; + }, + } + }, + .keyword_describe => { + std.debug.print("Current schema: \n\nUser (\n\tid: UUID,\n\tname; str,\n\temail: str,\n\tmessages: []Message\n)\n\nMessage (\n\tid: UUID,\n\tcontent; str,\n\tfrom: User,\n)\n", .{}); + }, + .keyword_help => { + std.debug.print("Welcome to ZipponDB.\n\nrun\t\tTo run a query. 
Args: query: str, the query to execute.\ndescribe\tTo print the current schema.\nkill\t\tTo stop the process without saving\nsave\t\tSave the database to the normal files.\ndump\t\tCreate a new folder with all data as copy. Args: foldername: str, the name of the folder.\nbump\t\tReplace current data with a previous dump; Note: Save the current state with the dump command. Args: foldername: str, the name of the folder to use.\n", .{}); + }, + .keyword_quit => { + break; + }, + else => { + std.debug.print("Command need to start with a keyword, including: run, describe, help and quit\n", .{}); + }, + } } - } else { - std.debug.print("No args found. Available are: run, help.\n", .{}); } } @@ -78,6 +84,16 @@ fn getById(array: anytype, id: UUID) !*dtypes.User { return error.UUIDNotFound; } +fn startsWithDoubleQuote(s: []const u8) bool { + if (s.len < 2) return false; + return s[0] == '"' and s[s.len - 1] == '"'; +} + +fn endsWithDoubleQuote(s: []const u8) bool { + if (s.len < 2) return false; + return s[s.len - 1] == '"'; +} + test "getById" { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); diff --git a/src/tokenizer.zig b/src/ziqlTokenizer.zig similarity index 97% rename from src/tokenizer.zig rename to src/ziqlTokenizer.zig index b03beba..0dc1953 100644 --- a/src/tokenizer.zig +++ b/src/ziqlTokenizer.zig @@ -54,19 +54,6 @@ pub const Token = struct { period, bang_equal, }; - - pub fn lexeme(tag: Tag) ?[]const u8 { - return switch (tag) { - .invalid, - .identifier, - .string_literal, - .number_literal, - => null, - - .bang => "!", - .pipe => "|", - }; - } }; pub const Tokenizer = struct { @@ -355,7 +342,7 @@ pub const Tokenizer = struct { test "keywords" { try testTokenize("GRAB UPDATE ADD DELETE IN", &.{ .keyword_grab, .keyword_update, .keyword_add, .keyword_delete, .keyword_in }); - std.debug.print("Keywords OK\n", .{}); + std.debug.print("ZiQL keywords OK\n", .{}); } test "basic query" { @@ -364,7 +351,7 @@ test "basic query" { 
try testTokenize("GRAB User [1; name] {}", &.{ .keyword_grab, .identifier, .l_bracket, .number_literal, .semicolon, .identifier, .r_bracket, .l_brace, .r_brace }); try testTokenize("GRAB User{}|ASCENDING name|", &.{ .keyword_grab, .identifier, .l_brace, .r_brace, .pipe, .identifier, .identifier, .pipe }); try testTokenize("DELETE User[1]{name='Adrien'}|ASCENDING name, age|", &.{ .keyword_delete, .identifier, .l_bracket, .number_literal, .r_bracket, .l_brace, .identifier, .equal, .string_literal, .r_brace, .pipe, .identifier, .identifier, .comma, .identifier, .pipe }); - std.debug.print("Basic query OK\n", .{}); + std.debug.print("ZiQL query OK\n", .{}); } fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void { diff --git a/zig-out/bin/zippon b/zig-out/bin/zippon new file mode 100755 index 0000000..9c3ab19 Binary files /dev/null and b/zig-out/bin/zippon differ diff --git a/zig-out/bin/zippon2 b/zig-out/bin/zippon2 index 5ad8178..20286b6 100755 Binary files a/zig-out/bin/zippon2 and b/zig-out/bin/zippon2 differ