Simple cli and ziql tokenizer

This commit is contained in:
Adrien Bouvais 2024-09-02 20:22:46 +02:00
parent e3f0ff1d84
commit be84a8e481
7 changed files with 292 additions and 53 deletions

View File

@ -10,19 +10,68 @@ Zig is fast, blablabla
Meme "That's the neat part..."
Zippon is a strutural relational potentially in memory written entirely in Zig from stractch.
Zippon is a structural, relational, potentially in-memory database written entirely in Zig from scratch.
You build a binary according to your schema, you can just run it with some arguments and it will create and manage a folder 'zipponDB_DATA'.
You build a binary according to your schema, you can just run it to access a CLI and it will create and manage a folder 'zipponDB_DATA'.
Then you do what you want with it, including:
- Run it with your app as a file and folder
- Run it with your app as a separate process and folder
- Create a Docker and open some port
- Create a Docker with a small API
- Create a Docker with a small API like flask
- Other stuff — I'm sure some will find something nice
Note that you can have multiple binaries that run together. Each binary has a unique id that is used to segregate binaries inside the folder 'zipponDB_DATA'
# Integration
## Python
```python
import zippondb as zdb
client = zdb.newClient('path/to/binary')
print(client.run('describe'))
users = client.run('GRAB User {}')
for user in users:
print(user.name)
client.run('save')
```
# Benchmark
I did a database with random data. The schema is like that:
```
User {
name: str,
email: str,
friends: []User.friends,
posts: []Post.from,
liked_post: []Post.like_by,
comments: []Comment.from,
liked_com: []Comment.like_by,
}
Post {
title: str,
image: str,
at: date,
from: User.posts,
like_by: []User.liked_post,
comments: []Comment.of,
}
Comment {
content: str,
at: date,
from: User.comments,
like_by: User.liked_com,
of: Post.comments,
}
```
As you can see, links need to be defined in both structs. [] means an array of values.
For example `posts: []Post.from,` and `from: User.posts,` mean that a `User` can have multiple posts (an array of `Post`) and a post
has just one author. Both are linked by the fields `posts` and `from`.
# Create a schema
Zippon uses structs as a way of saving data. A struct is a way of storing multiple pieces of data of different types.
@ -83,6 +132,7 @@ ADD User ( name = 'Adrien', email = 'email', age = 40 }
- () Are new or updated data (Not already saved)
- || Are additional options
- Data are in struct format and can have link
- By default, all values other than links are returned per query, to prevent recursive returns (User.friends in User.friends)
# How it really works

View File

@ -16,7 +16,7 @@ pub fn build(b: *std.Build) void {
const optimize = b.standardOptimizeOption(.{});
const exe = b.addExecutable(.{
.name = "zippon2",
.name = "zippon",
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,

186
src/cliTokenizer.zig Normal file
View File

@ -0,0 +1,186 @@
// From https://github.com/ziglang/zig/blob/master/lib/std/zig/tokenizer.zig
const std = @import("std");
/// A single token produced by the CLI tokenizer: a tag describing what was
/// matched plus the byte range it occupies in the source buffer.
pub const Token = struct {
    tag: Tag,
    loc: Loc,

    /// Byte offsets of the token inside the source buffer (`end` exclusive).
    pub const Loc = struct {
        start: usize,
        end: usize,
    };

    /// Every kind of token the CLI tokenizer can emit.
    pub const Tag = enum {
        eof,
        invalid,
        keyword_run,
        keyword_help,
        keyword_describe,
        keyword_quit,
        string_literal,
        identifier,
    };

    /// Comptime map from the textual form of a CLI command to its keyword tag.
    pub const keywords = std.StaticStringMap(Tag).initComptime(.{
        .{ "run", .keyword_run },
        .{ "help", .keyword_help },
        .{ "describe", .keyword_describe },
        .{ "quit", .keyword_quit },
    });

    /// Returns the keyword tag for `bytes`, or null when it is not a keyword.
    pub fn getKeyword(bytes: []const u8) ?Tag {
        return keywords.get(bytes);
    }

    /// True when this token is one of the recognized command keywords.
    pub fn isKeyword(self: Token) bool {
        return switch (self.tag) {
            .keyword_run, .keyword_describe, .keyword_help, .keyword_quit => true,
            else => false,
        };
    }
};
/// State-machine tokenizer over a null-terminated buffer.
/// Call `next()` repeatedly; it returns `.eof` once the sentinel 0 at
/// `buffer.len` is reached. After an `.invalid` token the tokenizer is not
/// guaranteed to recover (see the note in the test helper below).
pub const Tokenizer = struct {
    buffer: [:0]const u8,
    index: usize,

    /// For debugging purposes.
    pub fn dump(self: *Tokenizer, token: *const Token) void {
        std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.loc.start..token.loc.end] });
    }

    pub fn init(buffer: [:0]const u8) Tokenizer {
        // Skip the UTF-8 BOM if present.
        return .{
            .buffer = buffer,
            .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0,
        };
    }

    /// Internal scanning states; `start` is re-entered for every new token.
    const State = enum {
        start,
        invalid,
        identifier,
        string_literal,
        string_literal_backslash,
    };

    /// Scans and returns the next token. The loop's continue-expression
    /// advances `index` once per character; `break` leaves `index` at the
    /// first byte NOT belonging to the returned token (except for the
    /// closing quote of a string, which is consumed explicitly).
    pub fn next(self: *Tokenizer) Token {
        var state: State = .start;
        var result: Token = .{
            .tag = undefined,
            .loc = .{
                .start = self.index,
                .end = undefined,
            },
        };
        while (true) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (state) {
                .start => switch (c) {
                    0 => {
                        // The sentinel 0 at buffer.len means end of input;
                        // a 0 byte anywhere earlier is an invalid token.
                        if (self.index == self.buffer.len) return .{
                            .tag = .eof,
                            .loc = .{
                                .start = self.index,
                                .end = self.index,
                            },
                        };
                        state = .invalid;
                    },
                    // Whitespace between tokens: slide the token start forward.
                    ' ', '\n', '\t', '\r' => {
                        result.loc.start = self.index + 1;
                    },
                    'a'...'z', 'A'...'Z', '_' => {
                        state = .identifier;
                        result.tag = .identifier;
                    },
                    '"' => {
                        state = .string_literal;
                        result.tag = .string_literal;
                    },
                    else => {
                        state = .invalid;
                    },
                },
                // One extra iteration after entering .invalid so the bad
                // byte is included in the token span, then give up.
                .invalid => {
                    result.tag = .invalid;
                    break;
                },
                .identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => continue,
                    else => {
                        // Identifier ended: promote to a keyword tag when the
                        // accumulated text matches a CLI command.
                        if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |tag| {
                            result.tag = tag;
                        }
                        break;
                    },
                },
                .string_literal => switch (c) {
                    0 => {
                        // Unterminated string: invalid whether we hit a stray
                        // 0 byte or the real end of the buffer.
                        if (self.index != self.buffer.len) {
                            state = .invalid;
                            continue;
                        }
                        result.tag = .invalid;
                        break;
                    },
                    '\n' => {
                        // Strings may not span lines.
                        result.tag = .invalid;
                        break;
                    },
                    '\\' => {
                        state = .string_literal_backslash;
                    },
                    '"' => {
                        // Consume the closing quote so it is part of the span.
                        self.index += 1;
                        break;
                    },
                    // Control characters (except \n, handled above) are not
                    // allowed inside a string.
                    0x01...0x09, 0x0b...0x1f, 0x7f => {
                        state = .invalid;
                    },
                    else => continue,
                },
                .string_literal_backslash => switch (c) {
                    0, '\n' => {
                        result.tag = .invalid;
                        break;
                    },
                    else => {
                        // Any escaped character is accepted; resume the string.
                        state = .string_literal;
                    },
                },
            }
        }
        result.loc.end = self.index;
        return result;
    }
};
test "Basics" {
    // A single keyword on its own.
    const just_help = [_]Token.Tag{.keyword_help};
    try testTokenize("help", &just_help);

    // A command keyword followed by a quoted query string.
    const run_query = [_]Token.Tag{ .keyword_run, .string_literal };
    try testTokenize("run \"Hello world\"", &run_query);

    std.debug.print("CLI tokenizer OK\n", .{});
}
/// Runs `source` through a fresh Tokenizer and checks that the produced tags
/// match `expected_token_tags`, followed by a terminating eof token.
fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
    var toker = Tokenizer.init(source);
    for (expected_token_tags) |expected_tag| {
        try std.testing.expectEqual(expected_tag, toker.next().tag);
    }
    // The stream must always end with eof, even when the last token was
    // invalid (in that case the tokenizer is in an invalid state that can
    // only be recovered by opinionated means outside this implementation),
    // and the eof token must be the empty span at the end of the buffer.
    const eof_token = toker.next();
    try std.testing.expectEqual(Token.Tag.eof, eof_token.tag);
    try std.testing.expectEqual(source.len, eof_token.loc.start);
    try std.testing.expectEqual(source.len, eof_token.loc.end);
}

View File

@ -1,8 +1,10 @@
const std = @import("std");
const UUID = @import("uuid.zig").UUID;
const dtypes = @import("dtypes.zig");
const Tokenizer = @import("tokenizer.zig").Tokenizer;
const Token = @import("tokenizer.zig").Token;
const ziqlTokenizer = @import("ziqlTokenizer.zig").Tokenizer;
const ziqlToken = @import("ziqlTokenizer.zig").Token;
const cliTokenizer = @import("cliTokenizer.zig").Tokenizer;
const cliToken = @import("cliTokenizer.zig").Token;
const Allocator = std.mem.Allocator;
const print = std.debug.print;
@ -34,38 +36,42 @@ pub fn main() !void {
std.debug.print("{s}\n", .{storage.get("User").?.items[0].user.email});
// Lets get arguments and what the user want to do
var argsIterator = try std.process.ArgIterator.initWithAllocator(allocator);
defer argsIterator.deinit();
while (true) {
std.debug.print("> ", .{});
var line_buf: [1024]u8 = undefined;
const line = try std.io.getStdIn().reader().readUntilDelimiterOrEof(&line_buf, '\n');
if (line) |line_str| {
const null_term_line_str = try allocator.dupeZ(u8, line_str[0..line_str.len]);
// Skip executable
_ = argsIterator.next();
if (argsIterator.next()) |commandStr| {
const command = std.meta.stringToEnum(Commands, commandStr) orelse Commands.unknow;
switch (command) {
.run => {
const query = argsIterator.next();
var tokenizer = Tokenizer.init(query.?);
var token = tokenizer.next();
while (token.tag != Token.Tag.eof) {
std.debug.print("{any}\n", .{token});
token = tokenizer.next();
}
},
.help => {
std.debug.print("Welcome to ZipponDB!.", .{});
},
.describe => {
std.debug.print("Here the current schema:\nUser (\n\tname: str,\n\temail:str,\n\tfriend:User\n)\n", .{});
},
.unknow => {
std.debug.print("Unknow command, available are: run, describe, help.\n", .{});
},
else => {},
var cliToker = cliTokenizer.init(null_term_line_str);
const commandToken = cliToker.next();
switch (commandToken.tag) {
.keyword_run => {
const query_token = cliToker.next();
switch (query_token.tag) {
.string_literal => {
std.debug.print("Running query: {s}\n", .{line_str[query_token.loc.start + 1 .. query_token.loc.end - 1]});
},
else => {
std.debug.print("After command run, need a string of a query, eg: \"GRAB User\"\n", .{});
continue;
},
}
},
.keyword_describe => {
std.debug.print("Current schema: \n\nUser (\n\tid: UUID,\n\tname; str,\n\temail: str,\n\tmessages: []Message\n)\n\nMessage (\n\tid: UUID,\n\tcontent; str,\n\tfrom: User,\n)\n", .{});
},
.keyword_help => {
std.debug.print("Welcome to ZipponDB.\n\nrun\t\tTo run a query. Args: query: str, the query to execute.\ndescribe\tTo print the current schema.\nkill\t\tTo stop the process without saving\nsave\t\tSave the database to the normal files.\ndump\t\tCreate a new folder with all data as copy. Args: foldername: str, the name of the folder.\nbump\t\tReplace current data with a previous dump; Note: Save the current state with the dump command. Args: foldername: str, the name of the folder to use.\n", .{});
},
.keyword_quit => {
break;
},
else => {
std.debug.print("Command need to start with a keyword, including: run, describe, help and quit\n", .{});
},
}
}
} else {
std.debug.print("No args found. Available are: run, help.\n", .{});
}
}
@ -78,6 +84,16 @@ fn getById(array: anytype, id: UUID) !*dtypes.User {
return error.UUIDNotFound;
}
/// Returns true when `s` begins with a double quote.
/// Fixed: the previous version also required a closing quote at the end and
/// a length of at least 2, which contradicted the function name (that
/// combined check is an "is fully quoted" test, not a "starts with" test)
/// and made it disagree with its sibling `endsWithDoubleQuote`.
fn startsWithDoubleQuote(s: []const u8) bool {
    if (s.len == 0) return false;
    return s[0] == '"';
}
/// Returns true when `s` ends with a double quote.
/// Fixed: the previous `s.len < 2` guard wrongly rejected the one-character
/// string `"`, which does end with a double quote; only the empty string
/// has no last character to test.
fn endsWithDoubleQuote(s: []const u8) bool {
    if (s.len == 0) return false;
    return s[s.len - 1] == '"';
}
test "getById" {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();

View File

@ -54,19 +54,6 @@ pub const Token = struct {
period,
bang_equal,
};
pub fn lexeme(tag: Tag) ?[]const u8 {
return switch (tag) {
.invalid,
.identifier,
.string_literal,
.number_literal,
=> null,
.bang => "!",
.pipe => "|",
};
}
};
pub const Tokenizer = struct {
@ -355,7 +342,7 @@ pub const Tokenizer = struct {
test "keywords" {
try testTokenize("GRAB UPDATE ADD DELETE IN", &.{ .keyword_grab, .keyword_update, .keyword_add, .keyword_delete, .keyword_in });
std.debug.print("Keywords OK\n", .{});
std.debug.print("ZiQL keywords OK\n", .{});
}
test "basic query" {
@ -364,7 +351,7 @@ test "basic query" {
try testTokenize("GRAB User [1; name] {}", &.{ .keyword_grab, .identifier, .l_bracket, .number_literal, .semicolon, .identifier, .r_bracket, .l_brace, .r_brace });
try testTokenize("GRAB User{}|ASCENDING name|", &.{ .keyword_grab, .identifier, .l_brace, .r_brace, .pipe, .identifier, .identifier, .pipe });
try testTokenize("DELETE User[1]{name='Adrien'}|ASCENDING name, age|", &.{ .keyword_delete, .identifier, .l_bracket, .number_literal, .r_bracket, .l_brace, .identifier, .equal, .string_literal, .r_brace, .pipe, .identifier, .identifier, .comma, .identifier, .pipe });
std.debug.print("Basic query OK\n", .{});
std.debug.print("ZiQL query OK\n", .{});
}
fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {

BIN
zig-out/bin/zippon Executable file

Binary file not shown.

Binary file not shown.