Basic ADD working, started the GRAB parser

This commit is contained in:
Adrien Bouvais 2024-09-12 20:13:44 +02:00
parent dacc9fea67
commit 87e11188d2
30 changed files with 841 additions and 220 deletions

261
README.md

@ -1,146 +1,163 @@
# ZipponDB
Open-source database written 100% in Zig.
![alt text](https://github.com/MrBounty/ZipponDB/blob/main/logo.jpeg)
# Written in Zig
# Introduction
Zig is fast, blablabla
ZipponDB is a relational database written entirely in Zig from scratch.
It uses a custom query language named ZipponQL, or ZiQL for short.
# How it works
The first time you run ZipponDB, it will create a new ZipponDB directory and start the Zippon CLI.
From here, you can create a new engine by running `schema build`. It takes the file `schema.zipponschema` and builds a custom binary
using Zig that the CLI will then use to manipulate data. You then interact with the engine by using `run "My query go here"` or
by using the engine binary directly.
Meme "That's the neat part..."
## Why Zippon?
Zippon is a structured, relational, potentially in-memory database written entirely in Zig from scratch.
I first started ZipponDB to learn, and I think that in order to learn, you need to build something real, so I chose to build a database
as I try to become an expert in it.
Now for Zippon's advantages:
- Open-source and 100% in Zig with 0 dependencies
- Relational database
- Small, fast and implementable everywhere
# Declare a schema
ZipponDB needs a schema to work. A schema defines how your data will be stored. Compared to SQL, you can see it as a
file where you declare all table names, column names, data types and relationships. But here you declare structs. A struct has a name and
members. A member is a single piece of data or a link, and has an associated type. Here is a simple example for a user:
```
User (
name: str,
email: str,
best_friend: User,
friends: []User,
)
```
In this example, each user has a name and an email as strings, as well as one best friend as a link. [] means that there is a
list of this value. Note that all values can be null except lists, which can instead be empty.
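For a rough idea of what the engine does with this, each struct in the schema becomes a plain Zig struct inside the engine binary. A simplified sketch, based on the `dtypes.zig` in this commit (links are not handled yet):
```zig
const UUID = @import("uuid.zig").UUID;

// Simplified sketch: every entity gets a UUID, and each schema member becomes a field.
pub const User = struct {
    id: UUID,
    name: []const u8,
    email: []const u8,

    pub fn init(name: []const u8, email: []const u8) User {
        return User{ .id = UUID.init(), .name = name, .email = email };
    }
};
```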
Here is a more advanced example with multiple structs:
```
User {
name: str,
email: str,
friends: []User,
posts: []Post,
liked_post: []Post,
comments: []Comment,
liked_com: []Comment,
}
Post {
title: str,
image: str,
at: date,
from: User,
like_by: []User,
comments: []Comment,
}
Comment {
content: str,
at: date,
from: User,
like_by: []User,
of: Post,
}
```
Note: data not yet implemented.
# ZipponQL
Zippon has its own query language. Why? Idk, I wanted to do it.
The language itself is fairly easy in my opinion. Here are the basics:
- {} are filters
- [] are how much and which data to return
- () are new or updated data (not already in a file)
- || are additional options
- Links need to be specified between [] to be returned; other members are returned automatically
- Data are in struct format and can have links
### Some examples
`GRAB User`
Get all users
`GRAB User { name = 'Adrien' }`
Get all users named Adrien
`GRAB User [1; email]`
Get the email of one user
`GRAB User | ASCENDING name |`
Get all users ordered by name
`GRAB User [name] { age > 10 AND name != 'Adrien' } | DECENDING age |`
Get just the name of all users that are more than 10 years old and not named Adrien, ordered by descending age
`GRAB User [1] { bestfriend = { name = 'Adrien' } }`
Get one user that has a best friend named Adrien
`GRAB User [10; friends [1]] { age > 10 } | ASC name |`
Get one friend of the first 10 users above 10 years old, in ascending name order.
### Not yet implemented
`GRAB Message [100; comments [ date ] ] { .writter = { name = 'Adrien' }.bestfriend }`
Get the date of 100 comments written by the best friend of a user named Adrien
`GRAB User { IN Message { date > '12-01-2014' }.writter }`
Get all users that sent a message after the 12th of January 2014
`GRAB User { !IN Comment { }.writter }`
Get all users that didn't write a comment
`GRAB User { IN User { name = 'Adrien' }.friends }`
Get all users that are friends with an Adrien
`UPDATE User [1] { name = 'Adrien' } => ( email = 'new@email.com' )`
`REMOVE User { id = '000-000' }`
`ADD User ( name = 'Adrien', email = 'email', age = 40 )`
You build a binary according to your schema; you can just run it to access a CLI and it will create and manage a folder 'zipponDB_DATA'.
Then you do what you want with it, including:
- Run it alongside your app as a separate process and folder (see the sketch after this list)
- Create a Docker container and open some ports
- Create a Docker container with a small API like Flask
- Other stuff, I'm sure some will find something nice
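As a sketch of the first option: the engine binary in this commit takes the query as its first argument, so another Zig program can drive it as a separate process roughly like this (the `./engine` path is illustrative, and the exact `std.process.Child` API depends on your Zig version):
```zig
const std = @import("std");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Spawn the engine binary with the query as its first argument,
    // the same way the CLI's `run "..."` command does.
    var child = std.process.Child.init(
        &[_][]const u8{ "./engine", "GRAB User { name = 'Adrien' }" },
        allocator,
    );
    try child.spawn();
    _ = try child.wait();
}
```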
# Integration
For now there is only a Python integration, but because it is just 2-3 commands, it is easy to implement in other languages.
## Python
```python
import zippondb as zdb
client = zdb.newClient('path/to/binary')
print(client.exe('describe'))
client.exe('schema build')
print(client.exe('schema describe'))
# Return named tuple
users = client.exe('GRAB User {}')
# Return named tuple of all users
users = client.run('GRAB User {}')
for user in users:
    print(user.name)
client.exe('save')
```
# Benchmark
# Roadmap
I made a database with random data. The schema looks like this:
```
User {
name: str,
email: str,
friends: []User.friends,
posts: []Post.from,
liked_post: []Post.like_by,
comments: []Comment.from,
liked_com: []Comment.like_by,
}
Post {
title: str,
image: str,
at: date,
from: User.posts,
like_by: []User.liked_post,
comments: []Comment.of,
}
Comment {
content: str,
at: date,
from: User.comments,
like_by: User.liked_com,
of: Post.comments,
}
```
As you can see, links need to be defined in both structs. [] means an array of values.
For example, `posts: []Post.from,` and `from: User.posts,` mean that a `User` can have multiple posts (an array of `Post`) and a post has
just one author, both linked through the members `posts` and `from`.
# Create a schema
Zippon uses structs as its way of saving data. A struct stores multiple pieces of data of different types,
very similar to a row in a table, with columns being the data types and a row being a single struct.
The schema is directly INSIDE the binary, so each binary is tied to one schema! This is for efficiency, idk to be honest, I guess? lol
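Concretely, in this commit the schema lives as constants compiled into the engine. The engine's `src/dtypes.zig` (shown further down in this diff) currently declares, among other things:
```zig
// Struct names and their member names, baked into the engine binary at build time.
pub const struct_name_list: [2][]const u8 = .{
    "User",
    "Message",
};
pub const struct_member_list: [2][]const []const u8 = .{
    &[_][]const u8{ "name", "email", "age", "scores" },
    &[_][]const u8{"content"},
};
```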
# Migration
For now you can't migrate the data from one binary to another, so you will need two different binaries.
# Zippon language
Ok so I went crazy on that one, it has its own language. It is stupid and I love it. I wanted to do it like EdgeDB but no, too simple.
Anyway, I tried to do something different, idk, you're the judge of it.
```
GRAB User { name = 'Adrien' }
Get all users named Adrien
GRAB User [1; email] { }
Get the email of one user
GRAB User {} | ASCENDING name |
Get all users ordered by name
GRAB User [name] { age > 10 AND name != 'Adrien' } | DECENDING age |
Get just the name of all users that are more than 10 years old and not named Adrien, ordered by descending age
GRAB User { bestfriend = { name = 'Adrien' } }
GRAB User { bestfriend = User{ name = 'Adrien' } } // Same
Get all users that have a best friend named Adrien
GRAB User [10] { IN User [1] { age > 10 } | ASC name |.friends }
Get 10 users that are friends with the first user older than 10, taking users in ascending name order
GRAB Message [100; comments [ date ] ] { .writter = { name = 'Adrien' }.bestfriend }
Get the date of 100 comments whose writer is the best friend of a user named Adrien
GRAB User { IN Message { date > '12-01-2014' }.writter }
Get all users that sent a message after the 12th of January 2014
GRAB User { !IN Comment { }.writter }
Get all users that didn't write a comment
GRAB User { IN User { name = 'Adrien' }.friends }
Get all users that are friends with an Adrien
UPDATE User [1] { name = 'Adrien' } => ( email = 'new@email.com' )
REMOVE User { id = '000-000' }
ADD User ( name = 'Adrien', email = 'email', age = 40 )
```
- {} are filters
- [] are how much and which data to return
- () are new or updated data (not already saved)
- || are additional options
- Data are in struct format and can have links
- By default, every value other than links is returned by a query, to prevent recursive returns (User.friends in User.friends)
# How it really works
NOTE: Do this in a separate file
## Tokenizer
The tokenizer of the language is
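As a rough usage sketch (adapted from `src/test.zig` in this commit, assuming the file lives next to `src/main.zig`):
```zig
const std = @import("std");
const ziqlTokenizer = @import("tokenizers/ziqlTokenizer.zig").Tokenizer;

test "tokenize a ZiQL query" {
    const allocator = std.testing.allocator;

    // The tokenizer works on a null-terminated buffer.
    const query = try allocator.dupeZ(u8, "GRAB User { name = 'Adrien' }");
    defer allocator.free(query);

    var toker = ziqlTokenizer.init(query);
    var token = toker.next();
    while (token.tag != .eof) : (token = toker.next()) {
        // Print each token's tag and the slice of the query it covers.
        std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), toker.getTokenSlice(token) });
    }
}
```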
# ZipponDB
[ ] Beta without link
[ ] Relationships/links
[ ] Multi threading
[ ] Transaction
[ ] Docker image
[ ] Migration of schema
[ ] Dump/Bump data
[ ] In memory option
[ ] Archives


@ -0,0 +1 @@


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000 26


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000 'adrien@gmail.com'


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000 'Adrien'


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000 null


@ -0,0 +1 @@
2a7bf36b-20c8-4a1d-8f86-398311220000


@ -19,21 +19,50 @@ pub fn build(b: *std.Build) void {
run_step.dependOn(&run_cmd.step);
// Test step
const cliTokenizer_tests = b.addTest(.{
const tests1 = b.addTest(.{
.root_source_file = b.path("src/parsers/data-parsing.zig"),
.target = target,
.optimize = optimize,
.name = "Data parsing",
});
const run_tests1 = b.addRunArtifact(tests1);
const tests2 = b.addTest(.{
.root_source_file = b.path("src/tokenizers/cliTokenizer.zig"),
.target = target,
.optimize = optimize,
.name = "CLID Tokenizer test",
.name = "CLI tokenizer",
});
const ziqlTokenizer_tests = b.addTest(.{
const run_tests2 = b.addRunArtifact(tests2);
const tests3 = b.addTest(.{
.root_source_file = b.path("src/tokenizers/ziqlTokenizer.zig"),
.target = target,
.optimize = optimize,
.name = "ZiQL Tokenizer test",
.name = "ZiQL tokenizer",
});
const run_cliTokenizer_tests = b.addRunArtifact(cliTokenizer_tests);
const run_ziqlTokenizer_tests = b.addRunArtifact(ziqlTokenizer_tests);
const run_tests3 = b.addRunArtifact(tests3);
const tests4 = b.addTest(.{
.root_source_file = b.path("src/tokenizers/schemaTokenizer.zig"),
.target = target,
.optimize = optimize,
.name = "Schema tokenizer",
});
const run_tests4 = b.addRunArtifact(tests4);
const tests5 = b.addTest(.{
.root_source_file = b.path("src/test.zig"),
.target = target,
.optimize = optimize,
.name = "ADD functions",
});
const run_tests5 = b.addRunArtifact(tests5);
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_cliTokenizer_tests.step);
test_step.dependOn(&run_ziqlTokenizer_tests.step);
test_step.dependOn(&run_tests1.step);
test_step.dependOn(&run_tests2.step);
test_step.dependOn(&run_tests3.step);
test_step.dependOn(&run_tests4.step);
test_step.dependOn(&run_tests5.step);
}

BIN
engine

Binary file not shown.

BIN
engine.o

Binary file not shown.


@ -103,6 +103,7 @@ pub fn main() !void {
.keyword_quit => {
break;
},
.eof => {},
else => {
std.debug.print("Command need to start with a keyword, including: run, schema, help and quit\n", .{});
},
@ -157,17 +158,30 @@ fn buildEngine() !void {
for (dtypes.struct_name_list) |struct_name| {
data_dir.makeDir(struct_name) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => @panic("Error other than path already exists when trying to create the a member directory.\n"),
};
const dir = try data_dir.openDir(struct_name, .{});
_ = dir.createFile("main.zippondata", .{}) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => @panic("Error: can't create main.zippondata"),
};
_ = dir.createFile("1.zippondata", .{}) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => @panic("Error: can't create 1.zippondata"),
else => @panic("Error other than path already exists when trying to create a struct directory.\n"),
};
const struct_dir = try data_dir.openDir(struct_name, .{});
const member_names = dtypes.structName2structMembers(struct_name);
for (member_names) |member_name| {
struct_dir.makeDir(member_name) catch |err| switch (err) {
error.PathAlreadyExists => return,
else => @panic("Error other than path already exists when trying to create a member directory.\n"),
};
const member_dir = try struct_dir.openDir(member_name, .{});
blk: {
const file = member_dir.createFile("main.zippondata", .{}) catch |err| switch (err) {
error.PathAlreadyExists => break :blk,
else => @panic("Error: can't create main.zippondata"),
};
try file.writeAll("\n");
}
_ = member_dir.createFile("1.zippondata", .{}) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => @panic("Error: can't create 1.zippondata"),
};
}
}
}


@ -3,7 +3,9 @@ const dtypes = @import("dtypes.zig");
const UUID = @import("uuid.zig").UUID;
const ziqlTokenizer = @import("tokenizers/ziqlTokenizer.zig").Tokenizer;
const ziqlToken = @import("tokenizers/ziqlTokenizer.zig").Token;
const grabParser = @import("query_functions/GRAB.zig").Parser;
const Allocator = std.mem.Allocator;
const parseDataAndAddToFile = @import("query_functions/ADD.zig").parseDataAndAddToFile;
pub const Error = error{UUIDNotFound};
const stdout = std.io.getStdOut().writer();
@ -12,6 +14,9 @@ pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
const buffer = try allocator.alloc(u8, 1024);
defer allocator.free(buffer);
// Init the storage string map that tracks all the arrays of structs
var storage = std.StringHashMap(*std.ArrayList(dtypes.Types)).init(allocator);
defer storage.deinit();
@ -24,33 +29,32 @@ pub fn main() !void {
}
// Add user
const adrien = dtypes.User.init("Adrien", "adrien@gmail.com");
try storage.get("User").?.append(dtypes.Types{ .User = &adrien });
const adrien_get = storage.get("User").?.items[0].User;
if (std.meta.eql(adrien_get, &adrien)) {
try stdout.print("adrien == adrien_get\n\n", .{});
}
// Add a new user
// const newUser = try dtypes.User.new(allocator, "Adrien", "adrien@gmail.com");
// try storage.get("User").?.append(dtypes.Types{ .user = newUser });
//const adrien = dtypes.User.init("Adrien", "adrien@gmail.com");
//try storage.get("User").?.append(dtypes.Types{ .User = &adrien });
//const adrien_get = storage.get("User").?.items[0].User;
var args = try std.process.argsWithAllocator(allocator);
defer args.deinit();
// Remove the first argument
_ = args.next();
const null_term_query_str = args.next();
const null_term_query_str = args.next().?;
var ziqlToker = ziqlTokenizer.init(null_term_query_str.?);
const firstToken = ziqlToker.next();
switch (firstToken.tag) {
var ziqlToker = ziqlTokenizer.init(null_term_query_str);
const first_token = ziqlToker.next();
const struct_name_token = ziqlToker.next();
switch (first_token.tag) {
.keyword_grab => {
try stdout.print("Hello from engine\n", .{});
var parser = grabParser.init(allocator, &ziqlToker);
try parser.parse_additional_data();
},
.keyword_add => {
try stdout.print("Not yet implemented.\n", .{});
if (!isStructInSchema(ziqlToker.getTokenSlice(struct_name_token))) {
try stdout.print("Error: No struct named '{s}' in current schema.", .{ziqlToker.getTokenSlice(struct_name_token)});
return;
}
try parseDataAndAddToFile(allocator, ziqlToker.getTokenSlice(struct_name_token), &ziqlToker);
},
.keyword_update => {
try stdout.print("Not yet implemented.\n", .{});
@ -67,26 +71,14 @@ pub fn main() !void {
}
}
fn getById(array: anytype, id: UUID) !*dtypes.User {
for (array.items) |data| {
if (data.id.compare(id)) {
return data;
/// Check if a string is the name of a struct in the currently used engine
fn isStructInSchema(struct_name_to_check: []const u8) bool {
if (std.mem.eql(u8, struct_name_to_check, "describe")) return true;
for (dtypes.struct_name_list) |struct_name| {
if (std.mem.eql(u8, struct_name_to_check, struct_name)) {
return true;
}
}
return error.UUIDNotFound;
}
// Function to add and test:
// - Create one entity
// - Search one entity filtering a list of key/value. Eg: User with name = 'Adrien' and age > 10
test "getById" {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
var users = std.ArrayList(*dtypes.User).init(allocator);
try users.append(try dtypes.User.new(allocator, "Adrien", "adrien@gmail.com"));
const adrien = try getById(users, users.items[0].id);
try std.testing.expect(UUID.compare(users.items[0].id, adrien.id));
return false;
}


@ -1,15 +1,18 @@
const std = @import("std");
const UUID = @import("uuid.zig").UUID;
const dataParsing = @import("parsers/data-parsing.zig");
pub const parameter_max_file_size = 1e+7; // THe number of bytes than each file can be before splitting
pub const parameter_max_file_size_in_bytes = 500; // The number of bytes that each file can reach before splitting
pub const User = struct {
id: UUID,
name: []const u8,
email: []const u8,
age: i64,
scores: []i64,
pub fn init(name: []const u8, email: []const u8) User {
return User{ .id = UUID.init(), .name = name, .email = email };
pub fn init(name: []const u8, email: []const u8, age: i64, scores: []i64) User {
return User{ .id = UUID.init(), .name = name, .email = email, .age = age, .scores = scores };
}
};
@ -27,14 +30,43 @@ pub const Types = union {
Message: *const Message,
};
// Maybe create a struct like StructMetadata for the string lists of members and names, etc.
pub const struct_name_list: [2][]const u8 = .{
"User",
"Message",
};
pub const struct_member_list: [2][][]const u8 = .{
.{ "name", "email" },
.{"content"},
pub const struct_member_list: [2][]const []const u8 = .{
&[_][]const u8{ "name", "email", "age", "scores" },
&[_][]const u8{"content"},
};
// For now there are 4 types of data: str, int, float, bool
const MemberTypes = enum { int, float, bool, str };
pub const describe_str = "User (\n name: str,\n email: str,\n)\n\nMessage (\n content: str,\n)\n";
/// Use a map of member name / value string to create a new entity of a type.
/// The goal is to build an array of maps while parsing files, then return an array of entities and do some filtering on it.
pub fn createEntityFromMap(allocator: std.mem.Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) !*Types {
var t = try allocator.create(Types);
if (std.mem.eql(u8, struct_name, "User")) {
const age = try std.fmt.parseInt(i64, map.get("age").?, 10);
const scores = dataParsing.parseArrayInt(allocator, map.get("scores").?);
t.User = &User.init(map.get("name").?, map.get("email").?, age, scores.items);
} else {
return error.UnknowStructName;
}
return t;
}
/// Get the list of all member names for a struct name
pub fn structName2structMembers(struct_name: []const u8) []const []const u8 {
var i: u16 = 0;
while (i < struct_name_list.len) : (i += 1) {
if (std.mem.eql(u8, struct_name_list[i], struct_name)) break;
}
return struct_member_list[i];
}


@ -1,33 +0,0 @@
const std = @import("std");
const UUID = @import("uuid.zig").UUID;
pub const User = struct {
id: UUID,
name: []u8,
email: []u8,
pub fn init(id: UUID, name: []const u8, email: []const u8) User {
return User{ .id = id, .name = name, .email = email };
}
};
pub const Message = struct {
id: UUID,
content: []u8,
pub fn init(id: UUID, content: []const u8) Message {
return Message{ .id = id, .content = content };
}
};
pub const Types = union {
User: *User,
Message: *Message,
};
pub const struct_name_list: [2][]const u8 = .{
"User",
"Message",
};
pub const describe_str = "User (\n name: str,\n email: str,\n)\n\nMessage (\n content: str,\n)\n";

62
src/dtypes_test.zig Normal file

@ -0,0 +1,62 @@
const std = @import("std");
const UUID = @import("uuid.zig").UUID;
pub const parameter_max_file_size_in_bytes = 500; // The number of bytes that each file can reach before splitting
pub const User = struct {
id: UUID,
name: []const u8,
email: []const u8,
pub fn init(name: []const u8, email: []const u8) User {
return User{ .id = UUID.init(), .name = name, .email = email };
}
};
pub const Message = struct {
id: UUID,
content: []const u8,
pub fn init(content: []const u8) Message {
return Message{ .id = UUID.init(), .content = content };
}
};
pub const Types = union {
User: *const User,
Message: *const Message,
};
// Maybe create a struct like StructMetadata for the string lists of members and names, etc.
pub const struct_name_list: [2][]const u8 = .{
"User",
"Message",
};
pub const struct_member_list: [2][]const []const u8 = .{
&[_][]const u8{ "name", "email" },
&[_][]const u8{"content"},
};
pub const describe_str = "User (\n name: str,\n email: str,\n)\n\nMessage (\n content: str,\n)\n";
pub fn createEntityFromMap(allocator: std.mem.Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) !*Types {
var t = try allocator.create(Types);
if (std.mem.eql(u8, struct_name, "User")) {
t.User = &User.init(map.get("name").?, map.get("email").?);
} else {
return error.UnknowStructName;
}
return t;
}
/// Get the list of all member names for a struct name
pub fn structName2structMembers(struct_name: []const u8) []const []const u8 {
var i: u16 = 0;
while (i < struct_name_list.len) : (i += 1) {
if (std.mem.eql(u8, struct_name_list[i], struct_name)) break;
}
return struct_member_list[i];
}


@ -0,0 +1,39 @@
const std = @import("std");
// Series of functions to use just before creating an entity.
// Will transform the string of data into data of the right type.
pub fn parseInt(value_str: []const u8) i64 {
return std.fmt.parseInt(i64, value_str, 10) catch return 0;
}
pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) {
var array = std.ArrayList(i64).init(allocator);
var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " ");
while (it.next()) |x| {
array.append(parseInt(x)) catch {};
}
return array;
}
test "Data parsing" {
const allocator = std.testing.allocator;
// Int
const in1: [3][]const u8 = .{ "1", "42", "Hello" };
const expected_out1: [3]i64 = .{ 1, 42, 0 };
for (in1, 0..) |value, i| {
try std.testing.expect(parseInt(value) == expected_out1[i]);
}
std.debug.print("OK\tData parsing: Int\n", .{});
// Int array
const in2 = "[1 14 44 42 hello]";
const out2 = parseArrayInt(allocator, in2);
defer out2.deinit();
const expected_out2: [5]i64 = .{ 1, 14, 44, 42, 0 };
try std.testing.expect(std.mem.eql(i64, out2.items, &expected_out2));
std.debug.print("OK\tData parsing: Int array\n", .{});
}


@ -143,6 +143,8 @@ pub const Parser = struct {
}
}
// Use @embedFile
// Make the union `Types` with all the different structs
self.writeToFile("pub const Types = union {\n");
for (struct_array.items) |struct_name| {

335
src/query_functions/ADD.zig Normal file

@ -0,0 +1,335 @@
const std = @import("std");
const dtypes = @import("../dtypes.zig");
const UUID = @import("../uuid.zig").UUID;
const ziqlTokenizer = @import("../tokenizers/ziqlTokenizer.zig").Tokenizer;
const ziqlToken = @import("../tokenizers/ziqlTokenizer.zig").Token;
const Allocator = std.mem.Allocator;
const stdout = std.io.getStdOut().writer();
// TODO to improve this part of the code:
// 1. Use logging
// 2. Create a struct that manages files, with members: stdout, folder (e.g. the User folder),
// Queries that need to work now
// ADD User (name='Adrien', email='adrien.bouvais@gmail.com') OK
// ADD User (name='Adrien', email='adrien.bouvais@gmail.com', age = 26) OK
// ADD User (name='Adrien', email='adrien.bouvais@gmail.com', books = ['book1', 'book2']) OK
// ADD User (name='Adrien', email=null) OK
//
// For later: links
// ADD User (name = 'Adrien', best_friend = {name='bob'}, friends = {name != 'bob'}) NOT OK
// ADD User (name = 'Adrien', friends = {(name = 'bob' AND age > 16) OR (id = '0000-0000')} ) NOT OK
// TODO: make real test
/// Function for the ADD query command.
/// It will parse the rest of the query and create a map of member name / value.
/// Then it adds those values to the appropriate files. The proper file is the first one with a size below the limit.
/// If no file is found, a new one is created.
pub fn parseDataAndAddToFile(allocator: Allocator, struct_name: []const u8, toker: *ziqlTokenizer) !void {
const token = toker.next();
switch (token.tag) {
.l_paren => {},
else => {
try stdout.print("Error: Expected ( after the struct name of an ADD command.\nE.g. ADD User (name = 'bob')\n", .{});
return;
},
}
const buffer = try allocator.alloc(u8, 1024 * 100);
defer allocator.free(buffer);
var member_map = getMapOfMember(allocator, toker) catch return;
defer member_map.deinit();
if (!checkIfAllMemberInMap(struct_name, &member_map)) return;
const entity = try dtypes.createEntityFromMap(allocator, struct_name, member_map);
const uuid_str = entity.User.*.id.format_uuid();
defer stdout.print("Added new {s} successfully using UUID: {s}\n", .{
struct_name,
uuid_str,
}) catch {};
const member_names = dtypes.structName2structMembers(struct_name);
for (member_names) |member_name| {
var file_map = getFilesStat(allocator, struct_name, member_name) catch {
try stdout.print("Error: File stat error", .{});
return;
};
const potential_file_name_to_use = getFirstUsableFile(&file_map);
if (potential_file_name_to_use) |file_name| {
const file_index = fileName2Index(file_name) catch @panic("Error in fileName2Index");
try stdout.print("Using file: {s} with a size of {d}\n", .{ file_name, file_map.get(file_name).?.size });
const path = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/{s}", .{
struct_name,
member_name,
file_name,
});
var file = std.fs.cwd().openFile(path, .{
.mode = .read_write,
}) catch {
try stdout.print("Error opening data file.", .{});
return;
};
defer file.close();
try file.seekFromEnd(0);
try file.writer().print("{s} {s}\n", .{ uuid_str, member_map.get(member_name).? });
const path_to_main = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/main.zippondata", .{
struct_name,
member_name,
});
var file_main = std.fs.cwd().openFile(path_to_main, .{
.mode = .read_write,
}) catch {
try stdout.print("Error opening data file.", .{});
return;
};
defer file_main.close();
try appendToLineAtIndex(allocator, file_main, file_index, &uuid_str);
} else {
const max_index = maxFileIndex(&file_map);
const new_file_path = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/{d}.zippondata", .{
struct_name,
member_name,
max_index + 1,
});
try stdout.print("new file path: {s}\n", .{new_file_path});
// TODO: Create new file and save the data inside
const new_file = std.fs.cwd().createFile(new_file_path, .{}) catch @panic("Error creating new data file");
defer new_file.close();
try new_file.writer().print("{s} {s}\n", .{ &uuid_str, member_map.get(member_name).? });
const path_to_main = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/main.zippondata", .{
struct_name,
member_name,
});
var file_main = std.fs.cwd().openFile(path_to_main, .{
.mode = .read_write,
}) catch {
try stdout.print("Error opening data file.", .{});
@panic("");
};
defer file_main.close();
try file_main.seekFromEnd(0);
try file_main.writeAll("\n ");
try file_main.seekTo(0);
try appendToLineAtIndex(allocator, file_main, max_index + 1, &uuid_str);
}
}
}
/// Take the main.zippondata file, the index of the file where the data is saved and the string to add at the end of the line
fn appendToLineAtIndex(allocator: std.mem.Allocator, file: std.fs.File, index: usize, str: []const u8) !void {
const buffer = try allocator.alloc(u8, 1024 * 100);
defer allocator.free(buffer);
var reader = file.reader();
var line_num: usize = 1;
while (try reader.readUntilDelimiterOrEof(buffer, '\n')) |_| {
if (line_num == index) {
try file.seekBy(-1);
try file.writer().print("{s} ", .{str});
return;
}
line_num += 1;
}
}
/// Return a map of file path => Stat for one struct and member name
fn getFilesStat(allocator: Allocator, struct_name: []const u8, member_name: []const u8) !std.StringHashMap(std.fs.File.Stat) {
const cwd = std.fs.cwd();
const buffer = try allocator.alloc(u8, 1024); // Adjust the size as needed
defer allocator.free(buffer);
const path = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}", .{ struct_name, member_name });
var file_map = std.StringHashMap(std.fs.File.Stat).init(allocator);
const member_dir = cwd.openDir(path, .{ .iterate = true }) catch {
try stdout.print("Error opening struct directory", .{});
@panic("");
};
var iter = member_dir.iterate();
while (try iter.next()) |entry| {
if (entry.kind != std.fs.Dir.Entry.Kind.file) continue;
const file_stat = member_dir.statFile(entry.name) catch
{
try stdout.print("Error getting stat of a file", .{});
@panic("");
};
file_map.put(entry.name, file_stat) catch @panic("Error adding stat to map");
}
return file_map;
}
/// Use the map of file stats to find the first file under the bytes limit.
/// Return the name of the file. If none is found, return null.
fn getFirstUsableFile(map: *std.StringHashMap(std.fs.File.Stat)) ?[]const u8 {
var iter = map.keyIterator();
while (iter.next()) |key| {
if (std.mem.eql(u8, key.*, "main.zippondata")) continue;
if (map.get(key.*).?.size < dtypes.parameter_max_file_size_in_bytes) return key.*;
}
return null;
}
fn fileName2Index(file_name: []const u8) !usize {
try stdout.print("Got file name: {s}\n", .{file_name});
var iter_file_name = std.mem.tokenize(u8, file_name, ".");
const num_str = iter_file_name.next().?;
const num: usize = try std.fmt.parseInt(usize, num_str, 10);
return num;
}
/// Iterate over all files and return the maximum file index as a usize.
/// So for example if there are 1.zippondata and 2.zippondata, it returns 2.
fn maxFileIndex(map: *std.StringHashMap(std.fs.File.Stat)) usize {
var iter = map.keyIterator();
var index_max: usize = 0;
while (iter.next()) |key| {
if (std.mem.eql(u8, key.*, "main.zippondata")) continue;
var iter_file_name = std.mem.tokenize(u8, key.*, ".");
const num_str = iter_file_name.next().?;
const num: usize = std.fmt.parseInt(usize, num_str, 10) catch @panic("Error parsing file name into usize");
if (num > index_max) index_max = num;
}
return index_max;
}
const MemberMapError = error{
NotMemberName,
NotEqualSign,
NotStringOrNumber,
NotComma,
PuttingNull,
};
/// Take the tokenizer and return a map of the ADD query.
/// Keys are the member names and values are the value strings from the query, e.g. 'Adrien' or '10'.
pub fn getMapOfMember(allocator: Allocator, toker: *ziqlTokenizer) !std.StringHashMap([]const u8) {
std.debug.print("Started\n\n", .{});
var token = toker.next();
std.debug.print("{any}\n\n", .{token});
var member_map = std.StringHashMap([]const u8).init(
allocator,
);
std.debug.print("OK \n\n", .{});
while (token.tag != ziqlToken.Tag.eof) : (token = toker.next()) {
std.debug.print("{any}\n\n", .{token});
switch (token.tag) {
.r_paren => continue,
.invalid => stdout.print("Error: Invalid token: {s}", .{toker.getTokenSlice(token)}) catch {},
.identifier => {
const member_name_str = toker.getTokenSlice(token);
token = toker.next();
switch (token.tag) {
.equal => {
token = toker.next();
switch (token.tag) {
.string_literal, .number_literal => {
const value_str = toker.getTokenSlice(token);
member_map.put(member_name_str, value_str) catch @panic("Could not add member name and value to map in getMapOfMember");
token = toker.next();
switch (token.tag) {
.comma, .r_paren => continue,
else => {
stdout.print("Error: Expected , after string or number got: {s}. E.g. ADD User (name='bob', age=10)", .{toker.getTokenSlice(token)}) catch {};
return MemberMapError.NotComma;
},
}
},
.keyword_null => {
try stdout.print("Found null value\n", .{});
const value_str = "null";
member_map.put(member_name_str, value_str) catch {
try stdout.print("Error putting null value into the map\n", .{});
return MemberMapError.PuttingNull;
};
token = toker.next();
switch (token.tag) {
.comma, .r_paren => continue,
else => {
stdout.print("Error: Expected , after string or number got: {s}. E.g. ADD User (name='bob', age=10)", .{toker.getTokenSlice(token)}) catch {};
return MemberMapError.NotComma;
},
}
},
.l_bracket => {
var array_values = std.ArrayList([]const u8).init(allocator);
token = toker.next();
while (token.tag != ziqlToken.Tag.r_bracket) : (token = toker.next()) {
switch (token.tag) {
.string_literal, .number_literal => {
const value_str = toker.getTokenSlice(token);
array_values.append(value_str) catch @panic("Could not add value to array in getMapOfMember");
},
.invalid => stdout.print("Error: Invalid token: {s}", .{toker.getTokenSlice(token)}) catch {},
else => {
stdout.print("Error: Expected string or number in array got: {s}. E.g. ADD User (scores=[10 20 30])", .{toker.getTokenSlice(token)}) catch {};
return MemberMapError.NotStringOrNumber;
},
}
}
const array_str = try std.mem.join(allocator, " ", array_values.items);
member_map.put(member_name_str, array_str) catch @panic("Could not add member name and value to map in getMapOfMember");
}, // TODO
else => {
stdout.print("Error: Expected string or number after a = got: {s}. E.g. ADD User (name='bob')", .{toker.getTokenSlice(token)}) catch {};
return MemberMapError.NotStringOrNumber;
},
}
},
else => {
stdout.print("Error: Expected = after a member declaration get {s}. E.g. ADD User (name='bob')", .{toker.getTokenSlice(token)}) catch {};
return MemberMapError.NotEqualSign;
},
}
},
else => {
stdout.print("Error: Unknow token: {s}. This should be the name of a member. E.g. name in ADD User (name='bob')", .{toker.getTokenSlice(token)}) catch {};
return MemberMapError.NotMemberName;
},
}
}
return member_map;
}
/// Using the name of a struct from dtypes and the map of member name => value string from the query,
/// check if the map keys are exactly the same as the names of the members of the struct.
/// Basically checking if the query contains all the values that a struct needs to be initialized.
fn checkIfAllMemberInMap(struct_name: []const u8, map: *std.StringHashMap([]const u8)) bool {
const all_struct_member = dtypes.structName2structMembers(struct_name);
var count: u16 = 0;
for (all_struct_member) |key| {
if (map.contains(key)) count += 1 else stdout.print("Error: ADD query of struct: {s}; missing member: {s}\n", .{
struct_name,
key,
}) catch {};
}
return ((count == all_struct_member.len) and (count == map.count()));
}


@ -0,0 +1,100 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const ziqlTokenizer = @import("../tokenizers/ziqlTokenizer.zig").Tokenizer;
// To work now
// GRAB User {}
// GRAB User {name = 'Adrien'}
// GRAB User {name='Adrien' AND age < 30}
// GRAB User [1] {}
// GRAB User [10; name] {age < 30}
//
// For later
const stdout = std.io.getStdOut().writer();
const AdditionalData = struct {
entity_to_find: usize = 0,
member_to_find: std.ArrayList([]const u8),
pub fn init(allocator: Allocator) AdditionalData {
return AdditionalData{ .member_to_find = std.ArrayList([]const u8).init(allocator) };
}
};
pub const Parser = struct {
allocator: Allocator,
additional_data: AdditionalData,
toker: *ziqlTokenizer,
state: State,
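// States for parsing the additional data part of a query,
// i.e. the `[10; name]` in `GRAB User [10; name] {age < 30}` from the comments above.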
const State = enum {
start,
invalid,
end,
expect_additional_data,
expect_count_of_entity_to_find,
expect_semicolon,
expect_filter,
};
pub fn init(allocator: Allocator, toker: *ziqlTokenizer) Parser {
return Parser{ .allocator = allocator, .toker = toker, .state = State.expect_additional_data, .additional_data = AdditionalData.init(allocator) };
}
pub fn deinit(self: *Parser) void {
self.additional_data.member_to_find.deinit();
}
pub fn parse_additional_data(self: *Parser) !void {
var token = self.toker.next();
while (self.state != State.end) : (token = self.toker.next()) {
switch (self.state) {
.expect_additional_data => {
switch (token.tag) {
.l_bracket => {
try stdout.print("Additional data found.\n", .{});
self.state = State.expect_count_of_entity_to_find;
},
else => {
try stdout.print("No additional data found.\n", .{});
self.state = State.expect_filter;
},
}
},
.expect_count_of_entity_to_find => {
switch (token.tag) {
.number_literal => {
try stdout.print("Count of entity found.\n", .{});
self.state = State.expect_semicolon;
},
else => {
try stdout.print("No count of entity found.\n", .{});
self.state = State.expect_filter;
},
}
},
.expect_semicolon => {
switch (token.tag) {
.semicolon => {
try stdout.print("Found semiconlon.\n", .{});
self.state = State.expect_semicolon;
},
else => {
try stdout.print("Expected semicon here: {s}.\n", .{self.toker.buffer[token.loc.start - 5 .. token.loc.end + 5]});
self.state = State.invalid;
},
}
},
.invalid => {
return;
},
else => {
try stdout.print("End\n", .{});
},
}
}
}
};

22
src/test.zig Normal file

@ -0,0 +1,22 @@
const std = @import("std");
const dtypes = @import("dtypes.zig");
const UUID = @import("uuid.zig").UUID;
const ziqlTokenizer = @import("tokenizers/ziqlTokenizer.zig").Tokenizer;
const ziqlToken = @import("tokenizers/ziqlTokenizer.zig").Token;
// Test for functions in query_functions/ADD.zig
const getMapOfMember = @import("query_functions/ADD.zig").getMapOfMember;
test "Get map of members" {
const allocator = std.testing.allocator;
const in = "(name='Adrien', email='adrien@gmail.com', age=26, scores=[42 100 5])";
const null_term_in = try allocator.dupeZ(u8, in);
var toker = ziqlTokenizer.init(null_term_in);
const member_map = try getMapOfMember(allocator, &toker);
std.debug.print("{s}", .{member_map.get("name").?});
allocator.free(null_term_in);
}


@ -16,6 +16,7 @@ pub const Token = struct {
.{ "DELETE", .keyword_delete },
.{ "ADD", .keyword_add },
.{ "IN", .keyword_in },
.{ "null", .keyword_null },
.{ "__DESCRIBE__", .keyword__describe__ },
});
@ -32,6 +33,7 @@ pub const Token = struct {
keyword_delete,
keyword_add,
keyword_in,
keyword_null,
keyword__describe__,
string_literal,
@ -62,9 +64,8 @@ pub const Tokenizer = struct {
buffer: [:0]const u8,
index: usize,
/// For debugging purposes.
pub fn dump(self: *Tokenizer, token: *const Token) void {
std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.loc.start..token.loc.end] });
pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 {
return self.buffer[token.loc.start..token.loc.end];
}
pub fn init(buffer: [:0]const u8) Tokenizer {


@ -31,7 +31,7 @@ pub const UUID = struct {
std.mem.copyForwards(u8, slice, &string);
}
fn format_uuid(self: UUID) [36]u8 {
pub fn format_uuid(self: UUID) [36]u8 {
var buf: [36]u8 = undefined;
buf[8] = '-';
buf[13] = '-';

Binary file not shown.