diff --git a/README.md b/README.md
index 460e8be..c033b37 100644
--- a/README.md
+++ b/README.md
@@ -1,146 +1,163 @@
 # ZipponDB
+Open-source database written 100% in Zig.
+
 ![alt text](https://github.com/MrBounty/ZipponDB/blob/main/logo.jpeg)
-# Written in Zig
+# Introduction
-Zig is fast, blablabla
+ZipponDB is a relational database written entirely in Zig from scratch.
+It uses a custom query language named ZipponQL, or ZiQL for short.
-# How it's work
+The first time you run ZipponDB, it creates a new ZipponDB directory and starts the Zippon CLI.
+From there, you can create a new engine by running `schema build`. This reads the file `schema.zipponschema` and builds a custom binary
+with Zig that the CLI then uses to manipulate data. You then interact with the engine either through `run "My query go here"` or
+by calling the engine binary directly.
-Meme "That's the neat part..."
+## Why Zippon?
-Zippon is a strutural relational potentially in memory database written entirely in Zig from stractch.
+I first started ZipponDB to learn, and I believe that to really learn you need to build something real, so I chose to build a database
+as I work on becoming an expert in the field.
+
+Now for Zippon's advantages:
+- Open-source and 100% in Zig with 0 dependencies
+- Relational database
+- Small, fast and easy to integrate anywhere
+
+# Declare a schema
+
+ZipponDB needs a schema to work. A schema defines how your data will be stored. Compared to SQL, you can think of it as a
+file where you declare all table names, column names, data types and relationships. Here, however, you declare structs. A struct has a name and
+members. A member is a single piece of data or a link, with an associated type. Here is a simple example for a user:
+
+```
+User (
+    name: str,
+    email: str,
+    best_friend: User,
+    friends: []User,
+)
+```
+
+In this example each user has a name and an email as strings, plus one best friend as a link. [] means a
+list of that type. Note that any value can be null except lists, which can instead be empty.
+
+Here is a more advanced example with multiple structs:
+```
+User {
+    name: str,
+    email: str,
+    friends: []User,
+    posts: []Post,
+    liked_post: []Post,
+    comments: []Comment,
+    liked_com: []Comment,
+}
+
+Post {
+    title: str,
+    image: str,
+    at: date,
+    from: User,
+    like_by: []User,
+    comments: []Comment,
+}
+
+Comment {
+    content: str,
+    at: date,
+    from: User,
+    like_by: []User,
+    of: Post,
+}
+```
+
+Note: the `date` type is not yet implemented.
+
+# ZipponQL
+
+Zippon has its own query language. Why? Mostly because I wanted to build one.
+
+The language itself is fairly simple in my opinion. Here are the basics:
+
+- {} are filters
+- [] are how many entities; which members to return
+- () are new or updated data (not yet written to a file)
+- || are additional options
+- Links need to be specified inside [] to be returned; other members are returned automatically
+- Data is in struct format and can contain links
+
+### Some examples
+
+`GRAB User`
+Get all users
+
+`GRAB User { name = 'Adrien' }`
+Get all users named Adrien
+
+`GRAB User [1; email]`
+Get the email of one user
+
+`GRAB User | ASCENDING name |`
+Get all users ordered by name
+
+`GRAB User [name] { age > 10 AND name != 'Adrien' } | DESCENDING age |`
+Get just the name of all users that are older than 10 and not named Adrien
+
+`GRAB User [1] { best_friend = { name = 'Adrien' } }`
+Get one user that has a best friend named Adrien
+
+`GRAB User [10; friends [1]] { age > 10 } | ASC name |`
+Get one friend for each of the first 10 users older than 10, ordered by ascending name.
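+
+All the examples above are meant to be passed to the engine through the CLI described in the introduction. Here is a minimal,
+illustrative sketch of a session that only uses the `schema build` and `run` commands already mentioned (prompt and output omitted):
+
+```
+schema build
+run "GRAB User [name] { age > 10 AND name != 'Adrien' } | DESCENDING age |"
+run "GRAB User [1] { best_friend = { name = 'Adrien' } }"
+```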
+
+### Not yet implemented
+
+`GRAB Message [100; comments [ date ] ] { .writter = { name = 'Adrien' }.bestfriend }`
+Get the date of 100 comments written by the best friend of a user named Adrien
+
+`GRAB User { IN Message { date > '12-01-2014' }.writter }`
+Get all users that sent a message after the 12th of January 2014
+
+`GRAB User { !IN Comment { }.writter }`
+Get all users that didn't write a comment
+
+`GRAB User { IN User { name = 'Adrien' }.friends }`
+Get all users that are friends with an Adrien
+
+`UPDATE User [1] { name = 'Adrien' } => ( email = 'new@email.com' )`
+
+`REMOVE User { id = '000-000' }`
+
+`ADD User ( name = 'Adrien', email = 'email', age = 40 )`
-You build a binary according to your schema, you can just run it to acces a CLI and it will create and manage a folder 'zipponDB_DATA'.
-Then you do what you want with it, including:
-- Run it with your app as a seperated process and folder
-- Create a Docker and open some port
-- Create a Docker with a small API like flask
-- Other stuffs, Im sure some will find something nice
 
 # Integration
 
+For now there is only a Python integration, but since it is just 2-3 commands, it is easy to implement in other languages.
+
 ## Python
 
 ```python
 import zippondb as zdb
 
 client = zdb.newClient('path/to/binary')
-print(client.exe('describe'))
+client.exe('schema build')
+print(client.exe('schema describe'))
 
-# Return named tuple
-users = client.exe('GRAB User {}')
+# Return named tuple of all users
+users = client.run('GRAB User {}')
 for user in users:
     print(user.name)
-
-client.exe('save')
 ```
 
-# Benchmark
+# Roadmap
 
-I did a database with random data. The schema is like that:
-```
-User {
-    name: str,
-    email: str,
-    friends: []User.friends,
-    posts: []Post.from,
-    liked_post: []Post.like_by,
-    comments: []Comment.from,
-    liked_com: []Comment.like_by,
-}
-
-Post {
-    title: str,
-    image: str,
-    at: date,
-    from: User.posts,
-    like_by: []User.liked_post,
-    comments: []Comment.of,
-}
-
-Comment {
-    content: str,
-    at: date,
-    from: User.comments,
-    like_by: User.liked_com,
-    of: Post.comments,
-}
-```
-
-As you can see, link need to be defined in both struct. [] mean an array of value.
-For example `posts: []Post.from,` and `from: User.posts,` mean that a `User` can have multiple posts (an array of `Post`) and a post
-just one author. Both linked by the value `posts` and `from`.
-
-# Create a schema
-
-Zippon use struct as way of saving data. A struct is a way of storing multiple data of different type.
-Very similar to a row in a table, columns being datatype and a row a single struct.
-
-The schema is directly INSIDE the binary, so each binary are per schema ! This is for effenciency, idk to be honest, I guess ? lol
-
-# Migration
-
-For now you can't migrate the data of one binary to another, so you will need to different binary.
-
-# Zippon language
-
-Ok so I went crazy on that, on have it how language. It is stupide and I love it. I wanted to do like EdgeDB but no, too simple.
-Anyway, I tried to do something different, to do something different, idk, you're the jduge of it.
- -``` -GRAB User { name = 'Adrien' } -Get all user named Adrien - -GRAB User [1; email] { } -Get one email - -GRAB User {} | ASCENDING name | -Get all users ordered by name - -GRAB User [name] { age > 10 AND name != 'Adrien' } | DECENDING age | -Get just the name of all users that are 10 years old or more and not named Adrien ordered by age - -GRAB User { bestfriend = { name = 'Adrien' } } -GRAB User { bestfriend = User{ name = 'Adrien' } } // Same -Get all user that have a best friend named Adrien - -GRAB User [10] { IN User [1] { age > 10 } | ASC name |.friends } -Get 10 users that are friend with the first user older than 10 years old in ascending name order - -GRAB Message [100; comments [ date ] ] { .writter = { name = 'Adrien' }.bestfriend } -Get the date of 100 comments from the best friend of the writter named Adrien - -GRAB User { IN Message { date > '12-01-2014' }.writter } -Get all users that sended a message after the 12 january 2014 - -GRAB User { !IN Comment { }.writter } -Get all user that didn't wrote a comment - -GRAB User { IN User { name = 'Adrien' }.friends } -Get all user that are friends with an Adrien - -UPDATE User [1] { name = 'Adrien' } => ( email = 'new@email.com' ) - -REMOVE User { id = '000-000' } - -ADD User ( name = 'Adrien', email = 'email', age = 40 } -``` - -- {} Are filters -- [] Are how much; what data -- () Are new or updated data (Not already savec) -- || Are additional options -- Data are in struct format and can have link -- By default all value other than a link are return per query, to prevent recurcive return (User.friends in User.friends) - - -# How it's really work - -NOTE: Do this in a separe file - -## Tokenizer - -The tokenizer of the language is -# ZipponDB +[ ] Beta without link +[ ] Relationships/links +[ ] Multi threading +[ ] Transaction +[ ] Docker image +[ ] Migration of schema +[ ] Dump/Bump data +[ ] In memory option +[ ] Archives diff --git a/ZipponDB/DATA/Message/1.zippondata b/ZipponDB/DATA/Message/content/1.zippondata similarity index 100% rename from ZipponDB/DATA/Message/1.zippondata rename to ZipponDB/DATA/Message/content/1.zippondata diff --git a/ZipponDB/DATA/Message/content/main.zippondata b/ZipponDB/DATA/Message/content/main.zippondata new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/ZipponDB/DATA/Message/content/main.zippondata @@ -0,0 +1 @@ + diff --git a/ZipponDB/DATA/User/1.zippondata b/ZipponDB/DATA/User/1.zippondata deleted file mode 100644 index e69de29..0000000 diff --git a/ZipponDB/DATA/User/age/1.zippondata b/ZipponDB/DATA/User/age/1.zippondata new file mode 100644 index 0000000..214106d --- /dev/null +++ b/ZipponDB/DATA/User/age/1.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 26 diff --git a/ZipponDB/DATA/User/age/main.zippondata b/ZipponDB/DATA/User/age/main.zippondata new file mode 100644 index 0000000..daf752a --- /dev/null +++ b/ZipponDB/DATA/User/age/main.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 \ No newline at end of file diff --git a/ZipponDB/DATA/User/email/1.zippondata b/ZipponDB/DATA/User/email/1.zippondata new file mode 100644 index 0000000..16447e1 --- /dev/null +++ b/ZipponDB/DATA/User/email/1.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 'adrien@gmail.com' diff --git a/ZipponDB/DATA/User/email/main.zippondata b/ZipponDB/DATA/User/email/main.zippondata new file mode 100644 index 0000000..daf752a --- /dev/null +++ b/ZipponDB/DATA/User/email/main.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 \ No newline at end of 
file diff --git a/ZipponDB/DATA/User/main.zippondata b/ZipponDB/DATA/User/main.zippondata deleted file mode 100644 index e69de29..0000000 diff --git a/ZipponDB/DATA/User/name/1.zippondata b/ZipponDB/DATA/User/name/1.zippondata new file mode 100644 index 0000000..3d54d58 --- /dev/null +++ b/ZipponDB/DATA/User/name/1.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 'Adrien' diff --git a/ZipponDB/DATA/User/name/main.zippondata b/ZipponDB/DATA/User/name/main.zippondata new file mode 100644 index 0000000..daf752a --- /dev/null +++ b/ZipponDB/DATA/User/name/main.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 \ No newline at end of file diff --git a/ZipponDB/DATA/User/scores/1.zippondata b/ZipponDB/DATA/User/scores/1.zippondata new file mode 100644 index 0000000..d9ab390 --- /dev/null +++ b/ZipponDB/DATA/User/scores/1.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 null diff --git a/ZipponDB/DATA/User/scores/main.zippondata b/ZipponDB/DATA/User/scores/main.zippondata new file mode 100644 index 0000000..daf752a --- /dev/null +++ b/ZipponDB/DATA/User/scores/main.zippondata @@ -0,0 +1 @@ +2a7bf36b-20c8-4a1d-8f86-398311220000 \ No newline at end of file diff --git a/build.zig b/build.zig index f142c8b..815a529 100644 --- a/build.zig +++ b/build.zig @@ -19,21 +19,50 @@ pub fn build(b: *std.Build) void { run_step.dependOn(&run_cmd.step); // Test step - const cliTokenizer_tests = b.addTest(.{ + const tests1 = b.addTest(.{ + .root_source_file = b.path("src/parsers/data-parsing.zig"), + .target = target, + .optimize = optimize, + .name = "Data parsing", + }); + const run_tests1 = b.addRunArtifact(tests1); + + const tests2 = b.addTest(.{ .root_source_file = b.path("src/tokenizers/cliTokenizer.zig"), .target = target, .optimize = optimize, - .name = "CLID Tokenizer test", + .name = "CLI tokenizer", }); - const ziqlTokenizer_tests = b.addTest(.{ + const run_tests2 = b.addRunArtifact(tests2); + + const tests3 = b.addTest(.{ .root_source_file = b.path("src/tokenizers/ziqlTokenizer.zig"), .target = target, .optimize = optimize, - .name = "ZiQL Tokenizer test", + .name = "ZiQL tokenizer", }); - const run_cliTokenizer_tests = b.addRunArtifact(cliTokenizer_tests); - const run_ziqlTokenizer_tests = b.addRunArtifact(ziqlTokenizer_tests); + const run_tests3 = b.addRunArtifact(tests3); + + const tests4 = b.addTest(.{ + .root_source_file = b.path("src/tokenizers/schemaTokenizer.zig"), + .target = target, + .optimize = optimize, + .name = "Schema tokenizer", + }); + const run_tests4 = b.addRunArtifact(tests4); + + const tests5 = b.addTest(.{ + .root_source_file = b.path("src/test.zig"), + .target = target, + .optimize = optimize, + .name = "ADD functions", + }); + const run_tests5 = b.addRunArtifact(tests5); + const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&run_cliTokenizer_tests.step); - test_step.dependOn(&run_ziqlTokenizer_tests.step); + test_step.dependOn(&run_tests1.step); + test_step.dependOn(&run_tests2.step); + test_step.dependOn(&run_tests3.step); + test_step.dependOn(&run_tests4.step); + test_step.dependOn(&run_tests5.step); } diff --git a/engine b/engine index bc35e25..fa49cf6 100755 Binary files a/engine and b/engine differ diff --git a/engine.o b/engine.o index 6ddb03a..b331a11 100644 Binary files a/engine.o and b/engine.o differ diff --git a/src/dbconsole.zig b/src/dbconsole.zig index 3891b53..eaec210 100644 --- a/src/dbconsole.zig +++ b/src/dbconsole.zig @@ -103,6 +103,7 @@ pub fn main() !void { .keyword_quit => { break; }, + .eof => 
{}, else => { std.debug.print("Command need to start with a keyword, including: run, schema, help and quit\n", .{}); }, @@ -157,17 +158,30 @@ fn buildEngine() !void { for (dtypes.struct_name_list) |struct_name| { data_dir.makeDir(struct_name) catch |err| switch (err) { error.PathAlreadyExists => {}, - else => @panic("Error other than path already exists when trying to create the a member directory.\n"), - }; - const dir = try data_dir.openDir(struct_name, .{}); - _ = dir.createFile("main.zippondata", .{}) catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => @panic("Error: can't create main.zippondata"), - }; - _ = dir.createFile("1.zippondata", .{}) catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => @panic("Error: can't create 1.zippondata"), + else => @panic("Error other than path already exists when trying to create a struct directory.\n"), }; + const struct_dir = try data_dir.openDir(struct_name, .{}); + + const member_names = dtypes.structName2structMembers(struct_name); + for (member_names) |member_name| { + struct_dir.makeDir(member_name) catch |err| switch (err) { + error.PathAlreadyExists => return, + else => @panic("Error other than path already exists when trying to create a member directory.\n"), + }; + const member_dir = try struct_dir.openDir(member_name, .{}); + + blk: { + const file = member_dir.createFile("main.zippondata", .{}) catch |err| switch (err) { + error.PathAlreadyExists => break :blk, + else => @panic("Error: can't create main.zippondata"), + }; + try file.writeAll("\n"); + } + _ = member_dir.createFile("1.zippondata", .{}) catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => @panic("Error: can't create 1.zippondata"), + }; + } } } diff --git a/src/dbengine.zig b/src/dbengine.zig index 58ea1aa..6f52b05 100644 --- a/src/dbengine.zig +++ b/src/dbengine.zig @@ -3,7 +3,9 @@ const dtypes = @import("dtypes.zig"); const UUID = @import("uuid.zig").UUID; const ziqlTokenizer = @import("tokenizers/ziqlTokenizer.zig").Tokenizer; const ziqlToken = @import("tokenizers/ziqlTokenizer.zig").Token; +const grabParser = @import("query_functions/GRAB.zig").Parser; const Allocator = std.mem.Allocator; +const parseDataAndAddToFile = @import("query_functions/ADD.zig").parseDataAndAddToFile; pub const Error = error{UUIDNotFound}; const stdout = std.io.getStdOut().writer(); @@ -12,6 +14,9 @@ pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); + const buffer = try allocator.alloc(u8, 1024); + defer allocator.free(buffer); + // Init the map storage string map that track all array of struct var storage = std.StringHashMap(*std.ArrayList(dtypes.Types)).init(allocator); defer storage.deinit(); @@ -24,33 +29,32 @@ pub fn main() !void { } // Add user - const adrien = dtypes.User.init("Adrien", "adrien@gmail.com"); - try storage.get("User").?.append(dtypes.Types{ .User = &adrien }); - const adrien_get = storage.get("User").?.items[0].User; - - if (std.meta.eql(adrien_get, &adrien)) { - try stdout.print("adrien == adrien_get\n\n", .{}); - } - - // Add a new user - // const newUser = try dtypes.User.new(allocator, "Adrien", "adrien@gmail.com"); - // try storage.get("User").?.append(dtypes.Types{ .user = newUser }); + //const adrien = dtypes.User.init("Adrien", "adrien@gmail.com"); + //try storage.get("User").?.append(dtypes.Types{ .User = &adrien }); + //const adrien_get = storage.get("User").?.items[0].User; var args = try std.process.argsWithAllocator(allocator); defer args.deinit(); // 
Remove the first argument _ = args.next(); - const null_term_query_str = args.next(); + const null_term_query_str = args.next().?; - var ziqlToker = ziqlTokenizer.init(null_term_query_str.?); - const firstToken = ziqlToker.next(); - switch (firstToken.tag) { + var ziqlToker = ziqlTokenizer.init(null_term_query_str); + const first_token = ziqlToker.next(); + const struct_name_token = ziqlToker.next(); + + switch (first_token.tag) { .keyword_grab => { - try stdout.print("Hello from engine\n", .{}); + var parser = grabParser.init(&ziqlToker); + try parser.parse_additional_data(); }, .keyword_add => { - try stdout.print("Not yet implemented.\n", .{}); + if (!isStructInSchema(ziqlToker.getTokenSlice(struct_name_token))) { + try stdout.print("Error: No struct named '{s}' in current schema.", .{ziqlToker.getTokenSlice(struct_name_token)}); + return; + } + try parseDataAndAddToFile(allocator, ziqlToker.getTokenSlice(struct_name_token), &ziqlToker); }, .keyword_update => { try stdout.print("Not yet implemented.\n", .{}); @@ -67,26 +71,14 @@ pub fn main() !void { } } -fn getById(array: anytype, id: UUID) !*dtypes.User { - for (array.items) |data| { - if (data.id.compare(id)) { - return data; +/// Check if a string is a name of a struct in the currently use engine +fn isStructInSchema(struct_name_to_check: []const u8) bool { + if (std.mem.eql(u8, struct_name_to_check, "describe")) return true; + + for (dtypes.struct_name_list) |struct_name| { + if (std.mem.eql(u8, struct_name_to_check, struct_name)) { + return true; } } - return error.UUIDNotFound; -} - -// Function to add and test: -// - Create one entity -// - Search one entity filtering a list of key/value. Eg: User with name = 'Adrien' and age > 10 - -test "getById" { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - var users = std.ArrayList(*dtypes.User).init(allocator); - try users.append(try dtypes.User.new(allocator, "Adrien", "adrien@gmail.com")); - - const adrien = try getById(users, users.items[0].id); - - try std.testing.expect(UUID.compare(users.items[0].id, adrien.id)); + return false; } diff --git a/src/dtypes.zig b/src/dtypes.zig index 4a67679..96438dd 100644 --- a/src/dtypes.zig +++ b/src/dtypes.zig @@ -1,15 +1,18 @@ const std = @import("std"); const UUID = @import("uuid.zig").UUID; +const dataParsing = @import("parsers/data-parsing.zig"); -pub const parameter_max_file_size = 1e+7; // THe number of bytes than each file can be before splitting +pub const parameter_max_file_size_in_bytes = 500; // THe number of bytes than each file can be before splitting pub const User = struct { id: UUID, name: []const u8, email: []const u8, + age: i64, + scores: []i64, - pub fn init(name: []const u8, email: []const u8) User { - return User{ .id = UUID.init(), .name = name, .email = email }; + pub fn init(name: []const u8, email: []const u8, age: i64, scores: []i64) User { + return User{ .id = UUID.init(), .name = name, .email = email, .age = age, .scores = scores }; } }; @@ -27,14 +30,43 @@ pub const Types = union { Message: *const Message, }; +// Maybe create a struct like StructMetadata for the string list of member and name, ect pub const struct_name_list: [2][]const u8 = .{ "User", "Message", }; -pub const struct_member_list: [2][][]const u8 = .{ - .{ "name", "email" }, - .{"content"}, +pub const struct_member_list: [2][]const []const u8 = .{ + &[_][]const u8{ "name", "email", "age", "scores" }, + &[_][]const u8{"content"}, }; +// For now there is 4 types of data: str, int, float, bool +const 
MemberTypes = enum { int, float, bool, str }; + pub const describe_str = "User (\n name: str,\n email: str,\n)\n\nMessage (\n content: str,\n)\n"; + +/// User a map of member name / value string to create a new entity of a type +/// The goal being an array of map while parsing files to then return an array of entities and do some fileting on it. +pub fn createEntityFromMap(allocator: std.mem.Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) !*Types { + var t = try allocator.create(Types); + if (std.mem.eql(u8, struct_name, "User")) { + const age = try std.fmt.parseInt(i64, map.get("age").?, 10); + const scores = dataParsing.parseArrayInt(allocator, map.get("scores").?); + t.User = &User.init(map.get("name").?, map.get("email").?, age, scores.items); + } else { + return error.UnknowStructName; + } + return t; +} + +/// Get the list of all member name for a struct name +pub fn structName2structMembers(struct_name: []const u8) []const []const u8 { + var i: u16 = 0; + + while (i < struct_name_list.len) : (i += 1) { + if (std.mem.eql(u8, struct_name_list[i], struct_name)) break; + } + + return struct_member_list[i]; +} diff --git a/src/dtypes_example.zig b/src/dtypes_example.zig deleted file mode 100644 index cf3b47f..0000000 --- a/src/dtypes_example.zig +++ /dev/null @@ -1,33 +0,0 @@ -const std = @import("std"); -const UUID = @import("uuid.zig").UUID; - -pub const User = struct { - id: UUID, - name: []u8, - email: []u8, - - pub fn init(id: UUID, name: []const u8, email: []const u8) User { - return User{ .id = id, .name = name, .email = email }; - } -}; - -pub const Message = struct { - id: UUID, - content: []u8, - - pub fn init(id: UUID, content: []const u8) Message { - return Message{ .id = id, .content = content }; - } -}; - -pub const Types = union { - User: *User, - Message: *Message, -}; - -pub const struct_name_list: [2][]const u8 = .{ - "User", - "Message", -}; - -pub const describe_str = "User (\n name: str,\n email: str,\n)\n\nMessage (\n content: str,\n)\n"; diff --git a/src/dtypes_test.zig b/src/dtypes_test.zig new file mode 100644 index 0000000..62f8060 --- /dev/null +++ b/src/dtypes_test.zig @@ -0,0 +1,62 @@ +const std = @import("std"); +const UUID = @import("uuid.zig").UUID; + +pub const parameter_max_file_size_in_bytes = 500; // THe number of bytes than each file can be before splitting + +pub const User = struct { + id: UUID, + name: []const u8, + email: []const u8, + + pub fn init(name: []const u8, email: []const u8) User { + return User{ .id = UUID.init(), .name = name, .email = email }; + } +}; + +pub const Message = struct { + id: UUID, + content: []const u8, + + pub fn init(content: []const u8) Message { + return Message{ .id = UUID.init(), .content = content }; + } +}; + +pub const Types = union { + User: *const User, + Message: *const Message, +}; + +// Maybe create a struct like StructMetadata for the string list of member and name, ect +pub const struct_name_list: [2][]const u8 = .{ + "User", + "Message", +}; + +pub const struct_member_list: [2][]const []const u8 = .{ + &[_][]const u8{ "name", "email" }, + &[_][]const u8{"content"}, +}; + +pub const describe_str = "User (\n name: str,\n email: str,\n)\n\nMessage (\n content: str,\n)\n"; + +pub fn createEntityFromMap(allocator: std.mem.Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) !*Types { + var t = try allocator.create(Types); + if (std.mem.eql(u8, struct_name, "User")) { + t.User = &User.init(map.get("name").?, map.get("email").?); + } else { + return 
error.UnknowStructName; + } + return t; +} + +/// Get the list of all member name for a struct name +pub fn structName2structMembers(struct_name: []const u8) []const []const u8 { + var i: u16 = 0; + + while (i < struct_name_list.len) : (i += 1) { + if (std.mem.eql(u8, struct_name_list[i], struct_name)) break; + } + + return struct_member_list[i]; +} diff --git a/src/parsers/data-parsing.zig b/src/parsers/data-parsing.zig new file mode 100644 index 0000000..47bb77c --- /dev/null +++ b/src/parsers/data-parsing.zig @@ -0,0 +1,39 @@ +const std = @import("std"); + +// Series of functions to use just before creating an entity. +// Will transform the string of data into data of the right type. + +pub fn parseInt(value_str: []const u8) i64 { + return std.fmt.parseInt(i64, value_str, 10) catch return 0; +} + +pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) { + var array = std.ArrayList(i64).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); + while (it.next()) |x| { + array.append(parseInt(x)) catch {}; + } + + return array; +} + +test "Data parsing" { + const allocator = std.testing.allocator; + + // Int + const in1: [3][]const u8 = .{ "1", "42", "Hello" }; + const expected_out1: [3]i64 = .{ 1, 42, 0 }; + for (in1, 0..) |value, i| { + try std.testing.expect(parseInt(value) == expected_out1[i]); + } + std.debug.print("OK\tData parsing: Int\n", .{}); + + // Int array + const in2 = "[1 14 44 42 hello]"; + const out2 = parseArrayInt(allocator, in2); + defer out2.deinit(); + const expected_out2: [5]i64 = .{ 1, 14, 44, 42, 0 }; + try std.testing.expect(std.mem.eql(i64, out2.items, &expected_out2)); + std.debug.print("OK\tData parsing: Int array\n", .{}); +} diff --git a/src/parsers/schemaParser.zig b/src/parsers/schemaParser.zig index 22cf235..9951b3a 100644 --- a/src/parsers/schemaParser.zig +++ b/src/parsers/schemaParser.zig @@ -143,6 +143,8 @@ pub const Parser = struct { } } + // Use @embedFile + // Make the union `Type` with all different struct self.writeToFile("pub const Types = union {\n"); for (struct_array.items) |struct_name| { diff --git a/src/query_functions/ADD.zig b/src/query_functions/ADD.zig new file mode 100644 index 0000000..2a1828f --- /dev/null +++ b/src/query_functions/ADD.zig @@ -0,0 +1,335 @@ +const std = @import("std"); +const dtypes = @import("../dtypes.zig"); +const UUID = @import("../uuid.zig").UUID; +const ziqlTokenizer = @import("../tokenizers/ziqlTokenizer.zig").Tokenizer; +const ziqlToken = @import("../tokenizers/ziqlTokenizer.zig").Token; +const Allocator = std.mem.Allocator; + +const stdout = std.io.getStdOut().writer(); + +// TODO to improve this part of the code: +// 1. Use logging +// 2. Create a struct that manage files with member: stdout, folder (e.g. the User folder), + +// Query that need to work now +// ADD User (name='Adrien', email='adrien.bouvais@gmail.com') OK +// ADD User (name='Adrien', email='adrien.bouvais@gmail.com', age = 26) OK +// ADD User (name='Adrien', email='adrien.bouvais@gmail.com', books = ['book1', 'book2']) OK +// ADD User (name='Adrien', email=null) OK +// +// For later: links +// ADD User (name = 'Adrien', best_friend = {name='bob'}, friends = {name != 'bob'}) NOT OK +// ADD User (name = 'Adrien', friends = {(name = 'bob' AND age > 16) OR (id = '0000-0000')} ) NOT OK +// TODO: make real test + +/// Function for the ADD query command. +/// It will parse the reste of the query and create a map of member name / value. 
+/// Then add those value to the appropriete file. The proper file is the first one with a size < to the limit. +/// If no file is found, a new one is created. +pub fn parseDataAndAddToFile(allocator: Allocator, struct_name: []const u8, toker: *ziqlTokenizer) !void { + const token = toker.next(); + switch (token.tag) { + .l_paren => {}, + else => { + try stdout.print("Error: Expected ( after the struct name of an ADD command.\nE.g. ADD User (name = 'bob')\n", .{}); + return; + }, + } + + const buffer = try allocator.alloc(u8, 1024 * 100); + defer allocator.free(buffer); + + var member_map = getMapOfMember(allocator, toker) catch return; + defer member_map.deinit(); + + if (!checkIfAllMemberInMap(struct_name, &member_map)) return; + + const entity = try dtypes.createEntityFromMap(allocator, struct_name, member_map); + const uuid_str = entity.User.*.id.format_uuid(); + defer stdout.print("Added new {s} successfully using UUID: {s}\n", .{ + struct_name, + uuid_str, + }) catch {}; + + const member_names = dtypes.structName2structMembers(struct_name); + for (member_names) |member_name| { + var file_map = getFilesStat(allocator, struct_name, member_name) catch { + try stdout.print("Error: File stat error", .{}); + return; + }; + const potential_file_name_to_use = getFirstUsableFile(file_map); + if (potential_file_name_to_use) |file_name| { + const file_index = fileName2Index(file_name) catch @panic("Error in fileName2Index"); + try stdout.print("Using file: {s} with a size of {d}\n", .{ file_name, file_map.get(file_name).?.size }); + + const path = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/{s}", .{ + struct_name, + member_name, + file_name, + }); + + var file = std.fs.cwd().openFile(path, .{ + .mode = .read_write, + }) catch { + try stdout.print("Error opening data file.", .{}); + return; + }; + defer file.close(); + + try file.seekFromEnd(0); + try file.writer().print("{s} {s}\n", .{ uuid_str, member_map.get(member_name).? }); + + const path_to_main = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/main.zippondata", .{ + struct_name, + member_name, + }); + + var file_main = std.fs.cwd().openFile(path_to_main, .{ + .mode = .read_write, + }) catch { + try stdout.print("Error opening data file.", .{}); + return; + }; + defer file_main.close(); + + try appendToLineAtIndex(allocator, file_main, file_index, &uuid_str); + } else { + const max_index = maxFileIndex(file_map); + + const new_file_path = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/{d}.zippondata", .{ + struct_name, + member_name, + max_index + 1, + }); + + try stdout.print("new file path: {s}\n", .{new_file_path}); + + // TODO: Create new file and save the data inside + const new_file = std.fs.cwd().createFile(new_file_path, .{}) catch @panic("Error creating new data file"); + defer new_file.close(); + + try new_file.writer().print("{s} {s}\n", .{ &uuid_str, member_map.get(member_name).? 
}); + + const path_to_main = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}/main.zippondata", .{ + struct_name, + member_name, + }); + + var file_main = std.fs.cwd().openFile(path_to_main, .{ + .mode = .read_write, + }) catch { + try stdout.print("Error opening data file.", .{}); + @panic(""); + }; + defer file_main.close(); + + try file_main.seekFromEnd(0); + try file_main.writeAll("\n "); + try file_main.seekTo(0); + try appendToLineAtIndex(allocator, file_main, max_index + 1, &uuid_str); + } + } +} + +/// Take the main.zippondata file, the index of the file where the data is saved and the string to add at the end of the line +fn appendToLineAtIndex(allocator: std.mem.Allocator, file: std.fs.File, index: usize, str: []const u8) !void { + const buffer = try allocator.alloc(u8, 1024 * 100); + defer allocator.free(buffer); + + var reader = file.reader(); + + var line_num: usize = 1; + while (try reader.readUntilDelimiterOrEof(buffer, '\n')) |_| { + if (line_num == index) { + try file.seekBy(-1); + try file.writer().print("{s} ", .{str}); + return; + } + line_num += 1; + } +} + +/// Return a map of file path => Stat for one struct and member name +fn getFilesStat(allocator: Allocator, struct_name: []const u8, member_name: []const u8) !*std.StringHashMap(std.fs.File.Stat) { + const cwd = std.fs.cwd(); + + const buffer = try allocator.alloc(u8, 1024); // Adjust the size as needed + defer allocator.free(buffer); + + const path = try std.fmt.bufPrint(buffer, "ZipponDB/DATA/{s}/{s}", .{ struct_name, member_name }); + + var file_map = std.StringHashMap(std.fs.File.Stat).init(allocator); + + const member_dir = cwd.openDir(path, .{ .iterate = true }) catch { + try stdout.print("Error opening struct directory", .{}); + @panic(""); + }; + + var iter = member_dir.iterate(); + while (try iter.next()) |entry| { + if (entry.kind != std.fs.Dir.Entry.Kind.file) continue; + + const file_stat = member_dir.statFile(entry.name) catch + { + try stdout.print("Error getting stat of a file", .{}); + @panic(""); + }; + + file_map.put(entry.name, file_stat) catch @panic("Error adding stat to map"); + } + + return &file_map; +} + +/// Use the map of file stat to find the first file with under the bytes limit. +/// return the name of the file. If none is found, return null. +fn getFirstUsableFile(map: *std.StringHashMap(std.fs.File.Stat)) ?[]const u8 { + var iter = map.keyIterator(); + while (iter.next()) |key| { + if (std.mem.eql(u8, key.*, "main.zippondata")) continue; + if (map.get(key.*).?.size < dtypes.parameter_max_file_size_in_bytes) return key.*; + } + return null; +} + +fn fileName2Index(file_name: []const u8) !usize { + try stdout.print("Got file name: {s}\n", .{file_name}); + var iter_file_name = std.mem.tokenize(u8, file_name, "."); + const num_str = iter_file_name.next().?; + const num: usize = try std.fmt.parseInt(usize, num_str, 10); + return num; +} + +/// Iter over all file and get the max name and return the value of it as i32 +/// So for example if there is 1.zippondata and 2.zippondata it return 2. 
+fn maxFileIndex(map: *std.StringHashMap(std.fs.File.Stat)) usize { + var iter = map.keyIterator(); + var index_max: usize = 0; + while (iter.next()) |key| { + if (std.mem.eql(u8, key.*, "main.zippondata")) continue; + var iter_file_name = std.mem.tokenize(u8, key.*, "."); + const num_str = iter_file_name.next().?; + const num: usize = std.fmt.parseInt(usize, num_str, 10) catch @panic("Error parsing file name into usize"); + if (num > index_max) index_max = num; + } + return index_max; +} + +const MemberMapError = error{ + NotMemberName, + NotEqualSign, + NotStringOrNumber, + NotComma, + PuttingNull, +}; + +/// Take the tokenizer and return a map of the query for the ADD command. +/// Keys are the member name and value are the string of the value in the query. E.g. 'Adrien' or '10' +pub fn getMapOfMember(allocator: Allocator, toker: *ziqlTokenizer) !std.StringHashMap([]const u8) { + std.debug.print("Started\n\n", .{}); + var token = toker.next(); + std.debug.print("{any}\n\n", .{token}); + + var member_map = std.StringHashMap([]const u8).init( + allocator, + ); + + std.debug.print("OK \n\n", .{}); + + while (token.tag != ziqlToken.Tag.eof) : (token = toker.next()) { + std.debug.print("{any}\n\n", .{token}); + switch (token.tag) { + .r_paren => continue, + .invalid => stdout.print("Error: Invalid token: {s}", .{toker.getTokenSlice(token)}) catch {}, + .identifier => { + const member_name_str = toker.getTokenSlice(token); + token = toker.next(); + switch (token.tag) { + .equal => { + token = toker.next(); + switch (token.tag) { + .string_literal, .number_literal => { + const value_str = toker.getTokenSlice(token); + member_map.put(member_name_str, value_str) catch @panic("Could not add member name and value to map in getMapOfMember"); + token = toker.next(); + switch (token.tag) { + .comma, .r_paren => continue, + else => { + stdout.print("Error: Expected , after string or number got: {s}. E.g. ADD User (name='bob', age=10)", .{toker.getTokenSlice(token)}) catch {}; + return MemberMapError.NotComma; + }, + } + }, + .keyword_null => { + try stdout.print("Found null value\n", .{}); + const value_str = "null"; + member_map.put(member_name_str, value_str) catch { + try stdout.print("Error putting null value into the map\n", .{}); + return MemberMapError.PuttingNull; + }; + token = toker.next(); + switch (token.tag) { + .comma, .r_paren => continue, + else => { + stdout.print("Error: Expected , after string or number got: {s}. E.g. ADD User (name='bob', age=10)", .{toker.getTokenSlice(token)}) catch {}; + return MemberMapError.NotComma; + }, + } + }, + .l_bracket => { + var array_values = std.ArrayList([]const u8).init(allocator); + token = toker.next(); + while (token.tag != ziqlToken.Tag.r_bracket) : (token = toker.next()) { + switch (token.tag) { + .string_literal, .number_literal => { + const value_str = toker.getTokenSlice(token); + array_values.append(value_str) catch @panic("Could not add value to array in getMapOfMember"); + }, + .invalid => stdout.print("Error: Invalid token: {s}", .{toker.getTokenSlice(token)}) catch {}, + else => { + stdout.print("Error: Expected string or number in array got: {s}. E.g. 
ADD User (scores=[10 20 30])", .{toker.getTokenSlice(token)}) catch {}; + return MemberMapError.NotStringOrNumber; + }, + } + } + const array_str = try std.mem.join(allocator, " ", array_values.items); + member_map.put(member_name_str, array_str) catch @panic("Could not add member name and value to map in getMapOfMember"); + }, // TODO + else => { + stdout.print("Error: Expected string or number after a = got: {s}. E.g. ADD User (name='bob')", .{toker.getTokenSlice(token)}) catch {}; + return MemberMapError.NotStringOrNumber; + }, + } + }, + else => { + stdout.print("Error: Expected = after a member declaration get {s}. E.g. ADD User (name='bob')", .{toker.getTokenSlice(token)}) catch {}; + return MemberMapError.NotEqualSign; + }, + } + }, + else => { + stdout.print("Error: Unknow token: {s}. This should be the name of a member. E.g. name in ADD User (name='bob')", .{toker.getTokenSlice(token)}) catch {}; + return MemberMapError.NotMemberName; + }, + } + } + + return member_map; +} + +/// Using the name of a struct from dtypes and the map of member name => value string from the query. +/// Check if the map keys are exactly the same as the name of the member of the struct. +/// Basically checking if the query contain all value that a struct need to be init. +fn checkIfAllMemberInMap(struct_name: []const u8, map: *std.StringHashMap([]const u8)) bool { + const all_struct_member = dtypes.structName2structMembers(struct_name); + var count: u16 = 0; + + for (all_struct_member) |key| { + if (map.contains(key)) count += 1 else stdout.print("Error: ADD query of struct: {s}; missing member: {s}\n", .{ + struct_name, + key, + }) catch {}; + } + + return ((count == all_struct_member.len) and (count == map.count())); +} diff --git a/ZipponDB/DATA/Message/main.zippondata b/src/query_functions/DELETE.zig similarity index 100% rename from ZipponDB/DATA/Message/main.zippondata rename to src/query_functions/DELETE.zig diff --git a/src/query_functions/GRAB.zig b/src/query_functions/GRAB.zig new file mode 100644 index 0000000..c5a28f4 --- /dev/null +++ b/src/query_functions/GRAB.zig @@ -0,0 +1,100 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const ziqlTokenizer = @import("../tokenizers/ziqlTokenizer.zig").Tokenizer; + +// To work now +// GRAB User {} +// GRAB User {name = 'Adrien'} +// GRAB User {name='Adrien' AND age < 30} +// GRAB User [1] {} +// GRAB User [10; name] {age < 30} +// +// For later + +const stdout = std.io.getStdOut().writer(); + +const AdditionalData = struct { + entity_to_find: usize = 0, + member_to_find: std.ArrayList([]const u8), + + pub fn init(allocator: Allocator) AdditionalData { + return AdditionalData{ .member_to_find = std.ArrayList(u8).init(allocator) }; + } +}; + +pub const Parser = struct { + allocator: Allocator, + additional_data: AdditionalData, + toker: *ziqlTokenizer, + state: State, + + const State = enum { + start, + invalid, + end, + + expect_additional_data, + expect_count_of_entity_to_find, + expect_semicolon, + + expect_filter, + }; + + pub fn init(allocator: Allocator, toker: *ziqlTokenizer) Parser { + return Parser{ .allocator = allocator, .toker = toker, .state = State.expect_additional_data, .additional_data = AdditionalData.init(allocator) }; + } + + pub fn deinit(self: *Parser) void { + self.additional_data.member_to_find.deinit(); + } + + pub fn parse_additional_data(self: *Parser) !void { + var token = self.toker.next(); + while (self.state != State.end) : (token = self.toker.next()) { + switch (self.state) { + .expect_additional_data 
=> { + switch (token.tag) { + .l_bracket => { + try stdout.print("Additional data found.\n", .{}); + self.state = State.expect_count_of_entity_to_find; + }, + else => { + try stdout.print("No additional data found.\n", .{}); + self.state = State.expect_filter; + }, + } + }, + .expect_count_of_entity_to_find => { + switch (token.tag) { + .number_literal => { + try stdout.print("Count of entity found.\n", .{}); + self.state = State.expect_semicolon; + }, + else => { + try stdout.print("No count of entity found.\n", .{}); + self.state = State.expect_filter; + }, + } + }, + .expect_semicolon => { + switch (token.tag) { + .semicolon => { + try stdout.print("Found semiconlon.\n", .{}); + self.state = State.expect_semicolon; + }, + else => { + try stdout.print("Expected semicon here: {s}.\n", .{self.toker.buffer[token.loc.start - 5 .. token.loc.end + 5]}); + self.state = State.invalid; + }, + } + }, + .invalid => { + return; + }, + else => { + try stdout.print("End\n", .{}); + }, + } + } + } +}; diff --git a/src/test.zig b/src/test.zig new file mode 100644 index 0000000..35f736a --- /dev/null +++ b/src/test.zig @@ -0,0 +1,22 @@ +const std = @import("std"); +const dtypes = @import("dtypes.zig"); +const UUID = @import("uuid.zig").UUID; +const ziqlTokenizer = @import("tokenizers/ziqlTokenizer.zig").Tokenizer; +const ziqlToken = @import("tokenizers/ziqlTokenizer.zig").Token; + +// Test for functions in for_add.zig +const getMapOfMember = @import("query_functions/ADD.zig").getMapOfMember; + +test "Get map of members" { + const allocator = std.testing.allocator; + + const in = "(name='Adrien', email='adrien@gmail.com', age=26, scores=[42 100 5])"; + const null_term_in = try allocator.dupeZ(u8, in); + + var toker = ziqlTokenizer.init(null_term_in); + + const member_map = try getMapOfMember(allocator, &toker); + std.debug.print("{s}", .{member_map.get("name").?}); + + allocator.free(null_term_in); +} diff --git a/src/tokenizers/ziqlTokenizer.zig b/src/tokenizers/ziqlTokenizer.zig index 867ed77..0b5d708 100644 --- a/src/tokenizers/ziqlTokenizer.zig +++ b/src/tokenizers/ziqlTokenizer.zig @@ -16,6 +16,7 @@ pub const Token = struct { .{ "DELETE", .keyword_delete }, .{ "ADD", .keyword_add }, .{ "IN", .keyword_in }, + .{ "null", .keyword_null }, .{ "__DESCRIBE__", .keyword__describe__ }, }); @@ -32,6 +33,7 @@ pub const Token = struct { keyword_delete, keyword_add, keyword_in, + keyword_null, keyword__describe__, string_literal, @@ -62,9 +64,8 @@ pub const Tokenizer = struct { buffer: [:0]const u8, index: usize, - /// For debugging purposes. - pub fn dump(self: *Tokenizer, token: *const Token) void { - std.debug.print("{s} \"{s}\"\n", .{ @tagName(token.tag), self.buffer[token.loc.start..token.loc.end] }); + pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 { + return self.buffer[token.loc.start..token.loc.end]; } pub fn init(buffer: [:0]const u8) Tokenizer { diff --git a/src/uuid.zig b/src/uuid.zig index c381eb0..0b2c110 100644 --- a/src/uuid.zig +++ b/src/uuid.zig @@ -31,7 +31,7 @@ pub const UUID = struct { std.mem.copyForwards(u8, slice, &string); } - fn format_uuid(self: UUID) [36]u8 { + pub fn format_uuid(self: UUID) [36]u8 { var buf: [36]u8 = undefined; buf[8] = '-'; buf[13] = '-'; diff --git a/zig-out/bin/zippon b/zig-out/bin/zippon index 9278fbb..7978061 100755 Binary files a/zig-out/bin/zippon and b/zig-out/bin/zippon differ