From fbcca0dc0967dce9dcb802b9623121a3b3ac9e60 Mon Sep 17 00:00:00 2001 From: MrBounty Date: Fri, 11 Oct 2024 17:51:45 +0200 Subject: [PATCH] Implemented dynamic schema Started by doing a SchemaEngine but at the end I just put everything inside the FileEngine. Now you can use 'schema init path/to/schema' to initialize the struct folders and first data file, Also save a copy of the schema in a file in the ZipponDB folder. --- README.md | 2 +- build.zig | 22 +- example.zipponschema | 7 + src/cli.zig | 209 +++++++ src/cliParser.zig | 151 ------ src/engines/schema.zig | 112 ---- src/{engines/file.zig => fileEngine.zig} | 390 +++++++------ src/schemaParser.zig | 273 ++++++---- src/tokenizers/cli.zig | 6 +- src/tokenizers/file.zig | 127 +++++ src/tokenizers/schema.zig | 6 + src/{engines => }/types/dataType.zig | 0 src/{engines => }/types/uuid.zig | 0 src/utils.zig | 14 + src/ziqlParser.zig | 664 ++++++++--------------- 15 files changed, 983 insertions(+), 1000 deletions(-) create mode 100644 example.zipponschema create mode 100644 src/cli.zig delete mode 100644 src/cliParser.zig delete mode 100644 src/engines/schema.zig rename src/{engines/file.zig => fileEngine.zig} (61%) create mode 100644 src/tokenizers/file.zig rename src/{engines => }/types/dataType.zig (100%) rename src/{engines => }/types/uuid.zig (100%) create mode 100644 src/utils.zig diff --git a/README.md b/README.md index 757f386..6cd26b6 100644 --- a/README.md +++ b/README.md @@ -335,7 +335,7 @@ TODO: Create a tech doc of what is happening inside. - [X] File engine #### v0.2 - Usable -- [ ] B-Tree +- [ ] B+Tree - [ ] Relationships - [ ] Date - [ ] Link query diff --git a/build.zig b/build.zig index f790abe..1c5a337 100644 --- a/build.zig +++ b/build.zig @@ -6,7 +6,7 @@ pub fn build(b: *std.Build) void { const optimize = b.standardOptimizeOption(.{}); const exe = b.addExecutable(.{ .name = "zippon", - .root_source_file = b.path("src/cliParser.zig"), + .root_source_file = b.path("src/cli.zig"), .target = target, .optimize = optimize, }); @@ -18,6 +18,15 @@ pub fn build(b: *std.Build) void { const run_step = b.step("run", "Run the app"); run_step.dependOn(&run_cmd.step); + const tests1 = b.addTest(.{ + .root_source_file = b.path("src/tokenizers/file.zig"), + .target = target, + .optimize = optimize, + .name = "File tokenizer", + .test_runner = b.path("test_runner.zig"), + }); + const run_tests1 = b.addRunArtifact(tests1); + const tests2 = b.addTest(.{ .root_source_file = b.path("src/tokenizers/cli.zig"), .target = target, @@ -46,7 +55,7 @@ pub fn build(b: *std.Build) void { const run_tests4 = b.addRunArtifact(tests4); const tests5 = b.addTest(.{ - .root_source_file = b.path("src/engines/types/uuid.zig"), + .root_source_file = b.path("src/types/uuid.zig"), .target = target, .optimize = optimize, .name = "UUID", @@ -55,7 +64,7 @@ pub fn build(b: *std.Build) void { const run_tests5 = b.addRunArtifact(tests5); const tests6 = b.addTest(.{ - .root_source_file = b.path("src/engines/file.zig"), + .root_source_file = b.path("src/fileEngine.zig"), .target = target, .optimize = optimize, .name = "File Engine", @@ -63,20 +72,21 @@ pub fn build(b: *std.Build) void { }); const run_tests6 = b.addRunArtifact(tests6); - const tests7 = b.addTest(.{ + const tests8 = b.addTest(.{ .root_source_file = b.path("src/ziqlParser.zig"), .target = target, .optimize = optimize, .name = "ZiQL parser", .test_runner = b.path("test_runner.zig"), }); - const run_tests7 = b.addRunArtifact(tests7); + const run_tests8 = b.addRunArtifact(tests8); const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_tests1.step); test_step.dependOn(&run_tests2.step); test_step.dependOn(&run_tests3.step); test_step.dependOn(&run_tests4.step); test_step.dependOn(&run_tests5.step); test_step.dependOn(&run_tests6.step); - test_step.dependOn(&run_tests7.step); + test_step.dependOn(&run_tests8.step); } diff --git a/example.zipponschema b/example.zipponschema new file mode 100644 index 0000000..14ce3e6 --- /dev/null +++ b/example.zipponschema @@ -0,0 +1,7 @@ +User ( + name: str, + age: int, + email: str, + scores: []int, + friends: []bool, +) diff --git a/src/cli.zig b/src/cli.zig new file mode 100644 index 0000000..348ab32 --- /dev/null +++ b/src/cli.zig @@ -0,0 +1,209 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const FileEngine = @import("fileEngine.zig").FileEngine; +const cliTokenizer = @import("tokenizers/cli.zig").Tokenizer; +const cliToken = @import("tokenizers/cli.zig").Token; +const ziqlTokenizer = @import("tokenizers/ziql.zig").Tokenizer; +const ziqlToken = @import("tokenizers/ziql.zig").Token; +const ziqlParser = @import("ziqlParser.zig").Parser; + +const stdout = std.io.getStdOut().writer(); + +fn send(comptime format: []const u8, args: anytype) void { + stdout.print(format, args) catch |err| { + std.log.err("Can't send: {any}", .{err}); + stdout.print("\x03\n", .{}) catch {}; + }; + + stdout.print("\x03\n", .{}) catch {}; +} + +const State = enum { + expect_main_command, + expect_query, + expect_schema_command, + expect_path_to_schema, + quit, + end, +}; + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + defer { + switch (gpa.deinit()) { + .ok => std.log.debug("No memory leak baby !\n", .{}), + .leak => std.log.debug("We fucked it up bro...\n", .{}), + } + } + + // TODO: Use the path of an environment variable if one found, otherwise wait for the user to use the schema init + checkAndCreateDirectories(); + var file_engine = FileEngine.init(allocator, null); + defer file_engine.deinit(); + + const line_buf = try allocator.alloc(u8, 1024 * 50); + defer allocator.free(line_buf); + + var state: State = .expect_main_command; + + // TODO: Use a State to prevent first_token and second_token + while (true) { + std.debug.print("> ", .{}); + const line = try std.io.getStdIn().reader().readUntilDelimiterOrEof(line_buf, '\n'); + + if (line) |line_str| { + const time_initial = std.time.microTimestamp(); + + const null_term_line_str = try allocator.dupeZ(u8, line_str[0..line_str.len]); + defer allocator.free(null_term_line_str); + + var cliToker = cliTokenizer.init(null_term_line_str); + var token = cliToker.next(); + state = .expect_main_command; + + while ((state != .end) and (state != .quit)) : (token = cliToker.next()) { + switch (state) { + .expect_main_command => switch (token.tag) { + .keyword_run => state = .expect_query, + .keyword_schema => state = .expect_schema_command, + .keyword_help => { + send("{s}", .{ + \\Welcome to ZipponDB! + \\ + \\run To run a query. Args => query: str, the query to execute. + \\schema Build a new engine and print current schema. + \\quit To stop the process without saving + \\dump Create a new folder with all data as copy. Args => foldername: str, the name of the folder. + \\bump Replace current data with a previous dump. Args => foldername: str, the name of the folder. + \\ + }); + state = .end; + }, + .keyword_quit => state = .quit, + .eof => state = .end, + else => { + send("Command need to start with a keyword, including: run, schema, help and quit\n", .{}); + state = .end; + }, + }, + + .expect_query => switch (token.tag) { + .string_literal => { + const null_term_query_str = try allocator.dupeZ(u8, line_str[token.loc.start + 1 .. token.loc.end - 1]); + defer allocator.free(null_term_query_str); + try runQuery(null_term_query_str, &file_engine); + state = .end; + }, + .keyword_help => { + send("The run command will take a ZiQL query between \" and run it. eg: run \"GRAB User\"\n", .{}); + state = .end; + }, + else => { + send("After command run, need a string of a query, eg: \"GRAB User\"\n", .{}); + state = .end; + }, + }, + + .expect_schema_command => switch (token.tag) { + .keyword_describe => { + if (file_engine.null_terminated_schema_buff.len == 0) { + send("Need to init the schema first. Please use the schema init path/to/schema command to start.", .{}); + } else { + send("Schema:\n {s}", .{file_engine.null_terminated_schema_buff}); + } + state = .end; + }, + .keyword_init => state = .expect_path_to_schema, + .keyword_help => { + send("{s}", .{ + \\Here are all available options to use with the schema command: + \\ + \\describe Print the schema use by the current engine. + \\build Build a new engine using a schema file. Args => filename: str, path of schema file to use. Default 'schema.zipponschema'. + \\ + }); + state = .end; + }, + else => { + std.debug.print("schema available options: describe, build & help\n", .{}); + state = .end; + }, + }, + + .expect_path_to_schema => switch (token.tag) { + .identifier => { + file_engine.initDataFolder(cliToker.getTokenSlice(token)) catch |err| switch (err) { + error.SchemaFileNotFound => { + send("Coulnt find the schema file at {s}", .{cliToker.getTokenSlice(token)}); + state = .end; + }, + else => { + send("Error initializing the schema", .{}); + state = .end; + }, + }; + send("Successfully initialized the database!", .{}); + state = .end; + }, + else => { + send("Expected a path to a schema file after the schema init command.", .{}); + state = .end; + }, + }, + + .quit, .end => break, + } + } + + const time_final = std.time.microTimestamp(); + const duration = time_final - time_initial; + std.debug.print("Time: {d:.2}ms\n", .{@as(f64, @floatFromInt(duration)) / 1000.0}); + + if (state == .quit) break; + } + } +} + +pub fn runQuery(null_term_query_str: [:0]const u8, file_engine: *FileEngine) !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const allocator = gpa.allocator(); + + var toker = ziqlTokenizer.init(null_term_query_str); + + var parser = ziqlParser.init(allocator, &toker, file_engine); + defer { + parser.deinit(); + switch (gpa.deinit()) { + .ok => std.log.debug("No memory leak baby !\n", .{}), + .leak => std.log.debug("We fucked it up bro...\n", .{}), + } + } + + try parser.parse(); +} + +// TODO: Put that in the FileEngine +fn checkAndCreateDirectories() void { + const cwd = std.fs.cwd(); + + cwd.makeDir("ZipponDB") catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => @panic("Error other than path already exists when trying to create the ZipponDB directory.\n"), + }; + + cwd.makeDir("ZipponDB/DATA") catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => @panic("Error other than path already exists when trying to create the DATA directory.\n"), + }; + + cwd.makeDir("ZipponDB/BACKUP") catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => @panic("Error other than path already exists when trying to create the ENGINE directory.\n"), + }; + + cwd.makeDir("ZipponDB/LOG") catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => @panic("Error other than path already exists when trying to create the ENGINE directory.\n"), + }; +} diff --git a/src/cliParser.zig b/src/cliParser.zig deleted file mode 100644 index a82d449..0000000 --- a/src/cliParser.zig +++ /dev/null @@ -1,151 +0,0 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const DataEngine = @import("engines/file.zig").FileEngine; -const cliTokenizer = @import("tokenizers/cli.zig").Tokenizer; -const cliToken = @import("tokenizers/cli.zig").Token; -const ziqlTokenizer = @import("tokenizers/ziql.zig").Tokenizer; -const ziqlToken = @import("tokenizers/ziql.zig").Token; -const ziqlParser = @import("ziqlParser.zig").Parser; - -const stdout = std.io.getStdOut().writer(); - -fn send(comptime format: []const u8, args: anytype) void { - stdout.print(format, args) catch |err| { - std.log.err("Can't send: {any}", .{err}); - stdout.print("\x03\n", .{}) catch {}; - }; - - stdout.print("\x03\n", .{}) catch {}; -} - -pub fn main() !void { - // TODO: Use an environment variable for the path of the DB - checkAndCreateDirectories(); - - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - - defer { - switch (gpa.deinit()) { - .ok => std.log.debug("No memory leak baby !\n", .{}), - .leak => std.log.debug("We fucked it up bro...\n", .{}), - } - } - - const line_buf = try allocator.alloc(u8, 1024 * 50); - defer allocator.free(line_buf); - - // TODO: Use a State to prevent first_token and second_token - while (true) { - std.debug.print("> ", .{}); - const line = try std.io.getStdIn().reader().readUntilDelimiterOrEof(line_buf, '\n'); - - if (line) |line_str| { - const time_initial = std.time.microTimestamp(); - - const null_term_line_str = try allocator.dupeZ(u8, line_str[0..line_str.len]); - defer allocator.free(null_term_line_str); - - var cliToker = cliTokenizer.init(null_term_line_str); - const command_token = cliToker.next(); - switch (command_token.tag) { - .keyword_run => { - const query_token = cliToker.next(); - switch (query_token.tag) { - .string_literal => { - const null_term_query_str = try allocator.dupeZ(u8, line_str[query_token.loc.start + 1 .. query_token.loc.end - 1]); - defer allocator.free(null_term_query_str); - try runCommand(null_term_query_str); - }, - .keyword_help => send("The run command will take a ZiQL query between \" and run it. eg: run \"GRAB User\"\n", .{}), - else => send("After command run, need a string of a query, eg: \"GRAB User\"\n", .{}), - } - }, - .keyword_schema => { - const second_token = cliToker.next(); - - switch (second_token.tag) { - .keyword_describe => send("{s}\n", .{ // TODO: Change that to use the SchemaEngine - \\User ( - \\ name: str, - \\ email: str, - \\) - \\Message ( - \\ content: str, - \\) - }), - .keyword_init => { // Maybe rename that in init now that I dont build binary anymore - const data_engine = DataEngine.init(allocator, null); - try data_engine.initDataFolder(); - }, - .keyword_help => { - send("{s}", .{ - \\Here are all available options to use with the schema command: - \\ - \\describe Print the schema use by the current engine. - \\build Build a new engine using a schema file. Args => filename: str, path of schema file to use. Default 'schema.zipponschema'. - \\ - }); - }, - else => std.debug.print("schema available options: describe, build & help\n", .{}), - } - }, - .keyword_help => { - send("{s}", .{ - \\Welcome to ZipponDB! - \\ - \\run To run a query. Args => query: str, the query to execute. - \\schema Build a new engine and print current schema. - \\quit To stop the process without saving - \\dump Create a new folder with all data as copy. Args => foldername: str, the name of the folder. - \\bump Replace current data with a previous dump. Args => foldername: str, the name of the folder. - \\ - }); - }, - .keyword_quit => break, - .eof => {}, - else => send("Command need to start with a keyword, including: run, schema, help and quit\n", .{}), - } - - const time_final = std.time.microTimestamp(); - const duration = time_final - time_initial; - std.debug.print("Time: {d:.2}ms\n", .{@as(f64, @floatFromInt(duration)) / 1000.0}); - } - } -} - -pub fn runCommand(null_term_query_str: [:0]const u8) !void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - - var toker = ziqlTokenizer.init(null_term_query_str); - - var parser = ziqlParser.init(allocator, &toker); - defer parser.deinit(); - - try parser.parse(); -} - -fn checkAndCreateDirectories() void { - const cwd = std.fs.cwd(); - - cwd.makeDir("ZipponDB") catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => @panic("Error other than path already exists when trying to create the ZipponDB directory.\n"), - }; - - cwd.makeDir("ZipponDB/DATA") catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => @panic("Error other than path already exists when trying to create the DATA directory.\n"), - }; - - cwd.makeDir("ZipponDB/BACKUP") catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => @panic("Error other than path already exists when trying to create the ENGINE directory.\n"), - }; - - cwd.makeDir("ZipponDB/LOG") catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => @panic("Error other than path already exists when trying to create the ENGINE directory.\n"), - }; -} diff --git a/src/engines/schema.zig b/src/engines/schema.zig deleted file mode 100644 index 69d11ec..0000000 --- a/src/engines/schema.zig +++ /dev/null @@ -1,112 +0,0 @@ -// This file is named and use as a struct but is in fact just a series of utils functions to get and check the schema -// TODO: create a struct like SchemaEngine so I can do propre testing and it make update it easier -// Also can put the migration stuff in here - -const std = @import("std"); -const DataType = @import("types/dataType.zig").DataType; - -pub const struct_name_list: [2][]const u8 = .{ - "User", - "Message", -}; - -pub const struct_member_list: [2][]const []const u8 = .{ - &[_][]const u8{ "name", "email", "age", "scores", "friends" }, - &[_][]const u8{"content"}, -}; - -pub const struct_type_list: [2][]const DataType = .{ - &[_]DataType{ .str, .str, .int, .int_array, .bool_array }, - &[_]DataType{.str}, -}; - -// use to know how much token the Parser of the FileEngine need to pass before the right one -pub fn columnIndexOfMember(struct_name: []const u8, member_name: []const u8) ?usize { - var i: u16 = 0; - - for (structName2structMembers(struct_name)) |mn| { - if (std.mem.eql(u8, mn, member_name)) return i; - i += 1; - } - - return null; -} - -/// Get the type of the member -pub fn memberName2DataType(struct_name: []const u8, member_name: []const u8) ?DataType { - var i: u16 = 0; - - for (structName2structMembers(struct_name)) |mn| { - if (std.mem.eql(u8, mn, member_name)) return structName2DataType(struct_name)[i]; - i += 1; - } - - return null; -} - -/// Get the list of all member name for a struct name -pub fn structName2structMembers(struct_name: []const u8) []const []const u8 { - var i: u16 = 0; - - while (i < struct_name_list.len) : (i += 1) if (std.mem.eql(u8, struct_name_list[i], struct_name)) break; - - if (i == struct_name_list.len) { - std.debug.print("{s} \n", .{struct_name}); - @panic("Struct name not found!"); - } - - return struct_member_list[i]; -} - -pub fn structName2DataType(struct_name: []const u8) []const DataType { - var i: u16 = 0; - - while (i < struct_name_list.len) : (i += 1) if (std.mem.eql(u8, struct_name_list[i], struct_name)) break; - - return struct_type_list[i]; -} - -/// Chech if the name of a struct is in the current schema -pub fn isStructNameExists(struct_name: []const u8) bool { - for (struct_name_list) |sn| if (std.mem.eql(u8, sn, struct_name)) return true; - return false; -} - -/// Check if a struct have the member name -pub fn isMemberNameInStruct(struct_name: []const u8, member_name: []const u8) bool { - for (structName2structMembers(struct_name)) |mn| if (std.mem.eql(u8, mn, member_name)) return true; - return false; -} - -/// Take a struct name and a member name and return true if the member name is part of the struct -pub fn isMemberPartOfStruct(struct_name: []const u8, member_name: []const u8) bool { - const all_struct_member = structName2structMembers(struct_name); - - for (all_struct_member) |key| { - if (std.mem.eql(u8, key, member_name)) return true; - } - - return false; -} - -/// Check if a string is a name of a struct in the currently use engine -pub fn isStructInSchema(struct_name_to_check: []const u8) bool { - for (struct_name_list) |struct_name| { - if (std.mem.eql(u8, struct_name_to_check, struct_name)) { - return true; - } - } - return false; -} - -// Return true if the map have all the member name as key and not more -pub fn checkIfAllMemberInMap(struct_name: []const u8, map: *std.StringHashMap([]const u8)) bool { - const all_struct_member = structName2structMembers(struct_name); - var count: u16 = 0; - - for (all_struct_member) |key| { - if (map.contains(key)) count += 1 else std.debug.print("Missing: {s}\n", .{key}); - } - - return ((count == all_struct_member.len) and (count == map.count())); -} diff --git a/src/engines/file.zig b/src/fileEngine.zig similarity index 61% rename from src/engines/file.zig rename to src/fileEngine.zig index 651c2e2..5491da6 100644 --- a/src/engines/file.zig +++ b/src/fileEngine.zig @@ -1,8 +1,13 @@ const std = @import("std"); -const schemaEngine = @import("schema.zig"); const Allocator = std.mem.Allocator; const UUID = @import("types/uuid.zig").UUID; const DataType = @import("types/dataType.zig").DataType; +const FileTokenizer = @import("tokenizers/file.zig").Tokenizer; +const FileToken = @import("tokenizers/file.zig").Token; +const SchemaStruct = @import("schemaParser.zig").Parser.SchemaStruct; +const SchemaParser = @import("schemaParser.zig").Parser; +const SchemaTokenizer = @import("tokenizers/schema.zig").Tokenizer; +const SchemaToken = @import("tokenizers/schema.zig").Token; //TODO: Create a union class and chose between file and memory @@ -10,142 +15,39 @@ const DataType = @import("types/dataType.zig").DataType; /// Or even get stats, whatever. If it touch files, it's here pub const FileEngine = struct { allocator: Allocator, - path_to_DATA_dir: []const u8, // The path to the DATA folder + path_to_ZipponDB_dir: []const u8, // The path to the DATA folder max_file_size: usize = 5e+4, // 50kb TODO: Change + null_terminated_schema_buff: [:0]u8, + struct_array: std.ArrayList(SchemaStruct), - pub const Token = struct { - tag: Tag, - loc: Loc, + pub fn init(allocator: Allocator, path: ?[]const u8) FileEngine { + const path_to_ZipponDB_dir = path orelse "ZipponDB"; - pub const Loc = struct { - start: usize, - end: usize, + var schema_buf = allocator.alloc(u8, 1024 * 50) catch @panic("Cant allocate the schema buffer"); + defer allocator.free(schema_buf); + + const len: usize = FileEngine.readSchemaFile(allocator, path_to_ZipponDB_dir, schema_buf) catch 0; + const null_terminated_schema_buff = allocator.dupeZ(u8, schema_buf[0..len]) catch @panic("Cant allocate null term buffer for the schema"); + + var toker = SchemaTokenizer.init(null_terminated_schema_buff); + var parser = SchemaParser.init(&toker, allocator); + + var struct_array = std.ArrayList(SchemaStruct).init(allocator); + parser.parse(&struct_array) catch {}; + + return FileEngine{ + .allocator = allocator, + .path_to_ZipponDB_dir = path_to_ZipponDB_dir, + .null_terminated_schema_buff = null_terminated_schema_buff, + .struct_array = struct_array, }; + } - pub const Tag = enum { - eof, - invalid, - - string_literal, - int_literal, - float_literal, - identifier, - equal, - bang, // ! - pipe, // | - l_paren, // ( - r_paren, // ) - l_bracket, // [ - r_bracket, // ] - l_brace, // { - r_brace, // } - semicolon, // ; - comma, // , - angle_bracket_left, // < - angle_bracket_right, // > - angle_bracket_left_equal, // <= - angle_bracket_right_equal, // >= - equal_angle_bracket_right, // => - period, // . - bang_equal, // != - }; - }; - - pub const Tokenizer = struct { - buffer: [:0]const u8, - index: usize, - - // Maybe change that to use the stream directly so I dont have to read the line 2 times - pub fn init(buffer: [:0]const u8) Tokenizer { - // Skip the UTF-8 BOM if present. - return .{ - .buffer = buffer, - .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0, // WTF ? I guess some OS add that or some shit like that - }; - } - - const State = enum { - start, - string_literal, - float, - int, - }; - - pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 { - return self.buffer[token.loc.start..token.loc.end]; - } - - pub fn next(self: *Tokenizer) Token { - // That ugly but work - if (self.buffer[self.index] == ' ') self.index += 1; - - var state: State = .start; - var result: Token = .{ - .tag = undefined, - .loc = .{ - .start = self.index, - .end = undefined, - }, - }; - while (true) : (self.index += 1) { - const c = self.buffer[self.index]; - - if (self.index == self.buffer.len) break; - - switch (state) { - .start => switch (c) { - '\'' => { - state = .string_literal; - result.tag = .string_literal; - }, - '0'...'9', '-' => { - state = .int; - result.tag = .int_literal; - }, - '[' => { - result.tag = .l_bracket; - self.index += 1; - break; - }, - ']' => { - result.tag = .r_bracket; - self.index += 1; - break; - }, - else => std.debug.print("Unknow character: {c}\n", .{c}), - }, - - .string_literal => switch (c) { - '\'' => { - self.index += 1; - break; - }, - else => continue, - }, - - .int => switch (c) { - '.' => { - state = .float; - result.tag = .float_literal; - }, - '0'...'9' => continue, - else => break, - }, - .float => switch (c) { - '0'...'9' => { - continue; - }, - else => { - break; - }, - }, - } - } - - result.loc.end = self.index; - return result; - } - }; + pub fn deinit(self: *FileEngine) void { + for (self.struct_array.items) |*elem| elem.deinit(); + self.struct_array.deinit(); + self.allocator.free(self.null_terminated_schema_buff); + } const ComparisonValue = union { int: i64, @@ -173,20 +75,18 @@ pub const FileEngine = struct { } }; - pub fn init(allocator: Allocator, DATA_path: ?[]const u8) FileEngine { - // I think use env variable for the path, idk, something better at least than just that 😕 - return FileEngine{ - .allocator = allocator, - .path_to_DATA_dir = DATA_path orelse "ZipponDB/DATA", - }; - } - /// Take a condition and an array of UUID and fill the array with all UUID that match the condition + /// TODO: Optimize the shit out of this, it it way too slow rn. Here some ideas + /// - Array can take a very long time to parse, maybe put them in a seperate file. But string can be too... + /// - Use the stream directly in the tokenizer + /// - Use a fixed size and split into other file. Like one file for one member (Because very long, like an array of 1000 value) and another one for everything else + /// The threselhold can be like if the average len is > 400 character. So UUID would take less that 10% of the storage + /// - Save data in a more compact way pub fn getUUIDListUsingCondition(self: *FileEngine, condition: Condition, uuid_array: *std.ArrayList(UUID)) !void { const max_file_index = try self.maxFileIndex(condition.struct_name); var current_index: usize = 0; - var sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); + var sub_path = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); defer self.allocator.free(sub_path); var file = std.fs.cwd().openFile(sub_path, .{}) catch @panic("Can't open first file to init a data iterator"); @@ -220,8 +120,8 @@ pub const FileEngine = struct { } } - var token: FileEngine.Token = undefined; - const column_index = schemaEngine.columnIndexOfMember(condition.struct_name, condition.member_name); + var token: FileToken = undefined; + const column_index = self.columnIndexOfMember(condition.struct_name, condition.member_name); while (true) { output_fbs.reset(); @@ -234,7 +134,7 @@ pub const FileEngine = struct { current_index += 1; self.allocator.free(sub_path); - sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); + sub_path = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); file.close(); // Do I need to close ? I think so file = std.fs.cwd().openFile(sub_path, .{}) catch { @@ -257,7 +157,7 @@ pub const FileEngine = struct { const null_terminated_string = try self.allocator.dupeZ(u8, output_fbs.getWritten()[37..]); defer self.allocator.free(null_terminated_string); - var data_toker = Tokenizer.init(null_terminated_string); + var data_toker = FileTokenizer.init(null_terminated_string); const uuid = try UUID.parse(output_fbs.getWritten()[0..36]); // Skip unwanted token @@ -333,7 +233,7 @@ pub const FileEngine = struct { // TODO: Clean a bit the code // Do I need multiple files too ? I mean it duplicate UUID a lot, if it's just to save a name like 'Bob', storing a long UUID is overkill // I could just use a tabular data format with separator using space - Or maybe I encode the uuid to take a minimum space as I always know it size - pub fn writeEntity(self: FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) !UUID { + pub fn writeEntity(self: *FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) !UUID { const uuid = UUID.init(); const potential_file_index = try self.getFirstUsableIndexFile(struct_name); @@ -344,21 +244,20 @@ pub const FileEngine = struct { defer self.allocator.free(path); if (potential_file_index) |file_index| { - path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, file_index }); + path = try std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, file_index }); file = std.fs.cwd().openFile(path, .{ .mode = .read_write }) catch @panic("=("); } else { const max_index = try self.maxFileIndex(struct_name); - path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, max_index + 1 }); + path = try std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, max_index + 1 }); file = std.fs.cwd().createFile(path, .{}) catch @panic("Error creating new data file"); } try file.seekFromEnd(0); try file.writer().print("{s}", .{uuid.format_uuid()}); - const member_names = schemaEngine.structName2structMembers(struct_name); // This need to be in the same order all the time tho - for (member_names) |member_name| { - try file.writer().print(" {s}", .{data_map.get(member_name).?}); + for (self.structName2structMembers(struct_name)) |member_name| { + try file.writer().print(" {s}", .{data_map.get(self.locToSlice(member_name)).?}); } try file.writer().print("\n", .{}); @@ -375,7 +274,7 @@ pub const FileEngine = struct { /// Use the map of file stat to find the first file with under the bytes limit. /// return the name of the file. If none is found, return null. fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) !?usize { - const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name }); + const path = try std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, struct_name }); defer self.allocator.free(path); var member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); @@ -392,7 +291,7 @@ pub const FileEngine = struct { /// Iter over all file and get the max name and return the value of it as usize /// So for example if there is 1.zippondata and 2.zippondata it return 2. fn maxFileIndex(self: FileEngine, struct_name: []const u8) !usize { - const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name }); + const path = try std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, struct_name }); defer self.allocator.free(path); const member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); @@ -406,35 +305,186 @@ pub const FileEngine = struct { return count - 1; } - // TODO: Give the option to keep , dump or erase the data - pub fn initDataFolder(self: FileEngine) !void { - var data_dir = try std.fs.cwd().openDir(self.path_to_DATA_dir, .{}); + const FileError = error{ + SchemaFileNotFound, + SchemaNotConform, + DATAFolderNotFound, + StructFolderNotFound, + CantMakeDir, + CantMakeFile, + }; + + /// Request a path to a schema file and then create the struct folder + /// TODO: Delete current folder before new one are created + pub fn initDataFolder(self: *FileEngine, path_to_schema_file: []const u8) FileError!void { + var schema_buf = self.allocator.alloc(u8, 1024 * 50) catch @panic("Cant allocate the schema buffer"); + defer self.allocator.free(schema_buf); + + const file = std.fs.cwd().openFile(path_to_schema_file, .{}) catch return FileError.SchemaFileNotFound; + defer file.close(); + + const len = file.readAll(schema_buf) catch @panic("Can't read schema file"); + + self.allocator.free(self.null_terminated_schema_buff); + self.null_terminated_schema_buff = self.allocator.dupeZ(u8, schema_buf[0..len]) catch @panic("Cant allocate null term buffer for the schema"); + + var toker = SchemaTokenizer.init(self.null_terminated_schema_buff); + var parser = SchemaParser.init(&toker, self.allocator); + + // Deinit the struct array before creating a new one + for (self.struct_array.items) |*elem| elem.deinit(); + for (0..self.struct_array.items.len) |_| _ = self.struct_array.pop(); + + parser.parse(&self.struct_array) catch return error.SchemaNotConform; + + const path = std.fmt.allocPrint(self.allocator, "{s}/DATA", .{self.path_to_ZipponDB_dir}) catch @panic("Cant allocate path"); + defer self.allocator.free(path); + + var data_dir = std.fs.cwd().openDir(path, .{}) catch return FileError.DATAFolderNotFound; defer data_dir.close(); - for (schemaEngine.struct_name_list) |struct_name| { - data_dir.makeDir(struct_name) catch |err| switch (err) { + for (self.struct_array.items) |struct_item| { + data_dir.makeDir(self.locToSlice(struct_item.name)) catch |err| switch (err) { error.PathAlreadyExists => {}, - else => return err, + else => return FileError.CantMakeDir, }; - const struct_dir = try data_dir.openDir(struct_name, .{}); + const struct_dir = data_dir.openDir(self.locToSlice(struct_item.name), .{}) catch return FileError.StructFolderNotFound; _ = struct_dir.createFile("0.zippondata", .{}) catch |err| switch (err) { error.PathAlreadyExists => {}, - else => return err, + else => return FileError.CantMakeFile, }; } + + self.writeSchemaFile(); + } + + // Stuff for schema + + pub fn readSchemaFile(allocator: Allocator, sub_path: []const u8, buffer: []u8) !usize { + const path = try std.fmt.allocPrint(allocator, "{s}/schema.zipponschema", .{sub_path}); + defer allocator.free(path); + + const file = try std.fs.cwd().openFile(path, .{}); + defer file.close(); + + const len = try file.readAll(buffer); + return len; + } + + pub fn writeSchemaFile(self: *FileEngine) void { + // Delete the current schema file + // Create a new one + // Dumpe the buffer inside + var zippon_dir = std.fs.cwd().openDir(self.path_to_ZipponDB_dir, .{}) catch @panic("Cant open main folder!"); + defer zippon_dir.close(); + zippon_dir.deleteFile("schema.zipponschema") catch |err| switch (err) { + error.FileNotFound => {}, + else => @panic("Error other than file not found when writing the schema."), + }; + + var file = zippon_dir.createFile("schema.zipponschema", .{}) catch @panic("Can't create new schema file"); + defer file.close(); + file.writeAll(self.null_terminated_schema_buff) catch @panic("Can't write new schema"); + } + + pub fn locToSlice(self: *FileEngine, loc: SchemaToken.Loc) []const u8 { + return self.null_terminated_schema_buff[loc.start..loc.end]; + } + + pub fn columnIndexOfMember(self: *FileEngine, struct_name: []const u8, member_name: []const u8) ?usize { + var i: u16 = 0; + + for (self.structName2structMembers(struct_name)) |mn| { + if (std.mem.eql(u8, self.locToSlice(mn), member_name)) return i; + i += 1; + } + + return null; + } + + /// Get the type of the member + pub fn memberName2DataType(self: *FileEngine, struct_name: []const u8, member_name: []const u8) ?DataType { + var i: u16 = 0; + + for (self.structName2structMembers(struct_name)) |mn| { + if (std.mem.eql(u8, self.locToSlice(mn), member_name)) return self.structName2DataType(struct_name)[i]; + i += 1; + } + + return null; + } + + /// Get the list of all member name for a struct name + pub fn structName2structMembers(self: *FileEngine, struct_name: []const u8) []SchemaToken.Loc { + var i: u16 = 0; + + while (i < self.struct_array.items.len) : (i += 1) if (std.mem.eql(u8, self.locToSlice(self.struct_array.items[i].name), struct_name)) break; + + if (i == self.struct_array.items.len) { + @panic("Struct name not found!"); + } + + return self.struct_array.items[i].members.items; + } + + pub fn structName2DataType(self: *FileEngine, struct_name: []const u8) []const DataType { + var i: u16 = 0; + + while (i < self.struct_array.items.len) : (i += 1) if (std.mem.eql(u8, self.locToSlice(self.struct_array.items[i].name), struct_name)) break; + + return self.struct_array.items[i].types.items; + } + + /// Chech if the name of a struct is in the current schema + pub fn isStructNameExists(self: *FileEngine, struct_name: []const u8) bool { + var i: u16 = 0; + while (i < self.struct_array.items.len) : (i += 1) if (std.mem.eql(u8, self.locToSlice(self.struct_array.items[i].name), struct_name)) return true; + return false; + } + + /// Check if a struct have the member name + pub fn isMemberNameInStruct(self: *FileEngine, struct_name: []const u8, member_name: []const u8) bool { + for (self.structName2structMembers(struct_name)) |mn| { + if (std.mem.eql(u8, self.locToSlice(mn), member_name)) return true; + } + return false; + } + + /// Check if a string is a name of a struct in the currently use engine + pub fn isStructInSchema(self: *FileEngine, struct_name_to_check: []const u8) bool { + for (self.struct_array.items) |struct_schema| { + if (std.mem.eql(u8, struct_name_to_check, struct_schema.name)) { + return true; + } + } + return false; + } + + // Return true if the map have all the member name as key and not more + pub fn checkIfAllMemberInMap(self: *FileEngine, struct_name: []const u8, map: *std.StringHashMap([]const u8)) bool { + const all_struct_member = self.structName2structMembers(struct_name); + var count: u16 = 0; + + for (all_struct_member) |mn| { + if (map.contains(self.locToSlice(mn))) count += 1 else std.debug.print("Missing: {s}\n", .{self.locToSlice(mn)}); + } + + return ((count == all_struct_member.len) and (count == map.count())); } }; test "Get list of UUID using condition" { const allocator = std.testing.allocator; - var data_engine = FileEngine.init(allocator, null); + + var file_engine = FileEngine.init(allocator, null); + defer file_engine.deinit(); var uuid_array = std.ArrayList(UUID).init(allocator); defer uuid_array.deinit(); const condition = FileEngine.Condition{ .struct_name = "User", .member_name = "email", .value = "adrien@mail.com", .operation = .equal, .data_type = .str }; - try data_engine.getUUIDListUsingCondition(condition, &uuid_array); + try file_engine.getUUIDListUsingCondition(condition, &uuid_array); } // Series of functions to use just before creating an entity. @@ -545,17 +595,3 @@ test "Data parsing" { // TODO: Test the string array } - -// Test tokenizer - -test "basic query" { - try testTokenize("001 123 0185", &.{ .int_literal, .int_literal, .int_literal }); -} - -fn testTokenize(source: [:0]const u8, expected_token_tags: []const FileEngine.Token.Tag) !void { - var tokenizer = FileEngine.Tokenizer.init(source); - for (expected_token_tags) |expected_token_tag| { - const token = tokenizer.next(); - try std.testing.expectEqual(expected_token_tag, token.tag); - } -} diff --git a/src/schemaParser.zig b/src/schemaParser.zig index 32ab2c4..4fe5682 100644 --- a/src/schemaParser.zig +++ b/src/schemaParser.zig @@ -1,184 +1,231 @@ const std = @import("std"); const Allocator = std.mem.Allocator; +const DataType = @import("types/dataType.zig").DataType; const Toker = @import("tokenizers/schema.zig").Tokenizer; const Token = @import("tokenizers/schema.zig").Token; +const stdout = std.io.getStdOut().writer(); + +fn send(comptime format: []const u8, args: anytype) void { + stdout.print(format, args) catch |err| { + std.log.err("Can't send: {any}", .{err}); + stdout.print("\x03\n", .{}) catch {}; + }; + + stdout.print("\x03\n", .{}) catch {}; +} + pub const Parser = struct { - file: std.fs.File, + toker: *Toker, + allocator: Allocator, + + pub fn init(toker: *Toker, allocator: Allocator) Parser { + return .{ + .allocator = allocator, + .toker = toker, + }; + } + + // Maybe I the name and member can be Loc, with a start and end, and use the buffer to get back the value + // This is how Token works + // From my understanding this is the same here. I put slices, that can just a len and a pointer, put I con't save the value itself. + // Or maybe I do actually, and an array of pointer would be *[]u8 + pub const SchemaStruct = struct { + allocator: Allocator, + name: Token.Loc, + members: std.ArrayList(Token.Loc), + types: std.ArrayList(DataType), + + pub fn init(allocator: Allocator, name: Token.Loc) SchemaStruct { + return SchemaStruct{ .allocator = allocator, .name = name, .members = std.ArrayList(Token.Loc).init(allocator), .types = std.ArrayList(DataType).init(allocator) }; + } + + pub fn deinit(self: *SchemaStruct) void { + self.types.deinit(); + self.members.deinit(); + } + }; const State = enum { - start, + end, invalid, - - expect_l_paren, - expect_r_paren, + expect_struct_name_OR_end, expect_member_name, - expect_two_dot, + expect_l_paren, + expect_member_name_OR_r_paren, expect_value_type, + expext_array_type, + expect_two_dot, expect_comma, }; - pub fn init() Parser { - return .{ - .file = undefined, - }; - } + // TODO: Pass that to the FileEngine and do the metadata.zig file instead + pub fn parse(self: *Parser, struct_array: *std.ArrayList(SchemaStruct)) !void { + var state: State = .expect_struct_name_OR_end; + var index: usize = 0; + var keep_next = false; - fn writeToFile(self: *const Parser, text: []const u8) void { - const bytes_written = self.file.write(text) catch |err| { - std.debug.print("Error when writing dtypes.zig: {}", .{err}); - return; - }; - _ = bytes_written; - } - - // TODO: Pass that to the DataEngine and do the metadata.zig file instead - pub fn parse(self: *Parser, toker: *Toker, buffer: []u8) void { - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - var struct_array = std.ArrayList([]u8).init(allocator); - - var state: State = .start; - - std.fs.cwd().deleteFile("src/metadata.zig") catch {}; - - self.file = std.fs.cwd().createFile("src/metadata.zig", .{}) catch |err| { - std.debug.print("Error when writing dtypes.zig: {}", .{err}); - return; - }; - defer self.file.close(); - - self.writeToFile("const std = @import(\"std\");\nconst UUID = @import(\"uuid.zig\").UUID;\n\n"); - - var token = toker.next(); - while (token.tag != Token.Tag.eof) : (token = toker.next()) { + var token = self.toker.next(); + while ((state != .end) and (state != .invalid)) : ({ + token = if (!keep_next) self.toker.next() else token; + keep_next = false; + }) { switch (state) { - .start => switch (token.tag) { + .expect_struct_name_OR_end => switch (token.tag) { .identifier => { state = .expect_l_paren; - self.writeToFile("pub const "); - self.writeToFile(buffer[token.loc.start..token.loc.end]); - self.writeToFile(" = struct {\n"); - self.writeToFile(" id: UUID,\n"); + struct_array.append(SchemaStruct.init(self.allocator, token.loc)) catch @panic("Error appending a struct name."); + }, + .eof => state = .end, + else => { + self.printError("Error parsing schema: Expected a struct name", &token); + state = .invalid; + }, + }, - // TODO: Check if struct name is already use - struct_array.append(buffer[token.loc.start..token.loc.end]) catch @panic("Error appending a struct name."); - }, - else => { - state = .invalid; - }, - }, .expect_l_paren => switch (token.tag) { - .l_paren => { - state = .expect_member_name; - }, + .l_paren => state = .expect_member_name, else => { + self.printError("Error parsing schema: Expected (", &token); state = .invalid; }, }, - .expect_member_name => switch (token.tag) { + + .expect_member_name_OR_r_paren => switch (token.tag) { .identifier => { - state = .expect_two_dot; - self.writeToFile(" "); - self.writeToFile(buffer[token.loc.start..token.loc.end]); + state = .expect_member_name; + keep_next = true; }, .r_paren => { - state = .start; - self.writeToFile("};\n\n"); + state = .expect_struct_name_OR_end; + index += 1; }, else => { + self.printError("Error parsing schema: Expected member name or )", &token); state = .invalid; }, }, + + .expect_member_name => { + state = .expect_two_dot; + struct_array.items[index].members.append(token.loc) catch @panic("Error appending a member name."); + }, + .expect_two_dot => switch (token.tag) { - .two_dot => { - state = .expect_value_type; - self.writeToFile(": "); - }, + .two_dot => state = .expect_value_type, else => { + self.printError("Error parsing schema: Expected :", &token); state = .invalid; }, }, + .expect_value_type => switch (token.tag) { .type_int => { state = .expect_comma; - self.writeToFile("i64"); + struct_array.items[index].types.append(DataType.int) catch @panic("Error appending a type."); }, .type_str => { state = .expect_comma; - self.writeToFile("[] u8"); + struct_array.items[index].types.append(DataType.str) catch @panic("Error appending a type."); }, .type_float => { state = .expect_comma; - self.writeToFile("f64"); + struct_array.items[index].types.append(DataType.float) catch @panic("Error appending a type."); + }, + .type_bool => { + state = .expect_comma; + struct_array.items[index].types.append(DataType.bool) catch @panic("Error appending a type."); + }, + .type_date => @panic("Date not yet implemented"), + .identifier => @panic("Link not yet implemented"), + .lr_bracket => state = .expext_array_type, + else => { + self.printError("Error parsing schema: Expected data type", &token); + state = .invalid; + }, + }, + + .expext_array_type => switch (token.tag) { + .type_int => { + state = .expect_comma; + struct_array.items[index].types.append(DataType.int_array) catch @panic("Error appending a type."); + }, + .type_str => { + state = .expect_comma; + struct_array.items[index].types.append(DataType.str_array) catch @panic("Error appending a type."); + }, + .type_float => { + state = .expect_comma; + struct_array.items[index].types.append(DataType.float_array) catch @panic("Error appending a type."); + }, + .type_bool => { + state = .expect_comma; + struct_array.items[index].types.append(DataType.bool_array) catch @panic("Error appending a type."); }, .type_date => { - @panic("Date not yet implemented"); + self.printError("Error parsing schema: Data not yet implemented", &token); + state = .invalid; }, .identifier => { - @panic("Link not yet implemented"); - }, - .lr_bracket => { - @panic("Array not yet implemented"); + self.printError("Error parsing schema: Relationship not yet implemented", &token); + state = .invalid; }, else => { + self.printError("Error parsing schema: Expected data type", &token); state = .invalid; }, }, + .expect_comma => switch (token.tag) { - .comma => { - state = .expect_member_name; - self.writeToFile(",\n"); - }, + .comma => state = .expect_member_name_OR_r_paren, else => { + self.printError("Error parsing schema: Expected ,", &token); state = .invalid; }, }, - .invalid => { - // TODO: Better errors - @panic("Error: Schema need to start with an Identifier."); - }, - else => { - @panic(""); - }, + + else => unreachable, } } - // Use @embedFile + // if invalid, empty the list + if (state == .invalid) { + for (0..struct_array.items.len) |i| { + struct_array.items[i].deinit(); + } - // Make the union `Type` with all different struct - self.writeToFile("pub const Types = union {\n"); - for (struct_array.items) |struct_name| { - self.writeToFile(" "); - self.writeToFile(struct_name); - self.writeToFile(": *"); - self.writeToFile(struct_name); - self.writeToFile(",\n"); + for (0..struct_array.items.len) |_| { + _ = struct_array.pop(); + } + return error.SchemaNotConform; } - self.writeToFile("};\n\n"); + } - // Make an array of struct name - self.writeToFile("pub const struct_name_list: ["); - var int_buffer: [20]u8 = undefined; - const len = std.fmt.formatIntBuf(&int_buffer, @as(usize, struct_array.items.len), 10, .lower, .{}); - self.writeToFile(int_buffer[0..len]); - self.writeToFile("][]const u8 = .{ "); - for (struct_array.items) |struct_name| { - self.writeToFile(" \""); - self.writeToFile(struct_name); - self.writeToFile("\", "); + fn printError(self: *Parser, message: []const u8, token: *Token) void { + stdout.print("\n", .{}) catch {}; + + const output = self.allocator.dupe(u8, self.toker.buffer) catch @panic("Cant allocator memory when print error"); + defer self.allocator.free(output); + + std.mem.replaceScalar(u8, output, '\n', ' '); + stdout.print("{s}\n", .{output}) catch {}; + + // Calculate the number of spaces needed to reach the start position. + var spaces: usize = 0; + while (spaces < token.loc.start) : (spaces += 1) { + stdout.print(" ", .{}) catch {}; } - self.writeToFile("};\n\n"); - // Create the var that contain the description of the current schema to be printed when running: - // The query "__DESCRIBE__" on the engine - // Or the command `schema describe` on the console - self.writeToFile("pub const describe_str = \""); - var escaped_text: [1024]u8 = undefined; - const replacement_count = std.mem.replace(u8, buffer, "\n", "\\n", &escaped_text); - const escaped_text_len = replacement_count + buffer.len; - self.writeToFile(escaped_text[0..escaped_text_len]); - self.writeToFile("\";"); + // Print the '^' characters for the error span. + var i: usize = token.loc.start; + while (i < token.loc.end) : (i += 1) { + stdout.print("^", .{}) catch {}; + } + stdout.print(" \n", .{}) catch {}; // Align with the message + + stdout.print("{s}\n", .{message}) catch {}; + + send("", .{}); } }; diff --git a/src/tokenizers/cli.zig b/src/tokenizers/cli.zig index 4f85e27..5f5dfc0 100644 --- a/src/tokenizers/cli.zig +++ b/src/tokenizers/cli.zig @@ -59,6 +59,10 @@ pub const Tokenizer = struct { string_literal_backslash, }; + pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 { + return self.buffer[token.loc.start..token.loc.end]; + } + pub fn next(self: *Tokenizer) Token { var state: State = .start; var result: Token = .{ @@ -104,7 +108,7 @@ pub const Tokenizer = struct { }, .identifier => switch (c) { - 'a'...'z', 'A'...'Z', '_', '0'...'9' => continue, + 'a'...'z', 'A'...'Z', '_', '0'...'9', '.' => continue, else => { if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |tag| { result.tag = tag; diff --git a/src/tokenizers/file.zig b/src/tokenizers/file.zig new file mode 100644 index 0000000..90fa83c --- /dev/null +++ b/src/tokenizers/file.zig @@ -0,0 +1,127 @@ +const std = @import("std"); + +pub const Token = struct { + tag: Tag, + loc: Loc, + + pub const Loc = struct { + start: usize, + end: usize, + }; + + pub const Tag = enum { + string_literal, + int_literal, + float_literal, + l_bracket, // [ + r_bracket, // ] + }; +}; + +pub const Tokenizer = struct { + buffer: [:0]const u8, + index: usize, + + // Maybe change that to use the stream directly so I dont have to read the line 2 times + pub fn init(buffer: [:0]const u8) Tokenizer { + // Skip the UTF-8 BOM if present. + return .{ + .buffer = buffer, + .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0, // WTF ? I guess some OS add that or some shit like that + }; + } + + const State = enum { + start, + string_literal, + float, + int, + }; + + pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 { + return self.buffer[token.loc.start..token.loc.end]; + } + + pub fn next(self: *Tokenizer) Token { + // That ugly but work + if (self.buffer[self.index] == ' ') self.index += 1; + + var state: State = .start; + var result: Token = .{ + .tag = undefined, + .loc = .{ + .start = self.index, + .end = undefined, + }, + }; + while (true) : (self.index += 1) { + const c = self.buffer[self.index]; + + if (self.index == self.buffer.len) break; + + switch (state) { + .start => switch (c) { + '\'' => { + state = .string_literal; + result.tag = .string_literal; + }, + '0'...'9', '-' => { + state = .int; + result.tag = .int_literal; + }, + '[' => { + result.tag = .l_bracket; + self.index += 1; + break; + }, + ']' => { + result.tag = .r_bracket; + self.index += 1; + break; + }, + else => std.debug.print("Unknow character: {c}\n", .{c}), + }, + + .string_literal => switch (c) { + '\'' => { + self.index += 1; + break; + }, + else => continue, + }, + + .int => switch (c) { + '.' => { + state = .float; + result.tag = .float_literal; + }, + '0'...'9' => continue, + else => break, + }, + .float => switch (c) { + '0'...'9' => { + continue; + }, + else => { + break; + }, + }, + } + } + + result.loc.end = self.index; + return result; + } +}; + +test "Basics" { + try testTokenize("193 88.92 [ 123] 'hello mommy'", &.{ .int_literal, .float_literal, .l_bracket, .int_literal, .r_bracket }); +} + +fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void { + var tokenizer = Tokenizer.init(source); + for (expected_token_tags) |expected_token_tag| { + const token = tokenizer.next(); + try std.testing.expectEqual(expected_token_tag, token.tag); + } +} diff --git a/src/tokenizers/schema.zig b/src/tokenizers/schema.zig index 8a7b246..881d617 100644 --- a/src/tokenizers/schema.zig +++ b/src/tokenizers/schema.zig @@ -14,6 +14,7 @@ pub const Token = struct { .{ "int", .type_int }, .{ "float", .type_float }, .{ "str", .type_str }, + .{ "bool", .type_bool }, .{ "date", .type_date }, }); @@ -28,6 +29,7 @@ pub const Token = struct { type_int, type_float, type_str, + type_bool, type_date, identifier, @@ -59,6 +61,10 @@ pub const Tokenizer = struct { l_bracket, }; + pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 { + return self.buffer[token.loc.start..token.loc.end]; + } + pub fn next(self: *Tokenizer) Token { var state: State = .start; var result: Token = .{ diff --git a/src/engines/types/dataType.zig b/src/types/dataType.zig similarity index 100% rename from src/engines/types/dataType.zig rename to src/types/dataType.zig diff --git a/src/engines/types/uuid.zig b/src/types/uuid.zig similarity index 100% rename from src/engines/types/uuid.zig rename to src/types/uuid.zig diff --git a/src/utils.zig b/src/utils.zig new file mode 100644 index 0000000..bb81675 --- /dev/null +++ b/src/utils.zig @@ -0,0 +1,14 @@ +const std = @import("std"); + +pub fn getEnvVariables(allocator: std.mem.Allocator, variable: []const u8) ?[]const u8 { + var env_map = try std.process.getEnvMap(allocator); + defer env_map.deinit(); + + var iter = env_map.iterator(); + + while (iter.next()) |entry| { + if (std.mem.eql(u8, entry.key_ptr.*, variable)) return allocator.dupe(u8, entry.key_ptr.*); + } + + return null; +} diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig index 7b78327..111647b 100644 --- a/src/ziqlParser.zig +++ b/src/ziqlParser.zig @@ -1,10 +1,9 @@ const std = @import("std"); -const schemaEngine = @import("engines/schema.zig"); -const DataEngine = @import("engines/file.zig").FileEngine; -const Condition = @import("engines/file.zig").FileEngine.Condition; +const FileEngine = @import("fileEngine.zig").FileEngine; +const Condition = @import("fileEngine.zig").FileEngine.Condition; const Tokenizer = @import("tokenizers/ziql.zig").Tokenizer; const Token = @import("tokenizers/ziql.zig").Token; -const UUID = @import("engines/types/uuid.zig").UUID; +const UUID = @import("types/uuid.zig").UUID; const Allocator = std.mem.Allocator; const stdout = std.io.getStdOut().writer(); @@ -22,21 +21,20 @@ pub const Parser = struct { allocator: Allocator, state: State, toker: *Tokenizer, - data_engine: DataEngine, additional_data: AdditionalData, struct_name: []const u8 = undefined, + file_engine: *FileEngine, action: enum { GRAB, ADD, UPDATE, DELETE } = undefined, - pub fn init(allocator: Allocator, toker: *Tokenizer) Parser { - // Do I need to init a DataEngine at each Parser, can't I put it in the CLI parser instead ? - const data_engine = DataEngine.init(allocator, null); + pub fn init(allocator: Allocator, toker: *Tokenizer, file_engine: *FileEngine) Parser { + // Do I need to init a FileEngine at each Parser, can't I put it in the CLI parser instead ? return Parser{ .allocator = allocator, .toker = toker, - .state = State.start, - .data_engine = data_engine, + .state = .start, .additional_data = AdditionalData.init(allocator), + .file_engine = file_engine, }; } @@ -75,7 +73,7 @@ pub const Parser = struct { expect_comma_OR_r_bracket, // For the filter parser - expect_left_condition, // Condition is a struct in DataEngine, it's all info necessary to get a list of UUID usinf DataEngine.getUUIDListUsingCondition + expect_left_condition, // Condition is a struct in FileEngine, it's all info necessary to get a list of UUID usinf FileEngine.getUUIDListUsingCondition expect_operation, // Operations are = != < <= > >= expect_value, expect_ANDOR_OR_end, @@ -129,35 +127,33 @@ pub const Parser = struct { keep_next = false; }) { switch (self.state) { - .start => { - switch (token.tag) { - .keyword_grab => { - self.action = .GRAB; - self.state = .expect_struct_name; - }, - .keyword_add => { - self.action = .ADD; - self.state = .expect_struct_name; - }, - .keyword_update => { - self.action = .UPDATE; - self.state = .expect_struct_name; - }, - .keyword_delete => { - self.action = .DELETE; - self.state = .expect_struct_name; - }, - else => { - self.printError("Error: Expected action keyword. Available: GRAB ADD DELETE UPDATE", &token); - self.state = .end; - }, - } + .start => switch (token.tag) { + .keyword_grab => { + self.action = .GRAB; + self.state = .expect_struct_name; + }, + .keyword_add => { + self.action = .ADD; + self.state = .expect_struct_name; + }, + .keyword_update => { + self.action = .UPDATE; + self.state = .expect_struct_name; + }, + .keyword_delete => { + self.action = .DELETE; + self.state = .expect_struct_name; + }, + else => { + self.printError("Error: Expected action keyword. Available: GRAB ADD DELETE UPDATE", &token); + self.state = .end; + }, }, .expect_struct_name => { // Check if the struct name is in the schema self.struct_name = try self.allocator.dupe(u8, self.toker.getTokenSlice(token)); - if (!schemaEngine.isStructNameExists(self.struct_name)) self.printError("Error: struct name not found in schema.", &token); + if (!self.file_engine.isStructNameExists(self.struct_name)) self.printError("Error: struct name not found in schema.", &token); switch (self.action) { .ADD => self.state = .expect_new_data, else => self.state = .expect_filter_or_additional_data, @@ -186,35 +182,31 @@ pub const Parser = struct { self.state = .end; }, - .expect_new_data => { - switch (token.tag) { - .l_paren => { - keep_next = true; - self.state = .parse_new_data_and_add_data; - }, - else => self.printError("Error: Expecting new data starting with (", &token), - } + .expect_new_data => switch (token.tag) { + .l_paren => { + keep_next = true; + self.state = .parse_new_data_and_add_data; + }, + else => self.printError("Error: Expecting new data starting with (", &token), }, - .parse_new_data_and_add_data => { - switch (self.action) { - .ADD => { - var data_map = std.StringHashMap([]const u8).init(self.allocator); - defer data_map.deinit(); - self.parseNewData(&data_map); + .parse_new_data_and_add_data => switch (self.action) { + .ADD => { + var data_map = std.StringHashMap([]const u8).init(self.allocator); + defer data_map.deinit(); + self.parseNewData(&data_map); - // TODO: Print the list of missing - if (!schemaEngine.checkIfAllMemberInMap(self.struct_name, &data_map)) self.printError("Error: Missing member", &token); - const uuid = self.data_engine.writeEntity(self.struct_name, data_map) catch { - send("ZipponDB error: Couln't write new data to file", .{}); - continue; - }; - send("Successfully added new {s} with UUID: {s}", .{ self.struct_name, uuid.format_uuid() }); - self.state = .end; - }, - .UPDATE => {}, // TODO: - else => unreachable, - } + // TODO: Print the list of missing + if (!self.file_engine.checkIfAllMemberInMap(self.struct_name, &data_map)) self.printError("Error: Missing member", &token); + const uuid = self.file_engine.writeEntity(self.struct_name, data_map) catch { + send("ZipponDB error: Couln't write new data to file", .{}); + continue; + }; + send("Successfully added new {s} with UUID: {s}", .{ self.struct_name, uuid.format_uuid() }); + self.state = .end; + }, + .UPDATE => {}, // TODO: + else => unreachable, }, else => unreachable, @@ -255,37 +247,35 @@ pub const Parser = struct { switch (self.state) { .expect_left_condition => { token = self.parseCondition(&left_condition, &token); - try self.data_engine.getUUIDListUsingCondition(left_condition, left_array); + try self.file_engine.getUUIDListUsingCondition(left_condition, left_array); self.state = State.expect_ANDOR_OR_end; keep_next = true; }, - .expect_ANDOR_OR_end => { - switch (token.tag) { - .r_brace => { - if (main) { - self.state = State.end; - } else { - self.printError("Error: Expected } to end main condition or AND/OR to continue it", &token); - } - }, - .r_paren => { - if (!main) { - self.state = State.end; - } else { - self.printError("Error: Expected ) to end inside condition or AND/OR to continue it", &token); - } - }, - .keyword_and => { - curent_operation = .and_; - self.state = State.expect_right_uuid_array; - }, - .keyword_or => { - curent_operation = .or_; - self.state = State.expect_right_uuid_array; - }, - else => self.printError("Error: Expected a condition including AND or OR or } or )", &token), - } + .expect_ANDOR_OR_end => switch (token.tag) { + .r_brace => { + if (main) { + self.state = State.end; + } else { + self.printError("Error: Expected } to end main condition or AND/OR to continue it", &token); + } + }, + .r_paren => { + if (!main) { + self.state = State.end; + } else { + self.printError("Error: Expected ) to end inside condition or AND/OR to continue it", &token); + } + }, + .keyword_and => { + curent_operation = .and_; + self.state = State.expect_right_uuid_array; + }, + .keyword_or => { + curent_operation = .or_; + self.state = State.expect_right_uuid_array; + }, + else => self.printError("Error: Expected a condition including AND or OR or } or )", &token), }, .expect_right_uuid_array => { @@ -299,7 +289,7 @@ pub const Parser = struct { token = self.parseCondition(&right_condition, &token); keep_next = true; - try self.data_engine.getUUIDListUsingCondition(right_condition, &right_array); + try self.file_engine.getUUIDListUsingCondition(right_condition, &right_array); }, // Create a new condition and compare it else => self.printError("Error: Expecting ( or member name.", &token), } @@ -333,18 +323,16 @@ pub const Parser = struct { keep_next = false; }) { switch (self.state) { - .expect_member => { - switch (token.tag) { - .identifier => { - if (!schemaEngine.isMemberPartOfStruct(condition.struct_name, self.toker.getTokenSlice(token))) { - self.printError("Error: Member not part of struct.", &token); - } - condition.data_type = schemaEngine.memberName2DataType(condition.struct_name, self.toker.getTokenSlice(token)) orelse @panic("Couldn't find the struct and member"); - condition.member_name = self.toker.getTokenSlice(token); - self.state = State.expect_operation; - }, - else => self.printError("Error: Expected member name.", &token), - } + .expect_member => switch (token.tag) { + .identifier => { + if (!self.file_engine.isMemberNameInStruct(condition.struct_name, self.toker.getTokenSlice(token))) { + self.printError("Error: Member not part of struct.", &token); + } + condition.data_type = self.file_engine.memberName2DataType(condition.struct_name, self.toker.getTokenSlice(token)) orelse @panic("Couldn't find the struct and member"); + condition.member_name = self.toker.getTokenSlice(token); + self.state = State.expect_operation; + }, + else => self.printError("Error: Expected member name.", &token), }, .expect_operation => { @@ -470,51 +458,43 @@ pub const Parser = struct { } }, - .expect_semicolon_OR_right_bracket => { - switch (token.tag) { - .semicolon => self.state = .expect_member, - .r_bracket => self.state = .end, - else => self.printError("Error: Expect ';' or ']'.", &token), - } + .expect_semicolon_OR_right_bracket => switch (token.tag) { + .semicolon => self.state = .expect_member, + .r_bracket => self.state = .end, + else => self.printError("Error: Expect ';' or ']'.", &token), }, - .expect_member => { - switch (token.tag) { - .identifier => { - if (!schemaEngine.isMemberNameInStruct(self.struct_name, self.toker.getTokenSlice(token))) self.printError("Member not found in struct.", &token); - try additional_data.member_to_find.append( - AdditionalDataMember.init( - self.allocator, - self.toker.getTokenSlice(token), - ), - ); + .expect_member => switch (token.tag) { + .identifier => { + if (!self.file_engine.isMemberNameInStruct(self.struct_name, self.toker.getTokenSlice(token))) self.printError("Member not found in struct.", &token); + try additional_data.member_to_find.append( + AdditionalDataMember.init( + self.allocator, + self.toker.getTokenSlice(token), + ), + ); - self.state = .expect_comma_OR_r_bracket_OR_l_bracket; - }, - else => self.printError("Error: Expected a member name.", &token), - } + self.state = .expect_comma_OR_r_bracket_OR_l_bracket; + }, + else => self.printError("Error: Expected a member name.", &token), }, - .expect_comma_OR_r_bracket_OR_l_bracket => { - switch (token.tag) { - .comma => self.state = .expect_member, - .r_bracket => self.state = .end, - .l_bracket => { - try self.parseAdditionalData( - &additional_data.member_to_find.items[additional_data.member_to_find.items.len - 1].additional_data, - ); - self.state = .expect_comma_OR_r_bracket; - }, - else => self.printError("Error: Expected , or ] or [", &token), - } + .expect_comma_OR_r_bracket_OR_l_bracket => switch (token.tag) { + .comma => self.state = .expect_member, + .r_bracket => self.state = .end, + .l_bracket => { + try self.parseAdditionalData( + &additional_data.member_to_find.items[additional_data.member_to_find.items.len - 1].additional_data, + ); + self.state = .expect_comma_OR_r_bracket; + }, + else => self.printError("Error: Expected , or ] or [", &token), }, - .expect_comma_OR_r_bracket => { - switch (token.tag) { - .comma => self.state = .expect_member, - .r_bracket => self.state = .end, - else => self.printError("Error: Expected , or ]", &token), - } + .expect_comma_OR_r_bracket => switch (token.tag) { + .comma => self.state = .expect_member, + .r_bracket => self.state = .end, + else => self.printError("Error: Expected , or ]", &token), }, else => unreachable, @@ -536,144 +516,124 @@ pub const Parser = struct { keep_next = false; }) { switch (self.state) { - .expect_member => { - switch (token.tag) { - .identifier => { - member_name = self.toker.getTokenSlice(token); - if (!schemaEngine.isMemberNameInStruct(self.struct_name, member_name)) self.printError("Member not found in struct.", &token); - self.state = .expect_equal; - }, - else => self.printError("Error: Expected member name.", &token), - } + .expect_member => switch (token.tag) { + .identifier => { + member_name = self.toker.getTokenSlice(token); + if (!self.file_engine.isMemberNameInStruct(self.struct_name, member_name)) self.printError("Member not found in struct.", &token); + self.state = .expect_equal; + }, + else => self.printError("Error: Expected member name.", &token), }, - .expect_equal => { - switch (token.tag) { - // TODO: Add more comparison like IN or other stuff - .equal => self.state = .expect_new_value, - else => self.printError("Error: Expected =", &token), - } + .expect_equal => switch (token.tag) { + // TODO: Add more comparison like IN or other stuff + .equal => self.state = .expect_new_value, + else => self.printError("Error: Expected =", &token), }, .expect_new_value => { - const data_type = schemaEngine.memberName2DataType(self.struct_name, member_name); + const data_type = self.file_engine.memberName2DataType(self.struct_name, member_name); switch (data_type.?) { - .int => { - switch (token.tag) { - .int_literal, .keyword_null => { - member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected int", &token), - } + .int => switch (token.tag) { + .int_literal, .keyword_null => { + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected int", &token), }, - .float => { - switch (token.tag) { - .float_literal, .keyword_null => { - member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected float", &token), - } + .float => switch (token.tag) { + .float_literal, .keyword_null => { + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected float", &token), }, - .bool => { - switch (token.tag) { - .bool_literal_true => { - member_map.put(member_name, "1") catch @panic("Could not add member name and value to map in getMapOfMember"); - self.state = .expect_comma_OR_end; - }, - .bool_literal_false => { - member_map.put(member_name, "0") catch @panic("Could not add member name and value to map in getMapOfMember"); - self.state = .expect_comma_OR_end; - }, - .keyword_null => { - member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected bool: true false", &token), - } + .bool => switch (token.tag) { + .bool_literal_true => { + member_map.put(member_name, "1") catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + .bool_literal_false => { + member_map.put(member_name, "0") catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + .keyword_null => { + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected bool: true false", &token), }, - .str => { - switch (token.tag) { - .string_literal, .keyword_null => { - member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected string between ''", &token), - } + .str => switch (token.tag) { + .string_literal, .keyword_null => { + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected string between ''", &token), }, // TODO: Maybe upgrade that to use multiple state - .int_array => { - switch (token.tag) { - .l_bracket => { - const start_index = token.loc.start; - token = self.toker.next(); - while (token.tag != .r_bracket) : (token = self.toker.next()) { - switch (token.tag) { - .int_literal => continue, - else => self.printError("Error: Expected int or ].", &token), - } + .int_array => switch (token.tag) { + .l_bracket => { + const start_index = token.loc.start; + token = self.toker.next(); + while (token.tag != .r_bracket) : (token = self.toker.next()) { + switch (token.tag) { + .int_literal => continue, + else => self.printError("Error: Expected int or ].", &token), } - // Maybe change that as it just recreate a string that is already in the buffer - member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected [ to start an array", &token), - } + } + // Maybe change that as it just recreate a string that is already in the buffer + member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected [ to start an array", &token), }, - .float_array => { - switch (token.tag) { - .l_bracket => { - const start_index = token.loc.start; - token = self.toker.next(); - while (token.tag != .r_bracket) : (token = self.toker.next()) { - switch (token.tag) { - .float_literal => continue, - else => self.printError("Error: Expected float or ].", &token), - } + .float_array => switch (token.tag) { + .l_bracket => { + const start_index = token.loc.start; + token = self.toker.next(); + while (token.tag != .r_bracket) : (token = self.toker.next()) { + switch (token.tag) { + .float_literal => continue, + else => self.printError("Error: Expected float or ].", &token), } - // Maybe change that as it just recreate a string that is already in the buffer - member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected [ to start an array", &token), - } + } + // Maybe change that as it just recreate a string that is already in the buffer + member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected [ to start an array", &token), }, - .bool_array => { - switch (token.tag) { - .l_bracket => { - const start_index = token.loc.start; - token = self.toker.next(); - while (token.tag != .r_bracket) : (token = self.toker.next()) { - switch (token.tag) { - .bool_literal_false, .bool_literal_true => continue, - else => self.printError("Error: Expected bool or ].", &token), - } + .bool_array => switch (token.tag) { + .l_bracket => { + const start_index = token.loc.start; + token = self.toker.next(); + while (token.tag != .r_bracket) : (token = self.toker.next()) { + switch (token.tag) { + .bool_literal_false, .bool_literal_true => continue, + else => self.printError("Error: Expected bool or ].", &token), } - // Maybe change that as it just recreate a string that is already in the buffer - member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected [ to start an array", &token), - } + } + // Maybe change that as it just recreate a string that is already in the buffer + member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected [ to start an array", &token), }, - .str_array => { - switch (token.tag) { - .l_bracket => { - const start_index = token.loc.start; - token = self.toker.next(); - while (token.tag != .r_bracket) : (token = self.toker.next()) { - switch (token.tag) { - .string_literal => continue, - else => self.printError("Error: Expected str or ].", &token), - } + .str_array => switch (token.tag) { + .l_bracket => { + const start_index = token.loc.start; + token = self.toker.next(); + while (token.tag != .r_bracket) : (token = self.toker.next()) { + switch (token.tag) { + .string_literal => continue, + else => self.printError("Error: Expected str or ].", &token), } - // Maybe change that as it just recreate a string that is already in the buffer - member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); - self.state = .expect_comma_OR_end; - }, - else => self.printError("Error: Expected [ to start an array", &token), - } + } + // Maybe change that as it just recreate a string that is already in the buffer + member_map.put(member_name, self.toker.buffer[start_index..token.loc.end]) catch @panic("Couln't add string of array in data map"); + self.state = .expect_comma_OR_end; + }, + else => self.printError("Error: Expected [ to start an array", &token), }, } }, @@ -710,8 +670,6 @@ pub const Parser = struct { stdout.print("{s}\n", .{message}) catch {}; - stdout.print("{any}\n{any}\n", .{ token.tag, token.loc }) catch {}; - send("", .{}); } }; @@ -832,185 +790,13 @@ test "GRAB filter with int" { fn testParsing(source: [:0]const u8) !void { const allocator = std.testing.allocator; + + var file_engine = FileEngine.init(allocator, null); + defer file_engine.deinit(); + var tokenizer = Tokenizer.init(source); - var parser = Parser.init(allocator, &tokenizer); + var parser = Parser.init(allocator, &tokenizer, &file_engine); defer parser.deinit(); + try parser.parse(); } - -test "Parse condition" { - const condition1 = Condition{ .data_type = .int, .member_name = "age", .operation = .superior_or_equal, .struct_name = "User", .value = "26" }; - try testConditionParsing("age >= 26", condition1); - - const condition2 = Condition{ .data_type = .int_array, .member_name = "scores", .operation = .equal, .struct_name = "User", .value = "[1 2 42]" }; - try testConditionParsing("scores = [1 2 42]", condition2); - - const condition3 = Condition{ .data_type = .str, .member_name = "email", .operation = .equal, .struct_name = "User", .value = "'adrien@email.com'" }; - try testConditionParsing("email = 'adrien@email.com'", condition3); -} - -fn testConditionParsing(source: [:0]const u8, expected_condition: Condition) !void { - const allocator = std.testing.allocator; - var tokenizer = Tokenizer.init(source); - var parser = Parser.init(allocator, &tokenizer); - var token = tokenizer.next(); - - var condition = Condition.init("User"); - _ = parser.parseCondition(&condition, &token); - - try std.testing.expect(compareCondition(expected_condition, condition)); -} - -fn compareCondition(c1: Condition, c2: Condition) bool { - return ((std.mem.eql(u8, c1.value, c2.value)) and (std.mem.eql(u8, c1.struct_name, c2.struct_name)) and (std.mem.eql(u8, c1.member_name, c2.member_name)) and (c1.operation == c2.operation) and (c1.data_type == c2.data_type)); -} - -test "Parse new data" { - const allocator = std.testing.allocator; - - var map1 = std.StringHashMap([]const u8).init(allocator); - defer map1.deinit(); - try map1.put("name", "'Adrien'"); - testNewDataParsing("(name = 'Adrien')", map1); - - var map2 = std.StringHashMap([]const u8).init(allocator); - defer map2.deinit(); - try map2.put("name", "'Adrien'"); - try map2.put("email", "'adrien@email.com'"); - try map2.put("scores", "[1 4 19]"); - try map2.put("age", "26"); - testNewDataParsing("(name = 'Adrien', scores = [1 4 19], age = 26, email = 'adrien@email.com')", map2); -} - -fn testNewDataParsing(source: [:0]const u8, expected_member_map: std.StringHashMap([]const u8)) void { - const allocator = std.testing.allocator; - var tokenizer = Tokenizer.init(source); - - var parser = Parser.init(allocator, &tokenizer); - parser.struct_name = allocator.dupe(u8, "User") catch @panic("Cant alloc struct name"); - defer parser.deinit(); - - var data_map = std.StringHashMap([]const u8).init(allocator); - defer data_map.deinit(); - - _ = tokenizer.next(); - parser.parseNewData(&data_map); - - var iterator = expected_member_map.iterator(); - - var expected_total_count: usize = 0; - var found_count: usize = 0; - var error_found = false; - while (iterator.next()) |entry| { - expected_total_count += 1; - if (!data_map.contains(entry.key_ptr.*)) { - std.debug.print("Error new data parsing: Missing {s} in parsed map.\n", .{entry.key_ptr.*}); - error_found = true; - continue; - } - if (!std.mem.eql(u8, entry.value_ptr.*, data_map.get(entry.key_ptr.*).?)) { - std.debug.print("Error new data parsing: Wrong data for {s} in parsed map.\n Expected: {s}\n Got: {s}", .{ entry.key_ptr.*, entry.value_ptr.*, data_map.get(entry.key_ptr.*).? }); - error_found = true; - continue; - } - found_count += 1; - } - - if ((error_found) or (expected_total_count != found_count)) @panic("=("); -} - -test "Parse filter" { - const allocator = std.testing.allocator; - - var tokenizer = Tokenizer.init("{name = 'Adrien'}"); - var parser = Parser.init(allocator, &tokenizer); - parser.struct_name = allocator.dupe(u8, "User") catch @panic("Cant alloc struct name"); // Otherwise get an error trying to free this when deinit - - defer parser.deinit(); - _ = tokenizer.next(); // Start at name - - var uuid_array = std.ArrayList(UUID).init(allocator); - defer uuid_array.deinit(); - - try parser.parseFilter(&uuid_array, "User", true); -} - -test "Parse additional data" { - const allocator = std.testing.allocator; - - var additional_data1 = Parser.AdditionalData.init(allocator); - additional_data1.entity_count_to_find = 1; - testAdditionalData("[1]", additional_data1); - - var additional_data2 = Parser.AdditionalData.init(allocator); - defer additional_data2.deinit(); - try additional_data2.member_to_find.append( - Parser.AdditionalDataMember.init( - allocator, - "name", - ), - ); - testAdditionalData("[name]", additional_data2); - - var additional_data3 = Parser.AdditionalData.init(allocator); - additional_data3.entity_count_to_find = 1; - defer additional_data3.deinit(); - try additional_data3.member_to_find.append( - Parser.AdditionalDataMember.init( - allocator, - "name", - ), - ); - testAdditionalData("[1; name]", additional_data3); - - var additional_data4 = Parser.AdditionalData.init(allocator); - additional_data4.entity_count_to_find = 100; - defer additional_data4.deinit(); - try additional_data4.member_to_find.append( - Parser.AdditionalDataMember.init( - allocator, - "friends", - ), - ); - testAdditionalData("[100; friends [name]]", additional_data4); -} - -fn testAdditionalData(source: [:0]const u8, expected_AdditionalData: Parser.AdditionalData) void { - const allocator = std.testing.allocator; - var tokenizer = Tokenizer.init(source); - - var parser = Parser.init(allocator, &tokenizer); - parser.struct_name = allocator.dupe(u8, "User") catch @panic("Cant alloc struct name"); - defer parser.deinit(); - - _ = tokenizer.next(); - parser.parseAdditionalData(&parser.additional_data) catch |err| { - std.debug.print("Error parsing additional data: {any}\n", .{err}); - }; - - compareAdditionalData(expected_AdditionalData, parser.additional_data); -} - -fn compareAdditionalData(ad1: Parser.AdditionalData, ad2: Parser.AdditionalData) void { - std.testing.expectEqual(ad1.entity_count_to_find, ad2.entity_count_to_find) catch { - std.debug.print("Additional data entity_count_to_find are not equal.\n", .{}); - }; - - var founded = false; - - for (ad1.member_to_find.items) |elem1| { - founded = false; - for (ad2.member_to_find.items) |elem2| { - if (std.mem.eql(u8, elem1.name, elem2.name)) { - compareAdditionalData(elem1.additional_data, elem2.additional_data); - founded = true; - break; - } - } - - std.testing.expect(founded) catch { - std.debug.print("{s} not found\n", .{elem1.name}); - @panic("=("); - }; - } -}