From b008f434a6bbff96dbe23e7718a5e7eee40c0156 Mon Sep 17 00:00:00 2001 From: MrBounty Date: Wed, 9 Oct 2024 23:20:28 +0200 Subject: [PATCH] Passed to one tabular file for each struct Created a new Parser dedicated to the FileEngine to read each line. It is slower, as I need to parse character by character because there is no fixed length for the data in files. Before, I was just reading until the end of the file. I'm going to need to find some tricks to improve the parsing of data. I am thinking of using the stream directly instead of calling streamUntilDelimiter. --- build.zig | 11 - src/dataParser.zig | 111 ----------- src/fileEngine.zig | 464 ++++++++++++++++++++++++++++++++----------- src/schemaEngine.zig | 12 ++ src/ziqlParser.zig | 64 ++++-- 5 files changed, 405 insertions(+), 257 deletions(-) delete mode 100644 src/dataParser.zig diff --git a/build.zig b/build.zig index 01a9794..7378c91 100644 --- a/build.zig +++ b/build.zig @@ -18,16 +18,6 @@ pub fn build(b: *std.Build) void { const run_step = b.step("run", "Run the app"); run_step.dependOn(&run_cmd.step); - // Test step - const tests1 = b.addTest(.{ - .root_source_file = b.path("src/dataParser.zig"), - .target = target, - .optimize = optimize, - .name = "Data parsing", - .test_runner = b.path("test_runner.zig"), - }); - const run_tests1 = b.addRunArtifact(tests1); - const tests2 = b.addTest(.{ .root_source_file = b.path("src/tokenizers/cli.zig"), .target = target, @@ -83,7 +73,6 @@ pub fn build(b: *std.Build) void { const run_tests7 = b.addRunArtifact(tests7); const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&run_tests1.step); test_step.dependOn(&run_tests2.step); test_step.dependOn(&run_tests3.step); test_step.dependOn(&run_tests4.step); diff --git a/src/dataParser.zig b/src/dataParser.zig deleted file mode 100644 index 3b197c7..0000000 --- a/src/dataParser.zig +++ /dev/null @@ -1,111 +0,0 @@ -const std = @import("std"); - -// Series of functions to use just before creating an entity. 
-// Will transform the string of data into data of the right type. - -// Maybe return a null or something else -pub fn parseInt(value_str: []const u8) i64 { - return std.fmt.parseInt(i64, value_str, 10) catch return 0; -} - -pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) { - var array = std.ArrayList(i64).init(allocator); - - var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); - while (it.next()) |x| { - array.append(parseInt(x)) catch {}; - } - - return array; -} - -pub fn parseFloat(value_str: []const u8) f64 { - return std.fmt.parseFloat(f64, value_str) catch return 0; -} - -pub fn parseArrayFloat(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(f64) { - var array = std.ArrayList(f64).init(allocator); - - var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); - while (it.next()) |x| { - array.append(parseFloat(x)) catch {}; - } - - return array; -} - -pub fn parseBool(value_str: []const u8) bool { - return (value_str[0] != '0'); -} - -pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) { - var array = std.ArrayList(bool).init(allocator); - - var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); - while (it.next()) |x| { - array.append(parseBool(x)) catch {}; - } - - return array; -} - -// FIXME: This will not work if their is a space in one string. E.g ['Hello world'] will be split between Hello and world but it shouldn't -pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) { - var array = std.ArrayList([]const u8).init(allocator); - - var it = std.mem.splitAny(u8, array_str[1 .. 
array_str.len - 1], " "); - while (it.next()) |x| { - const x_copy = allocator.dupe(u8, x) catch @panic("=("); - array.append(x_copy) catch {}; - } - - return array; -} - -test "Data parsing" { - const allocator = std.testing.allocator; - - // Int - const in1: [3][]const u8 = .{ "1", "42", "Hello" }; - const expected_out1: [3]i64 = .{ 1, 42, 0 }; - for (in1, 0..) |value, i| { - try std.testing.expect(parseInt(value) == expected_out1[i]); - } - - // Int array - const in2 = "[1 14 44 42 hello]"; - const out2 = parseArrayInt(allocator, in2); - defer out2.deinit(); - const expected_out2: [5]i64 = .{ 1, 14, 44, 42, 0 }; - try std.testing.expect(std.mem.eql(i64, out2.items, &expected_out2)); - - // Float - const in3: [3][]const u8 = .{ "1.3", "65.991", "Hello" }; - const expected_out3: [3]f64 = .{ 1.3, 65.991, 0 }; - for (in3, 0..) |value, i| { - try std.testing.expect(parseFloat(value) == expected_out3[i]); - } - - // Float array - const in4 = "[1.5 14.3 44.9999 42 hello]"; - const out4 = parseArrayFloat(allocator, in4); - defer out4.deinit(); - const expected_out4: [5]f64 = .{ 1.5, 14.3, 44.9999, 42, 0 }; - try std.testing.expect(std.mem.eql(f64, out4.items, &expected_out4)); - - // Bool - const in5: [3][]const u8 = .{ "1", "Hello", "0" }; - const expected_out5: [3]bool = .{ true, true, false }; - for (in5, 0..) 
|value, i| { - try std.testing.expect(parseBool(value) == expected_out5[i]); - } - - // Bool array - const in6 = "[1 0 0 1 1]"; - const out6 = parseArrayBool(allocator, in6); - defer out6.deinit(); - const expected_out6: [5]bool = .{ true, false, false, true, true }; - try std.testing.expect(std.mem.eql(bool, out6.items, &expected_out6)); - - // TODO: Test the string array -} diff --git a/src/fileEngine.zig b/src/fileEngine.zig index 8c0e821..485f931 100644 --- a/src/fileEngine.zig +++ b/src/fileEngine.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const dataParsing = @import("dataParser.zig"); const schemaEngine = @import("schemaEngine.zig"); const Allocator = std.mem.Allocator; const UUID = @import("types/uuid.zig").UUID; @@ -14,12 +13,138 @@ pub const FileEngine = struct { path_to_DATA_dir: []const u8, // The path to the DATA folder max_file_size: usize = 5e+4, // 50kb TODO: Change - const DataEngineError = error{ - ErrorCreateDataFolder, - ErrorCreateStructFolder, - ErrorCreateMemberFolder, - ErrorCreateMainFile, - ErrorCreateDataFile, + pub const Token = struct { + tag: Tag, + loc: Loc, + + pub const Loc = struct { + start: usize, + end: usize, + }; + + pub const Tag = enum { + eof, + invalid, + + string_literal, + int_literal, + float_literal, + identifier, + equal, + bang, // ! + pipe, // | + l_paren, // ( + r_paren, // ) + l_bracket, // [ + r_bracket, // ] + l_brace, // { + r_brace, // } + semicolon, // ; + comma, // , + angle_bracket_left, // < + angle_bracket_right, // > + angle_bracket_left_equal, // <= + angle_bracket_right_equal, // >= + equal_angle_bracket_right, // => + period, // . + bang_equal, // != + }; + }; + + pub const Tokenizer = struct { + buffer: [:0]const u8, + index: usize, + + // Maybe change that to use the stream directly so I dont have to read the line 2 times + pub fn init(buffer: [:0]const u8) Tokenizer { + // Skip the UTF-8 BOM if present. 
+ return .{ + .buffer = buffer, + .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0, // WTF ? I guess some OS add that or some shit like that + }; + } + + const State = enum { + start, + string_literal, + float, + int, + }; + + pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 { + return self.buffer[token.loc.start..token.loc.end]; + } + + pub fn next(self: *Tokenizer) Token { + // That ugly but work + if (self.buffer[self.index] == ' ') self.index += 1; + + var state: State = .start; + var result: Token = .{ + .tag = undefined, + .loc = .{ + .start = self.index, + .end = undefined, + }, + }; + while (true) : (self.index += 1) { + const c = self.buffer[self.index]; + + if (self.index == self.buffer.len) break; + + switch (state) { + .start => switch (c) { + '\'' => { + state = .string_literal; + result.tag = .string_literal; + }, + '0'...'9', '-' => { + state = .int; + result.tag = .int_literal; + }, + '[' => { + result.tag = .l_bracket; + self.index += 1; + break; + }, + ']' => { + result.tag = .r_bracket; + self.index += 1; + break; + }, + else => std.debug.print("Unknow character: {c}\n", .{c}), + }, + + .string_literal => switch (c) { + '\'' => { + self.index += 1; + break; + }, + else => continue, + }, + + .int => switch (c) { + '.' 
=> { + state = .float; + result.tag = .float_literal; + }, + '0'...'9' => continue, + else => break, + }, + .float => switch (c) { + '0'...'9' => { + continue; + }, + else => { + break; + }, + }, + } + } + + result.loc.end = self.index; + return result; + } }; const ComparisonValue = union { @@ -58,13 +183,10 @@ pub const FileEngine = struct { /// Take a condition and an array of UUID and fill the array with all UUID that match the condition pub fn getUUIDListUsingCondition(self: *FileEngine, condition: Condition, uuid_array: *std.ArrayList(UUID)) !void { - var file_names = std.ArrayList([]const u8).init(self.allocator); - self.getFilesNames(condition.struct_name, condition.member_name, &file_names) catch @panic("Can't get list of files"); - defer file_names.deinit(); + const max_file_index = try self.maxFileIndex(condition.struct_name); + var current_index: usize = 0; - var current_file = file_names.pop(); - - var sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{s}", .{ self.path_to_DATA_dir, condition.struct_name, condition.member_name, current_file }) catch @panic("Can't create sub_path for init a DataIterator"); + var sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); defer self.allocator.free(sub_path); var file = std.fs.cwd().openFile(sub_path, .{}) catch @panic("Can't open first file to init a data iterator"); @@ -79,14 +201,14 @@ pub const FileEngine = struct { var compare_value: ComparisonValue = undefined; switch (condition.data_type) { - .int => compare_value = ComparisonValue{ .int = dataParsing.parseInt(condition.value) }, + .int => compare_value = ComparisonValue{ .int = parseInt(condition.value) }, .str => compare_value = ComparisonValue{ .str = condition.value }, - .float => compare_value = ComparisonValue{ .float = dataParsing.parseFloat(condition.value) }, - .bool => compare_value = 
ComparisonValue{ .bool_ = dataParsing.parseBool(condition.value) }, - .int_array => compare_value = ComparisonValue{ .int_array = dataParsing.parseArrayInt(self.allocator, condition.value) }, - .str_array => compare_value = ComparisonValue{ .str_array = dataParsing.parseArrayStr(self.allocator, condition.value) }, - .float_array => compare_value = ComparisonValue{ .float_array = dataParsing.parseArrayFloat(self.allocator, condition.value) }, - .bool_array => compare_value = ComparisonValue{ .bool_array = dataParsing.parseArrayBool(self.allocator, condition.value) }, + .float => compare_value = ComparisonValue{ .float = parseFloat(condition.value) }, + .bool => compare_value = ComparisonValue{ .bool_ = parseBool(condition.value) }, + .int_array => compare_value = ComparisonValue{ .int_array = parseArrayInt(self.allocator, condition.value) }, + .str_array => compare_value = ComparisonValue{ .str_array = parseArrayStr(self.allocator, condition.value) }, + .float_array => compare_value = ComparisonValue{ .float_array = parseArrayFloat(self.allocator, condition.value) }, + .bool_array => compare_value = ComparisonValue{ .bool_array = parseArrayBool(self.allocator, condition.value) }, } defer { switch (condition.data_type) { @@ -98,24 +220,27 @@ pub const FileEngine = struct { } } + var token: FileEngine.Token = undefined; + const column_index = schemaEngine.columnIndexOfMember(condition.struct_name, condition.member_name); + while (true) { output_fbs.reset(); reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) { error.EndOfStream => { output_fbs.reset(); // clear buffer before exit - self.allocator.free(current_file); - if (file_names.items.len == 0) break; + if (current_index == max_file_index) break; - current_file = file_names.pop(); + current_index += 1; - // Do I leak memory here ? Do I deinit every time ? 
self.allocator.free(sub_path); - sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{s}", .{ self.path_to_DATA_dir, condition.struct_name, condition.member_name, current_file }) catch @panic("Can't create sub_path for init a DataIterator"); + sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); - // Same here, do I close everytime ? - file.close(); - file = std.fs.cwd().openFile(sub_path, .{}) catch @panic("Can't open first file to init a data iterator"); + file.close(); // Do I need to close ? I think so + file = std.fs.cwd().openFile(sub_path, .{}) catch { + std.debug.print("Error trying to open {s}\n", .{sub_path}); + @panic("Can't open first file to init a data iterator"); + }; buffered = std.io.bufferedReader(file.reader()); reader = buffered.reader(); @@ -127,58 +252,76 @@ pub const FileEngine = struct { }, }; - // TODO: Maybe put that directly inside the union type like a compare function - // Can also do the switch directly on the compare_value + // Maybe use the stream directly to prevent duplicate the data + // But I would need to change the Tokenizer a lot... + const null_terminated_string = try self.allocator.dupeZ(u8, output_fbs.getWritten()[37..]); + defer self.allocator.free(null_terminated_string); + + var data_toker = Tokenizer.init(null_terminated_string); + const uuid = try UUID.parse(output_fbs.getWritten()[0..36]); + + // Skip unwanted token + for (0..column_index.?) 
|_| { + _ = data_toker.next(); + } + + token = data_toker.next(); + // TODO: Add error for wrong condition like superior between 2 string or array switch (condition.operation) { .equal => { switch (condition.data_type) { - .int => if (compare_value.int == dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .float => if (compare_value.float == dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .str => if (std.mem.eql(u8, compare_value.str, output_fbs.getWritten()[37..output_fbs.getWritten().len])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .bool => if (compare_value.bool_ == dataParsing.parseBool(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), + .int => if (compare_value.int == parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .float => if (compare_value.float == parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .str => if (std.mem.eql(u8, compare_value.str, data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .bool => if (compare_value.bool_ == parseBool(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, + .different => { switch (condition.data_type) { - .int => if (compare_value.int != dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .float => if (compare_value.float != dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .str => if (!std.mem.eql(u8, compare_value.str, output_fbs.getWritten()[38 .. 
output_fbs.getWritten().len - 1])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .bool => if (compare_value.bool_ != dataParsing.parseBool(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), + .int => if (compare_value.int != parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .float => if (compare_value.float != parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .str => if (!std.mem.eql(u8, compare_value.str, data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .bool => if (compare_value.bool_ != parseBool(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, + .superior_or_equal => { switch (condition.data_type) { - .int => if (compare_value.int <= dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .float => if (compare_value.float <= dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), + .int => if (compare_value.int <= parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .float => if (compare_value.float <= parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, + .superior => { switch (condition.data_type) { - .int => if (compare_value.int < dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .float => if (compare_value.float < dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), + .int => if (compare_value.int < parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .float => if (compare_value.float < parseFloat(data_toker.getTokenSlice(token))) try 
uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, + .inferior_or_equal => { switch (condition.data_type) { - .int => if (compare_value.int >= dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .float => if (compare_value.float >= dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), + .int => if (compare_value.int >= parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .float => if (compare_value.float >= parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, + .inferior => { switch (condition.data_type) { - .int => if (compare_value.int > dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), - .float => if (compare_value.float > dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])), + .int => if (compare_value.int > parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), + .float => if (compare_value.float > parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } @@ -191,71 +334,48 @@ pub const FileEngine = struct { // Do I need multiple files too ? 
I mean it duplicate UUID a lot, if it's just to save a name like 'Bob', storing a long UUID is overkill // I could just use a tabular data format with separator using space - Or maybe I encode the uuid to take a minimum space as I always know it size pub fn writeEntity(self: FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) !UUID { - const uuid_str = UUID.init().format_uuid(); + const uuid = UUID.init(); - const member_names = schemaEngine.structName2structMembers(struct_name); - for (member_names) |member_name| { - const potential_file_name_to_use = try self.getFirstUsableFile(struct_name, member_name); + const potential_file_index = try self.getFirstUsableIndexFile(struct_name); + var file: std.fs.File = undefined; + defer file.close(); - if (potential_file_name_to_use) |file_name| { - defer self.allocator.free(file_name); + var path: []const u8 = undefined; + defer self.allocator.free(path); - const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name, file_name }); - defer self.allocator.free(path); + if (potential_file_index) |file_index| { + path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, file_index }); + file = std.fs.cwd().openFile(path, .{ .mode = .read_write }) catch @panic("=("); + } else { + const max_index = try self.maxFileIndex(struct_name); - var file = std.fs.cwd().openFile(path, .{ - .mode = .read_write, - }) catch { - std.debug.print("Error opening data file.", .{}); - continue; // TODO: Error handeling - }; - defer file.close(); - - try file.seekFromEnd(0); - try file.writer().print("{s} {s}\n", .{ uuid_str, data_map.get(member_name).? 
}); - } else { - const max_index = try self.maxFileIndex(struct_name, member_name); - - const new_file_path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, member_name, max_index + 1 }); - defer self.allocator.free(new_file_path); - - const new_file = std.fs.cwd().createFile(new_file_path, .{}) catch @panic("Error creating new data file"); - defer new_file.close(); - - try new_file.writer().print("{s} {s}\n", .{ &uuid_str, data_map.get(member_name).? }); - } + path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, max_index + 1 }); + file = std.fs.cwd().createFile(path, .{}) catch @panic("Error creating new data file"); } - return UUID.parse(&uuid_str); + try file.seekFromEnd(0); + try file.writer().print("{s}", .{uuid.format_uuid()}); + + const member_names = schemaEngine.structName2structMembers(struct_name); // This need to be in the same order all the time tho + for (member_names) |member_name| { + try file.writer().print(" {s}", .{data_map.get(member_name).?}); + } + + try file.writer().print("\n", .{}); + + return uuid; } /// Use a filename in the format 1.zippondata and return the 1 + /// Note that if I change the extension of the data file, I need to update that as it use a fixed len for the extension fn fileName2Index(_: FileEngine, file_name: []const u8) usize { - var iter_file_name = std.mem.tokenize(u8, file_name, "."); - const num_str = iter_file_name.next().?; - const num: usize = std.fmt.parseInt(usize, num_str, 10) catch @panic("Couln't parse the int of a zippondata file."); - return num; - } - - fn getFilesNames(self: FileEngine, struct_name: []const u8, member_name: []const u8, file_names: *std.ArrayList([]const u8)) !void { - const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name }); - defer self.allocator.free(path); - - var member_dir = try std.fs.cwd().openDir(path, 
.{ .iterate = true }); - defer member_dir.close(); - - var iter = member_dir.iterate(); - defer iter.reset(); - while (try iter.next()) |entry| { - if ((entry.kind != std.fs.Dir.Entry.Kind.file) or (std.mem.eql(u8, "main.zippondata", entry.name))) continue; - try file_names.*.append(try self.allocator.dupe(u8, entry.name)); - } + return std.fmt.parseInt(usize, file_name[0..(file_name.len - 11)], 10) catch @panic("Couln't parse the int of a zippondata file."); } /// Use the map of file stat to find the first file with under the bytes limit. /// return the name of the file. If none is found, return null. - fn getFirstUsableFile(self: FileEngine, struct_name: []const u8, member_name: []const u8) !?[]const u8 { - const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name }); + fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) !?usize { + const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name }); defer self.allocator.free(path); var member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); @@ -263,18 +383,16 @@ pub const FileEngine = struct { var iter = member_dir.iterate(); while (try iter.next()) |entry| { - if ((entry.kind != std.fs.Dir.Entry.Kind.file) or (std.mem.eql(u8, "main.zippondata", entry.name))) continue; - const file_stat = try member_dir.statFile(entry.name); - if (file_stat.size < self.max_file_size) return try self.allocator.dupe(u8, entry.name); + if (file_stat.size < self.max_file_size) return self.fileName2Index(entry.name); } return null; } /// Iter over all file and get the max name and return the value of it as usize /// So for example if there is 1.zippondata and 2.zippondata it return 2. 
- fn maxFileIndex(self: FileEngine, struct_name: []const u8, member_name: []const u8) !usize { - const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name }); + fn maxFileIndex(self: FileEngine, struct_name: []const u8) !usize { + const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name }); defer self.allocator.free(path); const member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); @@ -296,23 +414,14 @@ pub const FileEngine = struct { for (schemaEngine.struct_name_list) |struct_name| { data_dir.makeDir(struct_name) catch |err| switch (err) { error.PathAlreadyExists => {}, - else => return DataEngineError.ErrorCreateStructFolder, + else => return err, }; const struct_dir = try data_dir.openDir(struct_name, .{}); - const member_names = schemaEngine.structName2structMembers(struct_name); - for (member_names) |member_name| { - struct_dir.makeDir(member_name) catch |err| switch (err) { - error.PathAlreadyExists => continue, - else => return DataEngineError.ErrorCreateMemberFolder, - }; - const member_dir = try struct_dir.openDir(member_name, .{}); - - _ = member_dir.createFile("0.zippondata", .{}) catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => return DataEngineError.ErrorCreateDataFile, - }; - } + _ = struct_dir.createFile("0.zippondata", .{}) catch |err| switch (err) { + error.PathAlreadyExists => {}, + else => return err, + }; } } }; @@ -333,3 +442,126 @@ test "Open dir" { const sub_dir = try dir.openDir("src/types", .{}); _ = sub_dir; } + +// Series of functions to use just before creating an entity. 
+// Will transform the string of data into data of the right type./ + +pub fn parseInt(value_str: []const u8) i64 { + return std.fmt.parseInt(i64, value_str, 10) catch return 0; +} + +pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) { + var array = std.ArrayList(i64).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); + while (it.next()) |x| { + array.append(parseInt(x)) catch {}; + } + + return array; +} + +pub fn parseFloat(value_str: []const u8) f64 { + return std.fmt.parseFloat(f64, value_str) catch return 0; +} + +pub fn parseArrayFloat(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(f64) { + var array = std.ArrayList(f64).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); + while (it.next()) |x| { + array.append(parseFloat(x)) catch {}; + } + + return array; +} + +pub fn parseBool(value_str: []const u8) bool { + return (value_str[0] != '0'); +} + +pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) { + var array = std.ArrayList(bool).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); + while (it.next()) |x| { + array.append(parseBool(x)) catch {}; + } + + return array; +} + +// FIXME: This will not work if their is a space in one string. E.g ['Hello world'] will be split between Hello and world but it shouldn't +pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) { + var array = std.ArrayList([]const u8).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. 
array_str.len - 1], " "); + while (it.next()) |x| { + const x_copy = allocator.dupe(u8, x) catch @panic("=("); + array.append(x_copy) catch {}; + } + + return array; +} + +test "Data parsing" { + const allocator = std.testing.allocator; + + // Int + const in1: [3][]const u8 = .{ "1", "42", "Hello" }; + const expected_out1: [3]i64 = .{ 1, 42, 0 }; + for (in1, 0..) |value, i| { + try std.testing.expect(parseInt(value) == expected_out1[i]); + } + + // Int array + const in2 = "[1 14 44 42 hello]"; + const out2 = parseArrayInt(allocator, in2); + defer out2.deinit(); + const expected_out2: [5]i64 = .{ 1, 14, 44, 42, 0 }; + try std.testing.expect(std.mem.eql(i64, out2.items, &expected_out2)); + + // Float + const in3: [3][]const u8 = .{ "1.3", "65.991", "Hello" }; + const expected_out3: [3]f64 = .{ 1.3, 65.991, 0 }; + for (in3, 0..) |value, i| { + try std.testing.expect(parseFloat(value) == expected_out3[i]); + } + + // Float array + const in4 = "[1.5 14.3 44.9999 42 hello]"; + const out4 = parseArrayFloat(allocator, in4); + defer out4.deinit(); + const expected_out4: [5]f64 = .{ 1.5, 14.3, 44.9999, 42, 0 }; + try std.testing.expect(std.mem.eql(f64, out4.items, &expected_out4)); + + // Bool + const in5: [3][]const u8 = .{ "1", "Hello", "0" }; + const expected_out5: [3]bool = .{ true, true, false }; + for (in5, 0..) 
|value, i| { + try std.testing.expect(parseBool(value) == expected_out5[i]); + } + + // Bool array + const in6 = "[1 0 0 1 1]"; + const out6 = parseArrayBool(allocator, in6); + defer out6.deinit(); + const expected_out6: [5]bool = .{ true, false, false, true, true }; + try std.testing.expect(std.mem.eql(bool, out6.items, &expected_out6)); + + // TODO: Test the string array +} + +// Test tokenizer + +test "basic query" { + try testTokenize("001 123 0185", &.{ .int_literal, .int_literal, .int_literal }); +} + +fn testTokenize(source: [:0]const u8, expected_token_tags: []const FileEngine.Token.Tag) !void { + var tokenizer = FileEngine.Tokenizer.init(source); + for (expected_token_tags) |expected_token_tag| { + const token = tokenizer.next(); + try std.testing.expectEqual(expected_token_tag, token.tag); + } +} diff --git a/src/schemaEngine.zig b/src/schemaEngine.zig index 3175727..69d11ec 100644 --- a/src/schemaEngine.zig +++ b/src/schemaEngine.zig @@ -20,6 +20,18 @@ pub const struct_type_list: [2][]const DataType = .{ &[_]DataType{.str}, }; +// use to know how much token the Parser of the FileEngine need to pass before the right one +pub fn columnIndexOfMember(struct_name: []const u8, member_name: []const u8) ?usize { + var i: u16 = 0; + + for (structName2structMembers(struct_name)) |mn| { + if (std.mem.eql(u8, mn, member_name)) return i; + i += 1; + } + + return null; +} + /// Get the type of the member pub fn memberName2DataType(struct_name: []const u8, member_name: []const u8) ?DataType { var i: u16 = 0; diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig index 662e2c2..eaa89a9 100644 --- a/src/ziqlParser.zig +++ b/src/ziqlParser.zig @@ -153,6 +153,7 @@ pub const Parser = struct { }, } }, + .expect_struct_name => { // Check if the struct name is in the schema self.struct_name = try self.allocator.dupe(u8, self.toker.getTokenSlice(token)); @@ -162,6 +163,7 @@ pub const Parser = struct { else => self.state = .expect_filter_or_additional_data, } }, + 
.expect_filter_or_additional_data => { keep_next = true; switch (token.tag) { @@ -170,10 +172,12 @@ pub const Parser = struct { else => self.printError("Error: Expect [ for additional data or { for a filter", &token), } }, + .parse_additional_data => { try self.parseAdditionalData(&self.additional_data); self.state = .filter_and_send; }, + .filter_and_send => { var array = std.ArrayList(UUID).init(self.allocator); defer array.deinit(); @@ -181,6 +185,7 @@ pub const Parser = struct { self.sendEntity(array.items); self.state = .end; }, + .expect_new_data => { switch (token.tag) { .l_paren => { @@ -190,6 +195,7 @@ pub const Parser = struct { else => self.printError("Error: Expecting new data starting with (", &token), } }, + .parse_new_data_and_add_data => { switch (self.action) { .ADD => { @@ -210,6 +216,7 @@ pub const Parser = struct { else => unreachable, } }, + else => unreachable, } } @@ -231,9 +238,8 @@ pub const Parser = struct { _ = self; } - /// Take an array of UUID and populate it to be the array that represent filter between {} - /// Main is to know if between {} or (), main is true if between {} or the first to be call - /// TODO: Create a parseCondition + /// Take an array of UUID and populate it with what match what is between {} + /// Main is to know if between {} or (), main is true if between {}, otherwise between () inside {} fn parseFilter(self: *Parser, left_array: *std.ArrayList(UUID), struct_name: []const u8, main: bool) !void { var token = self.toker.next(); var keep_next = false; @@ -253,6 +259,7 @@ pub const Parser = struct { self.state = State.expect_ANDOR_OR_end; keep_next = true; }, + .expect_ANDOR_OR_end => { switch (token.tag) { .r_brace => { @@ -280,6 +287,7 @@ pub const Parser = struct { else => self.printError("Error: Expected a condition including AND or OR or } or )", &token), } }, + .expect_right_uuid_array => { var right_array = std.ArrayList(UUID).init(self.allocator); defer right_array.deinit(); @@ -307,14 +315,17 @@ pub const 
Parser = struct { std.debug.print("Token here {any}\n", .{token}); self.state = .expect_ANDOR_OR_end; }, + else => unreachable, } } } + /// Parse to get a Condition, which is a struct that is used by the FileEngine to retrieve data. + /// In the query, it is this part: name = 'Bob' or age <= 10 fn parseCondition(self: *Parser, condition: *Condition, token_ptr: *Token) Token { var keep_next = false; - self.state = State.expect_member; + self.state = .expect_member; var token = token_ptr.*; while (self.state != State.end) : ({ @@ -335,6 +346,7 @@ pub const Parser = struct { else => self.printError("Error: Expected member name.", &token), } }, + .expect_operation => { switch (token.tag) { .equal => condition.operation = .equal, // = @@ -347,6 +359,7 @@ pub const Parser = struct { } self.state = State.expect_value; }, + .expect_value => { switch (condition.data_type) { .int => { @@ -420,13 +433,14 @@ pub const Parser = struct { } self.state = .end; }, + else => unreachable, } } return token; } - /// When this function is call, the tokenizer last token retrieved should be [. 
+ /// When this function is called, the next token should be [ /// Check if an int is here -> check if ; is here -> check if member is here -> check if [ is here -> loop fn parseAdditionalData(self: *Parser, additional_data: *AdditionalData) !void { var token = self.toker.next(); @@ -455,6 +469,7 @@ pub const Parser = struct { }, } }, + .expect_semicolon_OR_right_bracket => { switch (token.tag) { .semicolon => self.state = .expect_member, @@ -462,6 +477,7 @@ pub const Parser = struct { else => self.printError("Error: Expect ';' or ']'.", &token), } }, + .expect_member => { switch (token.tag) { .identifier => { @@ -478,6 +494,7 @@ pub const Parser = struct { else => self.printError("Error: Expected a member name.", &token), } }, + .expect_comma_OR_r_bracket_OR_l_bracket => { switch (token.tag) { .comma => self.state = .expect_member, @@ -491,6 +508,7 @@ pub const Parser = struct { else => self.printError("Error: Expected , or ] or [", &token), } }, + .expect_comma_OR_r_bracket => { switch (token.tag) { .comma => self.state = .expect_member, @@ -498,6 +516,7 @@ pub const Parser = struct { else => self.printError("Error: Expected , or ]", &token), } }, + else => unreachable, } } @@ -527,6 +546,7 @@ pub const Parser = struct { else => self.printError("Error: Expected member name.", &token), } }, + .expect_equal => { switch (token.tag) { // TODO: Add more comparison like IN or other stuff @@ -534,14 +554,15 @@ pub const Parser = struct { else => self.printError("Error: Expected =", &token), } }, + .expect_new_value => { const data_type = schemaEngine.memberName2DataType(self.struct_name, member_name); switch (data_type.?) 
{ .int => { switch (token.tag) { .int_literal, .keyword_null => { - keep_next = true; - self.state = .add_member_to_map; + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; }, else => self.printError("Error: Expected int", &token), } @@ -549,17 +570,25 @@ pub const Parser = struct { .float => { switch (token.tag) { .float_literal, .keyword_null => { - keep_next = true; - self.state = .add_member_to_map; + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; }, else => self.printError("Error: Expected float", &token), } }, .bool => { switch (token.tag) { - .bool_literal_true, .bool_literal_false, .keyword_null => { - keep_next = true; - self.state = .add_member_to_map; + .bool_literal_true => { + member_map.put(member_name, "1") catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + .bool_literal_false => { + member_map.put(member_name, "0") catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; + }, + .keyword_null => { + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; }, else => self.printError("Error: Expected bool: true false", &token), } @@ -567,8 +596,8 @@ pub const Parser = struct { .str => { switch (token.tag) { .string_literal, .keyword_null => { - keep_next = true; - self.state = .add_member_to_map; + member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); + self.state = .expect_comma_OR_end; }, else => self.printError("Error: Expected string between ''", &token), } @@ -648,11 
+677,7 @@ pub const Parser = struct { }, } }, - .add_member_to_map => { - member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember"); - self.state = .expect_comma_OR_end; - }, - .add_array_to_map => {}, + .expect_comma_OR_end => { switch (token.tag) { .r_paren => self.state = .end, @@ -660,6 +685,7 @@ pub const Parser = struct { else => self.printError("Error: Expect , or )", &token), } }, + else => unreachable, } }