diff --git a/build.zig b/build.zig index 0252e60..8061f72 100644 --- a/build.zig +++ b/build.zig @@ -74,10 +74,22 @@ pub fn build(b: *std.Build) void { tests5.root_module.addImport("ZipponData", b.dependency("ZipponData", .{}).module("ZipponData")); const run_tests5 = b.addRunArtifact(tests5); + const tests6 = b.addTest(.{ + .root_source_file = b.path("src/stuffs/filter.zig"), + .target = target, + .optimize = optimize, + .name = "Filter tree", + .test_runner = b.path("test_runner.zig"), + }); + tests6.root_module.addImport("dtype", b.createModule(.{ .root_source_file = b.path("lib/types/out.zig") })); + tests6.root_module.addImport("ZipponData", b.dependency("ZipponData", .{}).module("ZipponData")); + const run_tests6 = b.addRunArtifact(tests6); + const test_step = b.step("test", "Run unit tests"); test_step.dependOn(&run_tests1.step); test_step.dependOn(&run_tests2.step); test_step.dependOn(&run_tests3.step); test_step.dependOn(&run_tests4.step); test_step.dependOn(&run_tests5.step); + test_step.dependOn(&run_tests6.step); } diff --git a/build.zig.zon b/build.zig.zon index 70110ec..ecdd1eb 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -3,8 +3,8 @@ .version = "0.1.4", .dependencies = .{ .ZipponData = .{ - .url = "git+https://github.com/MrBounty/ZipponData", - .hash = "12200f2c24233d72c195c1daf04d07f3af22418593f7bbe8a75e54fe6817866f4564", + .url = "git+https://github.com/MrBounty/ZipponData#2ec9cc00e0d798e741d63f91cde18af0f9bf1bce", + .hash = "12206c4cac549a5d1beab62fe1c45388cec0bcc5aac96da8175eccd8abbeb6d41913", }, }, .paths = .{ diff --git a/lib/types/stringToType.zig b/lib/types/stringToType.zig index 6a11dfe..9c54aa0 100644 --- a/lib/types/stringToType.zig +++ b/lib/types/stringToType.zig @@ -2,13 +2,15 @@ const std = @import("std"); const UUID = @import("uuid.zig").UUID; const DateTime = @import("date.zig").DateTime; +// FIXME: Stop returning arrayList and use toOwnedSlice instead + // TODO: Put those functions somewhere else -pub fn parseInt(value_str: 
[]const u8) i64 { - return std.fmt.parseInt(i64, value_str, 10) catch return 0; +pub fn parseInt(value_str: []const u8) i32 { + return std.fmt.parseInt(i32, value_str, 10) catch return 0; } -pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) { - var array = std.ArrayList(i64).init(allocator); +pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i32) { + var array = std.ArrayList(i32).init(allocator); var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); while (it.next()) |x| { @@ -57,6 +59,17 @@ pub fn parseArrayDate(allocator: std.mem.Allocator, array_str: []const u8) std.A return array; } +pub fn parseArrayDateUnix(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(u64) { + var array = std.ArrayList(u64).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); + while (it.next()) |x| { + array.append(parseDate(x).toUnix()) catch {}; + } + + return array; +} + pub fn parseTime(value_str: []const u8) DateTime { const hours: u16 = std.fmt.parseInt(u16, value_str[0..2], 10) catch 0; const minutes: u16 = std.fmt.parseInt(u16, value_str[3..5], 10) catch 0; @@ -77,6 +90,17 @@ pub fn parseArrayTime(allocator: std.mem.Allocator, array_str: []const u8) std.A return array; } +pub fn parseArrayTimeUnix(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(u64) { + var array = std.ArrayList(u64).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. 
array_str.len - 1], " "); + while (it.next()) |x| { + array.append(parseTime(x).toUnix()) catch {}; + } + + return array; +} + pub fn parseDatetime(value_str: []const u8) DateTime { const year: u16 = std.fmt.parseInt(u16, value_str[0..4], 10) catch 0; const month: u16 = std.fmt.parseInt(u16, value_str[5..7], 10) catch 0; @@ -100,6 +124,17 @@ pub fn parseArrayDatetime(allocator: std.mem.Allocator, array_str: []const u8) s return array; } +pub fn parseArrayDatetimeUnix(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(u64) { + var array = std.ArrayList(u64).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); + while (it.next()) |x| { + array.append(parseDatetime(x).toUnix()) catch {}; + } + + return array; +} + pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) { var array = std.ArrayList(bool).init(allocator); @@ -123,6 +158,18 @@ pub fn parseArrayUUID(allocator: std.mem.Allocator, array_str: []const u8) std.A return array; } +pub fn parseArrayUUIDBytes(allocator: std.mem.Allocator, array_str: []const u8) ![]const [16]u8 { + var array = std.ArrayList([16]u8).init(allocator); + + var it = std.mem.splitAny(u8, array_str[1 .. 
array_str.len - 1], " "); + while (it.next()) |x| { + const uuid = UUID.parse(x) catch continue; + array.append(uuid.bytes) catch continue; + } + + return try array.toOwnedSlice(); +} + // FIXME: I think it will not work if there is a ' inside the string, even \', need to fix that pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) { var array = std.ArrayList([]const u8).init(allocator); @@ -135,7 +182,7 @@ pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.Ar array.append(x_copy) catch {}; } - allocator.free(array.pop()); // Remove the last because empty like the first one + if (array.items.len > 0) allocator.free(array.pop()); // Remove the last because empty like the first one return array; } @@ -145,7 +192,7 @@ test "Value parsing: Int" { // Int const values: [3][]const u8 = .{ "1", "42", "Hello" }; - const expected_values: [3]i64 = .{ 1, 42, 0 }; + const expected_values: [3]i32 = .{ 1, 42, 0 }; for (values, 0..) 
|value, i| { try std.testing.expect(parseInt(value) == expected_values[i]); } @@ -154,8 +201,8 @@ test "Value parsing: Int" { const array_str = "[1 14 44 42 hello]"; const array = parseArrayInt(allocator, array_str); defer array.deinit(); - const expected_array: [5]i64 = .{ 1, 14, 44, 42, 0 }; - try std.testing.expect(std.mem.eql(i64, array.items, &expected_array)); + const expected_array: [5]i32 = .{ 1, 14, 44, 42, 0 }; + try std.testing.expect(std.mem.eql(i32, array.items, &expected_array)); } test "Value parsing: Float" { diff --git a/src/fileEngine.zig b/src/fileEngine.zig index 27b260f..7458ae2 100644 --- a/src/fileEngine.zig +++ b/src/fileEngine.zig @@ -76,21 +76,6 @@ pub const FileEngine = struct { return !std.mem.eql(u8, "", self.path_to_ZipponDB_dir); } - const ComparisonValue = union { - int: i64, - float: f64, - str: []const u8, - bool_: bool, - link: UUID, - datetime: DateTime, - int_array: std.ArrayList(i64), - str_array: std.ArrayList([]const u8), - float_array: std.ArrayList(f64), - bool_array: std.ArrayList(bool), - link_array: std.ArrayList(UUID), - datetime_array: std.ArrayList(DateTime), - }; - // --------------------Other-------------------- pub fn readSchemaFile(allocator: Allocator, sub_path: []const u8, buffer: []u8) FileEngineError!usize { @@ -228,10 +213,7 @@ pub const FileEngine = struct { }; const struct_dir = data_dir.openDir(schema_struct.name, .{}) catch return FileEngineError.CantOpenDir; - _ = struct_dir.createFile("0.csv", .{}) catch |err| switch (err) { - error.PathAlreadyExists => {}, - else => return FileEngineError.CantMakeFile, - }; + zid.createFile("0.zid", struct_dir) catch return FileEngineError.CantMakeFile; } try self.writeSchemaFile(); @@ -442,241 +424,33 @@ pub const FileEngine = struct { } /// Take a condition and an array of UUID and fill the array with all UUID that match the condition - /// TODO: Change the UUID function to be a B+Tree - /// TODO: Optimize the shit out of this, it it way too slow rn. 
Here some ideas - /// - Make multiple condition per row - /// - Array can take a very long time to parse, maybe put them in a seperate file. But string can be too... - /// - Use the stream directly in the tokenizer - /// - Use a fixed size and split into other file. Like one file for one member (Because very long, like an array of 1000 value) and another one for everything else - /// The threselhold can be like if the average len is > 400 character. So UUID would take less that 10% of the storage - /// - Save data in a more compact way - /// - Multithreading, each thread take a list of files and we mix them at the end - pub fn getUUIDListUsingCondition(self: *FileEngine, condition: Condition, uuid_array: *std.ArrayList(UUID)) FileEngineError!void { - const max_file_index = try self.maxFileIndex(condition.struct_name); - var current_index: usize = 0; - - var path_buff = std.fmt.allocPrint( - self.allocator, - "{s}/DATA/{s}/{d}.csv", - .{ self.path_to_ZipponDB_dir, condition.struct_name, current_index }, - ) catch return FileEngineError.MemoryError; - defer self.allocator.free(path_buff); - - var file = std.fs.cwd().openFile(path_buff, .{}) catch return FileEngineError.CantOpenFile; - defer file.close(); - - var output: [BUFFER_SIZE]u8 = undefined; - var output_fbs = std.io.fixedBufferStream(&output); - const writer = output_fbs.writer(); - - var buffered = std.io.bufferedReader(file.reader()); - var reader = buffered.reader(); - - var compare_value: ComparisonValue = undefined; - switch (condition.data_type) { - .int => compare_value = ComparisonValue{ .int = s2t.parseInt(condition.value) }, - .str => compare_value = ComparisonValue{ .str = condition.value }, - .float => compare_value = ComparisonValue{ .float = s2t.parseFloat(condition.value) }, - .bool => compare_value = ComparisonValue{ .bool_ = s2t.parseBool(condition.value) }, - .link => compare_value = ComparisonValue{ .link = UUID.parse(condition.value) catch return FileEngineError.InvalidUUID }, - .date => 
compare_value = ComparisonValue{ .datetime = s2t.parseDate(condition.value) }, - .time => compare_value = ComparisonValue{ .datetime = s2t.parseTime(condition.value) }, - .datetime => compare_value = ComparisonValue{ .datetime = s2t.parseDatetime(condition.value) }, - .int_array => compare_value = ComparisonValue{ .int_array = s2t.parseArrayInt(self.allocator, condition.value) }, - .str_array => compare_value = ComparisonValue{ .str_array = s2t.parseArrayStr(self.allocator, condition.value) }, - .float_array => compare_value = ComparisonValue{ .float_array = s2t.parseArrayFloat(self.allocator, condition.value) }, - .bool_array => compare_value = ComparisonValue{ .bool_array = s2t.parseArrayBool(self.allocator, condition.value) }, - .link_array => compare_value = ComparisonValue{ .link_array = s2t.parseArrayUUID(self.allocator, condition.value) }, - .date_array => compare_value = ComparisonValue{ .datetime_array = s2t.parseArrayDate(self.allocator, condition.value) }, - .time_array => compare_value = ComparisonValue{ .datetime_array = s2t.parseArrayTime(self.allocator, condition.value) }, - .datetime_array => compare_value = ComparisonValue{ .datetime_array = s2t.parseArrayDatetime(self.allocator, condition.value) }, - } - defer { - switch (condition.data_type) { - .int_array => compare_value.int_array.deinit(), - .str_array => { - for (compare_value.str_array.items) |value| self.allocator.free(value); // TODO: Remove that, I should need to free them one by one as condition.value keep it in memory - compare_value.str_array.deinit(); - }, - .float_array => compare_value.float_array.deinit(), - .bool_array => compare_value.bool_array.deinit(), - .link_array => compare_value.link_array.deinit(), - .datetime_array => compare_value.datetime_array.deinit(), - else => {}, - } - } - - var token: FileToken = undefined; - var found = false; - - while (true) { - output_fbs.reset(); - reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) { - error.EndOfStream 
=> { - // When end of file, check if all file was parse, if not update the reader to the next file - // TODO: Be able to give an array of file index from the B+Tree to only parse them - output_fbs.reset(); // clear buffer before exit - - if (current_index == max_file_index) break; - - current_index += 1; - - self.allocator.free(path_buff); - path_buff = std.fmt.allocPrint( - self.allocator, - "{s}/DATA/{s}/{d}.csv", - .{ self.path_to_ZipponDB_dir, condition.struct_name, current_index }, - ) catch return FileEngineError.MemoryError; - - file.close(); // Do I need to close ? I think so - file = std.fs.cwd().openFile(path_buff, .{}) catch return FileEngineError.CantOpenFile; - - buffered = std.io.bufferedReader(file.reader()); - reader = buffered.reader(); - continue; - }, // file read till the end - else => return FileEngineError.StreamError, - }; - - // Maybe use the stream directly to prevent duplicate the data - // But I would need to change the Tokenizer a lot... - const null_terminated_string = self.allocator.dupeZ(u8, output_fbs.getWritten()[37..]) catch return FileEngineError.MemoryError; - defer self.allocator.free(null_terminated_string); - - var data_toker = FileTokenizer.init(null_terminated_string); - const uuid = UUID.parse(output_fbs.getWritten()[0..36]) catch return FileEngineError.InvalidUUID; - - // Skip unwanted token - for (try self.structName2structMembers(condition.struct_name)) |member_name| { - if (std.mem.eql(u8, member_name, condition.member_name)) break; - _ = data_toker.next(); - } - token = data_toker.next(); - - const row_value = data_toker.getTokenSlice(token); - - found = switch (condition.operation) { - .equal => switch (condition.data_type) { - .int => compare_value.int == s2t.parseInt(row_value), - .float => compare_value.float == s2t.parseFloat(row_value), - .str => std.mem.eql(u8, compare_value.str, row_value), - .bool => compare_value.bool_ == s2t.parseBool(row_value), - .link => compare_value.link.compare(uuid), - .date => 
compare_value.datetime.compareDate(s2t.parseDate(row_value)), - .time => compare_value.datetime.compareTime(s2t.parseTime(row_value)), - .datetime => compare_value.datetime.compareDatetime(s2t.parseDatetime(row_value)), - else => unreachable, - }, - - .different => switch (condition.data_type) { - .int => compare_value.int != s2t.parseInt(row_value), - .float => compare_value.float != s2t.parseFloat(row_value), - .str => !std.mem.eql(u8, compare_value.str, row_value), - .bool => compare_value.bool_ != s2t.parseBool(row_value), - .link => !compare_value.link.compare(uuid), - .date => !compare_value.datetime.compareDate(s2t.parseDate(row_value)), - .time => !compare_value.datetime.compareTime(s2t.parseTime(row_value)), - .datetime => !compare_value.datetime.compareDatetime(s2t.parseDatetime(row_value)), - else => unreachable, - }, - - .superior_or_equal => switch (condition.data_type) { - .int => compare_value.int <= s2t.parseInt(data_toker.getTokenSlice(token)), - .float => compare_value.float <= s2t.parseFloat(data_toker.getTokenSlice(token)), - .date => compare_value.datetime.toUnix() <= s2t.parseDate(row_value).toUnix(), - .time => compare_value.datetime.toUnix() <= s2t.parseTime(row_value).toUnix(), - .datetime => compare_value.datetime.toUnix() <= s2t.parseDatetime(row_value).toUnix(), - else => unreachable, - }, - - .superior => switch (condition.data_type) { - .int => compare_value.int < s2t.parseInt(data_toker.getTokenSlice(token)), - .float => compare_value.float < s2t.parseFloat(data_toker.getTokenSlice(token)), - .date => compare_value.datetime.toUnix() < s2t.parseDate(row_value).toUnix(), - .time => compare_value.datetime.toUnix() < s2t.parseTime(row_value).toUnix(), - .datetime => compare_value.datetime.toUnix() < s2t.parseDatetime(row_value).toUnix(), - else => unreachable, - }, - - .inferior_or_equal => switch (condition.data_type) { - .int => compare_value.int >= s2t.parseInt(data_toker.getTokenSlice(token)), - .float => compare_value.float >= 
s2t.parseFloat(data_toker.getTokenSlice(token)), - .date => compare_value.datetime.toUnix() >= s2t.parseDate(row_value).toUnix(), - .time => compare_value.datetime.toUnix() >= s2t.parseTime(row_value).toUnix(), - .datetime => compare_value.datetime.toUnix() >= s2t.parseDatetime(row_value).toUnix(), - else => unreachable, - }, - - .inferior => switch (condition.data_type) { - .int => compare_value.int > s2t.parseInt(data_toker.getTokenSlice(token)), - .float => compare_value.float > s2t.parseFloat(data_toker.getTokenSlice(token)), - .date => compare_value.datetime.toUnix() > s2t.parseDate(row_value).toUnix(), - .time => compare_value.datetime.toUnix() > s2t.parseTime(row_value).toUnix(), - .datetime => compare_value.datetime.toUnix() > s2t.parseDatetime(row_value).toUnix(), - else => unreachable, - }, - - else => false, - }; - - // TODO: Do it for other array and implement in the query language - switch (condition.operation) { - .in => switch (condition.data_type) { - .link_array => { - for (compare_value.link_array.items) |elem| { - if (elem.compare(uuid)) uuid_array.append(uuid) catch return FileEngineError.MemoryError; - } - }, - else => unreachable, - }, - else => {}, - } - - if (found) uuid_array.append(uuid) catch return FileEngineError.MemoryError; - } + /// TODO: Use the new filter and DataIterator + pub fn getUUIDListUsingCondition(_: *FileEngine, _: Condition, _: *std.ArrayList(UUID)) FileEngineError!void { + return; } // --------------------Change existing files-------------------- - // TODO: Change map to use a []Data from ZipponData - pub fn writeEntity(self: *FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) FileEngineError!UUID { + // TODO: Make it in batch too + pub fn writeEntity(self: *FileEngine, struct_name: []const u8, map: std.StringHashMap([]const u8)) FileEngineError!UUID { const uuid = UUID.init(); - const potential_file_index = try self.getFirstUsableIndexFile(struct_name); - var file: std.fs.File = undefined; - 
defer file.close(); + const file_index = try self.getFirstUsableIndexFile(struct_name); - var path: []const u8 = undefined; + const path = std.fmt.allocPrint( + self.allocator, + "{s}/DATA/{s}/{d}.zid", + .{ self.path_to_ZipponDB_dir, struct_name, file_index }, + ) catch return FileEngineError.MemoryError; defer self.allocator.free(path); - if (potential_file_index) |file_index| { - path = std.fmt.allocPrint( - self.allocator, - "{s}/DATA/{s}/{d}.csv", - .{ self.path_to_ZipponDB_dir, struct_name, file_index }, - ) catch return FileEngineError.MemoryError; - file = std.fs.cwd().openFile(path, .{ .mode = .read_write }) catch return FileEngineError.CantOpenFile; - } else { - const max_index = try self.maxFileIndex(struct_name); + var arena = std.heap.ArenaAllocator.init(self.allocator); + defer arena.deinit(); + const data = try self.orderedNewData(arena.allocator(), struct_name, map); - path = std.fmt.allocPrint( - self.allocator, - "{s}/DATA/{s}/{d}.csv", - .{ self.path_to_ZipponDB_dir, struct_name, max_index + 1 }, - ) catch return FileEngineError.MemoryError; - file = std.fs.cwd().createFile(path, .{}) catch return FileEngineError.CantMakeFile; - } - - file.seekFromEnd(0) catch return FileEngineError.WriteError; // Not really a write error tho - const writer = file.writer(); - writer.print("{s}", .{uuid.format_uuid()}) catch return FileEngineError.WriteError; - - for (try self.structName2structMembers(struct_name)) |member_name| { - writer.writeByte(CSV_DELIMITER) catch return FileEngineError.WriteError; - writer.print("{s}", .{data_map.get(member_name).?}) catch return FileEngineError.WriteError; // Change that for csv - } - - writer.print("\n", .{}) catch return FileEngineError.WriteError; + var writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError; + writer.write(data) catch return FileEngineError.ZipponDataError; + writer.flush() catch return FileEngineError.ZipponDataError; return uuid; } @@ -994,13 +768,89 @@ pub const 
FileEngine = struct { return deleted_count; } + // --------------------ZipponData utils-------------------- + + // Function that take a map from the parseNewData and return an ordered array of Data + pub fn orderedNewData(self: *FileEngine, allocator: Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) FileEngineError![]const zid.Data { + const members = try self.structName2structMembers(struct_name); + const types = try self.structName2DataType(struct_name); + + var datas = allocator.alloc(zid.Data, members.len) catch return FileEngineError.MemoryError; + + for (members, types, 0..) |member, dt, i| { + switch (dt) { + .int => datas[i] = zid.Data.initInt(s2t.parseInt(map.get(member).?)), + .float => datas[i] = zid.Data.initFloat(s2t.parseFloat(map.get(member).?)), + .bool => datas[i] = zid.Data.initBool(s2t.parseBool(map.get(member).?)), + .date => datas[i] = zid.Data.initUnix(s2t.parseDate(map.get(member).?).toUnix()), + .time => datas[i] = zid.Data.initUnix(s2t.parseTime(map.get(member).?).toUnix()), + .datetime => datas[i] = zid.Data.initUnix(s2t.parseDatetime(map.get(member).?).toUnix()), + .str => datas[i] = zid.Data.initStr(map.get(member).?), + .link => { + const uuid = UUID.parse(map.get(member).?) 
catch return FileEngineError.InvalidUUID; + datas[i] = zid.Data{ .UUID = uuid.bytes }; + }, + .int_array => { + var array = s2t.parseArrayInt(allocator, map.get(member).?); + defer array.deinit(); + + datas[i] = zid.Data.initIntArray(zid.allocEncodArray.Int(allocator, array.items) catch return FileEngineError.AllocEncodError); + }, + .float_array => { + var array = s2t.parseArrayFloat(allocator, map.get(member).?); + defer array.deinit(); + + datas[i] = zid.Data.initFloatArray(zid.allocEncodArray.Float(allocator, array.items) catch return FileEngineError.AllocEncodError); + }, + .str_array => { + var array = s2t.parseArrayStr(allocator, map.get(member).?); + defer array.deinit(); + + datas[i] = zid.Data.initStrArray(zid.allocEncodArray.Str(allocator, array.items) catch return FileEngineError.AllocEncodError); + }, + .bool_array => { + var array = s2t.parseArrayBool(allocator, map.get(member).?); + defer array.deinit(); + + datas[i] = zid.Data.initFloatArray(zid.allocEncodArray.Bool(allocator, array.items) catch return FileEngineError.AllocEncodError); + }, + .link_array => { + const array = s2t.parseArrayUUIDBytes(allocator, map.get(member).?) 
catch return FileEngineError.MemoryError; + defer self.allocator.free(array); + + datas[i] = zid.Data.initUUIDArray(zid.allocEncodArray.UUID(allocator, array) catch return FileEngineError.AllocEncodError); + }, + .date_array => { + var array = s2t.parseArrayDateUnix(allocator, map.get(member).?); + defer array.deinit(); + + datas[i] = zid.Data.initUnixArray(zid.allocEncodArray.Unix(allocator, array.items) catch return FileEngineError.AllocEncodError); + }, + .time_array => { + var array = s2t.parseArrayTimeUnix(allocator, map.get(member).?); + defer array.deinit(); + + datas[i] = zid.Data.initUnixArray(zid.allocEncodArray.Unix(allocator, array.items) catch return FileEngineError.AllocEncodError); + }, + .datetime_array => { + var array = s2t.parseArrayDatetimeUnix(allocator, map.get(member).?); + defer array.deinit(); + + datas[i] = zid.Data.initUnixArray(zid.allocEncodArray.Unix(allocator, array.items) catch return FileEngineError.AllocEncodError); + }, + } + } + + return datas; + } + // --------------------Schema utils-------------------- - /// Get the index of the first file that is bellow the size limit. If not found, return null - fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) FileEngineError!?usize { + /// Get the index of the first file that is bellow the size limit. 
If not found, create a new file + fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) FileEngineError!usize { log.debug("Getting first usable index file for {s} at {s}", .{ struct_name, self.path_to_ZipponDB_dir }); - const path = std.fmt.allocPrint( + var path = std.fmt.allocPrint( self.allocator, "{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, struct_name }, @@ -1010,14 +860,27 @@ pub const FileEngine = struct { var member_dir = std.fs.cwd().openDir(path, .{ .iterate = true }) catch return FileEngineError.CantOpenDir; defer member_dir.close(); + var i: usize = 0; var iter = member_dir.iterate(); while (iter.next() catch return FileEngineError.DirIterError) |entry| { + i += 1; const file_stat = member_dir.statFile(entry.name) catch return FileEngineError.FileStatError; if (file_stat.size < MAX_FILE_SIZE) { - return std.fmt.parseInt(usize, entry.name[0..(entry.name.len - 4)], 10) catch return FileEngineError.InvalidFileIndex; // TODO: Change the slice when start using CSV + // Can't I just return i? It is supposed that files are ordered. I think I already checked and it is not + return std.fmt.parseInt(usize, entry.name[0..(entry.name.len - 4)], 10) catch return FileEngineError.InvalidFileIndex; // INFO: Hardcoded len of file extension } } - return null; + + i += 1; + path = std.fmt.allocPrint( + self.allocator, + "{s}/DATA/{s}/{d}.zid", + .{ self.path_to_ZipponDB_dir, struct_name, i }, + ) catch return FileEngineError.MemoryError; + + zid.createFile(path, null) catch return FileEngineError.ZipponDataError; + + return i; } /// Iterate over all file of a struct and return the index of the last file. 
diff --git a/src/stuffs/errors.zig b/src/stuffs/errors.zig index 2c7ae8a..31cda9e 100644 --- a/src/stuffs/errors.zig +++ b/src/stuffs/errors.zig @@ -10,6 +10,7 @@ pub const ZiQlParserError = error{ ConditionError, WriteError, AndOrError, + CantWriteEntity, }; pub const SchemaParserError = error{ @@ -41,6 +42,8 @@ pub const FileEngineError = error{ RenameFileError, StructNotFound, MemberNotFound, + ZipponDataError, + AllocEncodError, }; pub const ZipponError = ZiQlParserError || FileEngineError || SchemaParserError; diff --git a/src/stuffs/filter.zig b/src/stuffs/filter.zig index 49f18f5..bea0191 100644 --- a/src/stuffs/filter.zig +++ b/src/stuffs/filter.zig @@ -9,8 +9,12 @@ // For {(name = 'Adrien' OR name = 'Bob') AND (age > 80 OR age < 20)} const std = @import("std"); +const s2t = @import("dtype").s2t; const ZipponError = @import("errors.zig").ZipponError; const DataType = @import("dtype").DataType; +const DateTime = @import("dtype").DateTime; +const UUID = @import("dtype").UUID; +const Data = @import("ZipponData").Data; const ComparisonOperator = enum { equal, @@ -46,11 +50,99 @@ const LogicalOperator = enum { } }; +pub const ConditionValue = union(enum) { + int: i32, + float: f64, + str: []const u8, + bool_: bool, + link: UUID, + unix: u64, + int_array: std.ArrayList(i32), + str_array: std.ArrayList([]const u8), + float_array: std.ArrayList(f64), + bool_array: std.ArrayList(bool), + link_array: std.ArrayList(UUID), + unix_array: std.ArrayList(u64), + + pub fn deinit(self: ConditionValue) void { + switch (self) { + .int_array => self.int_array.deinit(), + .str_array => self.str_array.deinit(), + .float_array => self.float_array.deinit(), + .bool_array => self.bool_array.deinit(), + .link_array => self.link_array.deinit(), + .unix_array => self.unix_array.deinit(), + else => {}, + } + } + + pub fn initInt(value: []const u8) ConditionValue { + return ConditionValue{ .int = s2t.parseInt(value) }; + } + + pub fn initFloat(value: []const u8) ConditionValue { + return 
ConditionValue{ .float = s2t.parseFloat(value) }; + } + + pub fn initStr(value: []const u8) ConditionValue { + return ConditionValue{ .str = value }; + } + + pub fn initBool(value: []const u8) ConditionValue { + return ConditionValue{ .bool_ = s2t.parseBool(value) }; + } + + pub fn initDate(value: []const u8) ConditionValue { + return ConditionValue{ .unix = s2t.parseDate(value).toUnix() }; + } + + pub fn initTime(value: []const u8) ConditionValue { + return ConditionValue{ .unix = s2t.parseTime(value).toUnix() }; + } + + pub fn initDateTime(value: []const u8) ConditionValue { + return ConditionValue{ .unix = s2t.parseDatetime(value).toUnix() }; + } + + // Array + pub fn initArrayInt(allocator: std.mem.Allocator, value: []const u8) ConditionValue { + return ConditionValue{ .int_array = s2t.parseArrayInt(allocator, value) }; + } + + pub fn initArrayFloat(allocator: std.mem.Allocator, value: []const u8) ConditionValue { + return ConditionValue{ .float_array = s2t.parseArrayFloat(allocator, value) }; + } + + pub fn initArrayStr(allocator: std.mem.Allocator, value: []const u8) ConditionValue { + return ConditionValue{ .str_array = s2t.parseArrayStr(allocator, value) }; + } + + pub fn initArrayBool(allocator: std.mem.Allocator, value: []const u8) ConditionValue { + return ConditionValue{ .bool_array = s2t.parseArrayBool(allocator, value) }; + } + + pub fn initArrayDate(allocator: std.mem.Allocator, value: []const u8) ConditionValue { + return ConditionValue{ .unix_array = s2t.parseArrayDateUnix(allocator, value) }; + } + + pub fn initArrayTime(allocator: std.mem.Allocator, value: []const u8) ConditionValue { + return ConditionValue{ .unix_array = s2t.parseArrayTimeUnix(allocator, value) }; + } + + pub fn initArrayDateTime(allocator: std.mem.Allocator, value: []const u8) ConditionValue { + return ConditionValue{ .unix_array = s2t.parseArrayDatetimeUnix(allocator, value) }; + } +}; + pub const Condition = struct { - value: []const u8 = undefined, + value: ConditionValue = 
undefined, operation: ComparisonOperator = undefined, data_type: DataType = undefined, data_index: usize = undefined, // Index in the file + + pub fn deinit(self: Condition) void { + self.value.deinit(); + } }; const FilterNode = union(enum) { @@ -76,11 +168,25 @@ pub const Filter = struct { pub fn deinit(self: *Filter) void { switch (self.root.*) { .logical => self.freeNode(self.root), + .condition => |condition| condition.deinit(), else => {}, } self.allocator.destroy(self.root); } + fn freeNode(self: *Filter, node: *FilterNode) void { + switch (node.*) { + .logical => |logical| { + self.freeNode(logical.left); + self.freeNode(logical.right); + self.allocator.destroy(logical.left); + self.allocator.destroy(logical.right); + }, + .condition => |condition| condition.deinit(), + .empty => {}, + } + } + pub fn addCondition(self: *Filter, condition: Condition) ZipponError!void { const node = self.allocator.create(FilterNode) catch return ZipponError.MemoryError; node.* = FilterNode{ .condition = condition }; @@ -140,44 +246,72 @@ pub const Filter = struct { } } - fn freeNode(self: *Filter, node: *FilterNode) void { - switch (node.*) { - .logical => |logical| { - self.freeNode(logical.left); - self.freeNode(logical.right); - self.allocator.destroy(logical.left); - self.allocator.destroy(logical.right); - }, - .condition => {}, - .empty => {}, - } - } - // TODO: Use []Data and make it work - pub fn evaluate(self: *const Filter, row: anytype) bool { - return self.evaluateNode(&self.root, row); + pub fn evaluate(self: Filter, row: []Data) bool { + return self.evaluateNode(self.root, row); } - fn evaluateNode(self: *const Filter, node: *const FilterNode, row: anytype) bool { + fn evaluateNode(self: Filter, node: *FilterNode, row: []Data) bool { return switch (node.*) { - .condition => |cond| self.evaluateCondition(cond, row), + .condition => |cond| Filter.evaluateCondition(cond, row), .logical => |log| switch (log.operator) { .AND => self.evaluateNode(log.left, row) and 
self.evaluateNode(log.right, row), .OR => self.evaluateNode(log.left, row) or self.evaluateNode(log.right, row), }, + .empty => unreachable, // FIXME: I think this is reachable. At least if this is the root node, so it return always true. Like in the query GRAB User {} }; } - fn evaluateCondition(condition: Condition, row: anytype) bool { - const field_value = @field(row, condition.member_name); + fn evaluateCondition(condition: Condition, row: []Data) bool { + const row_value: Data = row[condition.data_index]; return switch (condition.operation) { - .equal => std.mem.eql(u8, field_value, condition.value), - .different => !std.mem.eql(u8, field_value, condition.value), - .superior => field_value > condition.value, - .superior_or_equal => field_value >= condition.value, - .inferior => field_value < condition.value, - .inferior_or_equal => field_value <= condition.value, - .in => @panic("Not implemented"), // Implement this based on your needs + .equal => switch (condition.data_type) { + .int => row_value.Int == condition.value.int, + .float => row_value.Float == condition.value.float, + .str => std.mem.eql(u8, row_value.Str, condition.value.str), + .bool => row_value.Bool == condition.value.bool_, + .date, .time, .datetime => row_value.Unix == condition.value.unix, + else => unreachable, + }, + + .different => switch (condition.data_type) { + .int => row_value.Int != condition.value.int, + .float => row_value.Float != condition.value.float, + .str => !std.mem.eql(u8, row_value.Str, condition.value.str), + .bool => row_value.Bool != condition.value.bool_, + .date, .time, .datetime => row_value.Unix != condition.value.unix, + else => unreachable, + }, + + .superior_or_equal => switch (condition.data_type) { + .int => row_value.Int <= condition.value.int, + .float => row_value.Float <= condition.value.float, + .date, .time, .datetime => row_value.Unix <= condition.value.unix, + else => unreachable, + }, + + .superior => switch (condition.data_type) { + .int => 
row_value.Int < condition.value.int, + .float => row_value.Float < condition.value.float, + .date, .time, .datetime => row_value.Unix < condition.value.unix, + else => unreachable, + }, + + .inferior_or_equal => switch (condition.data_type) { + .int => row_value.Int >= condition.value.int, + .float => row_value.Float >= condition.value.float, + .date, .time, .datetime => row_value.Unix >= condition.value.unix, + else => unreachable, + }, + + .inferior => switch (condition.data_type) { + .int => row_value.Int > condition.value.int, + .float => row_value.Float > condition.value.float, + .date, .time, .datetime => row_value.Unix > condition.value.unix, + else => unreachable, + }, + + else => false, }; } @@ -195,7 +329,7 @@ pub const Filter = struct { self.printNode(logical.right.*); std.debug.print(" ) ", .{}); }, - .condition => |condition| std.debug.print("{d} {s} {s} |{any}|", .{ + .condition => |condition| std.debug.print("{d} {s} {any} |{any}|", .{ condition.data_index, condition.operation.str(), condition.value, @@ -205,3 +339,24 @@ pub const Filter = struct { } } }; + +test "Evaluate" { + const allocator = std.testing.allocator; + + var data = [_]Data{ + Data.initInt(1), + Data.initFloat(3.14159), + Data.initInt(-5), + Data.initStr("Hello world"), + Data.initBool(true), + }; + + var filter = try Filter.init(allocator); + defer filter.deinit(); + + try filter.addCondition(Condition{ .value = ConditionValue.initInt("1"), .data_index = 0, .operation = .equal, .data_type = .int }); + + filter.debugPrint(); + + _ = filter.evaluate(&data); +} diff --git a/src/stuffs/utils.zig b/src/stuffs/utils.zig index cfdfe13..1272714 100644 --- a/src/stuffs/utils.zig +++ b/src/stuffs/utils.zig @@ -77,7 +77,7 @@ pub fn printError(message: []const u8, err: ZipponError, query: ?[]const u8, sta writer.print(" \n", .{}) catch {}; // Align with the message } - log.debug("Parsing error: {s}", .{buffer.items}); + // log.debug("Parsing error: {s}", .{buffer.items}); send("{s}", 
.{buffer.items}); return err; diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig index 9327815..4d7632e 100644 --- a/src/ziqlParser.zig +++ b/src/ziqlParser.zig @@ -5,6 +5,7 @@ const Tokenizer = @import("tokenizers/ziql.zig").Tokenizer; const Token = @import("tokenizers/ziql.zig").Token; const dtype = @import("dtype"); +const s2t = dtype.s2t; const UUID = dtype.UUID; const AND = dtype.AND; const OR = dtype.OR; @@ -12,6 +13,7 @@ const DataType = dtype.DataType; const Filter = @import("stuffs/filter.zig").Filter; const Condition = @import("stuffs/filter.zig").Condition; +const ConditionValue = @import("stuffs/filter.zig").ConditionValue; const AdditionalData = @import("stuffs/additionalData.zig").AdditionalData; const AdditionalDataMember = @import("stuffs/additionalData.zig").AdditionalDataMember; @@ -111,6 +113,20 @@ pub const Parser = struct { send("{s}", .{buffer.items}); } + pub fn sendUUID(self: Parser, uuid: UUID) ZiQlParserError!void { + var buffer = std.ArrayList(u8).init(self.allocator); + defer buffer.deinit(); + + const writer = buffer.writer(); + writer.writeByte('[') catch return ZiQlParserError.WriteError; + writer.writeByte('"') catch return ZiQlParserError.WriteError; + writer.writeAll(&uuid.format_uuid()) catch return ZiQlParserError.WriteError; + writer.writeAll("\", ") catch return ZiQlParserError.WriteError; + writer.writeByte(']') catch return ZiQlParserError.WriteError; + + send("{s}", .{buffer.items}); + } + pub fn parse(self: Parser) ZipponError!void { var state: State = .start; var additional_data = AdditionalData.init(self.allocator); @@ -349,10 +365,10 @@ pub const Parser = struct { var error_message_buffer = std.ArrayList(u8).init(self.allocator); defer error_message_buffer.deinit(); + const error_message_buffer_writer = error_message_buffer.writer(); error_message_buffer_writer.writeAll("Error missing: ") catch return ZipponError.WriteError; - // TODO: Print the entire list of missing if 
(!(self.file_engine.checkIfAllMemberInMap(struct_name, &data_map, &error_message_buffer) catch { return ZiQlParserError.StructNotFound; })) { @@ -366,22 +382,8 @@ pub const Parser = struct { token.loc.end, ); } - const uuid = self.file_engine.writeEntity(struct_name, data_map) catch { - send("ZipponDB error: Couln't write new data to file", .{}); - continue; - }; - - var buffer = std.ArrayList(u8).init(self.allocator); - defer buffer.deinit(); - - const writer = buffer.writer(); - writer.writeByte('[') catch return ZiQlParserError.WriteError; - writer.writeByte('"') catch return ZiQlParserError.WriteError; - writer.writeAll(&uuid.format_uuid()) catch return ZiQlParserError.WriteError; - writer.writeAll("\"") catch return ZiQlParserError.WriteError; - writer.writeByte(']') catch return ZiQlParserError.WriteError; - send("{s}", .{buffer.items}); - + const uuid = self.file_engine.writeEntity(struct_name, data_map) catch return ZipponError.CantWriteEntity; + try self.sendUUID(uuid); state = .end; }, @@ -638,7 +640,16 @@ pub const Parser = struct { } } - condition.value = self.toker.buffer[start_index..token.loc.end]; + condition.value = switch (condition.data_type) { + .int => ConditionValue.initInt(self.toker.buffer[start_index..token.loc.end]), + .float => ConditionValue.initFloat(self.toker.buffer[start_index..token.loc.end]), + .str => ConditionValue.initStr(self.toker.buffer[start_index..token.loc.end]), + .date => ConditionValue.initDate(self.toker.buffer[start_index..token.loc.end]), + .time => ConditionValue.initTime(self.toker.buffer[start_index..token.loc.end]), + .datetime => ConditionValue.initDateTime(self.toker.buffer[start_index..token.loc.end]), + .bool => ConditionValue.initBool(self.toker.buffer[start_index..token.loc.end]), + else => unreachable, // TODO: Make for link and array =| + }; state = .end; }, @@ -1026,46 +1037,6 @@ test "ADD" { try testParsing("ADD User (name = 'Bob', email='bob@email.com', age=-55, scores=[ 1 ], friends=[], 
bday=2000/01/01, a_time=12:04:54.8741, last_order=2000/01/01-12:45)"); } -test "UPDATE" { - try testParsing("UPDATE User {name = 'Bob'} TO (email='new@gmail.com')"); -} - -test "DELETE" { - try testParsing("DELETE User {name='Bob'}"); -} - -test "GRAB filter with string" { - try testParsing("GRAB User {name = 'Bob'}"); - try testParsing("GRAB User {name != 'Brittany Rogers'}"); -} - -test "GRAB with additional data" { - try testParsing("GRAB User [1] {age < 18}"); - try testParsing("GRAB User [name] {age < 18}"); - try testParsing("GRAB User [100; name] {age < 18}"); -} - -test "GRAB filter with int" { - try testParsing("GRAB User {age = 18}"); - try testParsing("GRAB User {age > -18}"); - try testParsing("GRAB User {age < 18}"); - try testParsing("GRAB User {age <= 18}"); - try testParsing("GRAB User {age >= 18}"); - try testParsing("GRAB User {age != 18}"); -} - -test "GRAB filter with date" { - try testParsing("GRAB User {bday > 2000/01/01}"); - try testParsing("GRAB User {a_time < 08:00}"); - try testParsing("GRAB User {last_order > 2000/01/01-12:45}"); -} - -test "Specific query" { - try testParsing("GRAB User"); - try testParsing("GRAB User {}"); - try testParsing("GRAB User [1]"); -} - test "Synthax error" { try expectParsingError("GRAB {}", ZiQlParserError.StructNotFound); try expectParsingError("GRAB User {qwe = 'qwe'}", ZiQlParserError.MemberNotFound);