const std = @import("std"); const schemaEngine = @import("schemaEngine.zig"); const Allocator = std.mem.Allocator; const UUID = @import("types/uuid.zig").UUID; const DataType = @import("types/dataType.zig").DataType; //TODO: Create a union class and chose between file and memory /// Manage everything that is relate to read or write in files /// Or even get stats, whatever. If it touch files, it's here pub const FileEngine = struct { allocator: Allocator, path_to_DATA_dir: []const u8, // The path to the DATA folder max_file_size: usize = 5e+4, // 50kb TODO: Change pub const Token = struct { tag: Tag, loc: Loc, pub const Loc = struct { start: usize, end: usize, }; pub const Tag = enum { eof, invalid, string_literal, int_literal, float_literal, identifier, equal, bang, // ! pipe, // | l_paren, // ( r_paren, // ) l_bracket, // [ r_bracket, // ] l_brace, // { r_brace, // } semicolon, // ; comma, // , angle_bracket_left, // < angle_bracket_right, // > angle_bracket_left_equal, // <= angle_bracket_right_equal, // >= equal_angle_bracket_right, // => period, // . bang_equal, // != }; }; pub const Tokenizer = struct { buffer: [:0]const u8, index: usize, // Maybe change that to use the stream directly so I dont have to read the line 2 times pub fn init(buffer: [:0]const u8) Tokenizer { // Skip the UTF-8 BOM if present. return .{ .buffer = buffer, .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0, // WTF ? I guess some OS add that or some shit like that }; } const State = enum { start, string_literal, float, int, }; pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 { return self.buffer[token.loc.start..token.loc.end]; } pub fn next(self: *Tokenizer) Token { // That ugly but work if (self.buffer[self.index] == ' ') self.index += 1; var state: State = .start; var result: Token = .{ .tag = undefined, .loc = .{ .start = self.index, .end = undefined, }, }; while (true) : (self.index += 1) { const c = self.buffer[self.index]; if (self.index == self.buffer.len) break; switch (state) { .start => switch (c) { '\'' => { state = .string_literal; result.tag = .string_literal; }, '0'...'9', '-' => { state = .int; result.tag = .int_literal; }, '[' => { result.tag = .l_bracket; self.index += 1; break; }, ']' => { result.tag = .r_bracket; self.index += 1; break; }, else => std.debug.print("Unknow character: {c}\n", .{c}), }, .string_literal => switch (c) { '\'' => { self.index += 1; break; }, else => continue, }, .int => switch (c) { '.' => { state = .float; result.tag = .float_literal; }, '0'...'9' => continue, else => break, }, .float => switch (c) { '0'...'9' => { continue; }, else => { break; }, }, } } result.loc.end = self.index; return result; } }; const ComparisonValue = union { int: i64, float: f64, str: []const u8, bool_: bool, int_array: std.ArrayList(i64), str_array: std.ArrayList([]const u8), float_array: std.ArrayList(f64), bool_array: std.ArrayList(bool), }; /// use to parse file. It take a struct name and member name to know what to parse. /// An Operation from equal, different, superior, superior_or_equal, ... /// The DataType from int, float and str pub const Condition = struct { struct_name: []const u8, member_name: []const u8 = undefined, value: []const u8 = undefined, operation: enum { equal, different, superior, superior_or_equal, inferior, inferior_or_equal } = undefined, // Add more stuff like IN data_type: DataType = undefined, pub fn init(struct_name: []const u8) Condition { return Condition{ .struct_name = struct_name }; } }; pub fn init(allocator: Allocator, DATA_path: ?[]const u8) FileEngine { // I think use env variable for the path, idk, something better at least than just that 😕 return FileEngine{ .allocator = allocator, .path_to_DATA_dir = DATA_path orelse "ZipponDB/DATA", }; } /// Take a condition and an array of UUID and fill the array with all UUID that match the condition pub fn getUUIDListUsingCondition(self: *FileEngine, condition: Condition, uuid_array: *std.ArrayList(UUID)) !void { const max_file_index = try self.maxFileIndex(condition.struct_name); var current_index: usize = 0; var sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); defer self.allocator.free(sub_path); var file = std.fs.cwd().openFile(sub_path, .{}) catch @panic("Can't open first file to init a data iterator"); defer file.close(); var output: [1024 * 50]u8 = undefined; // Maybe need to increase that as it limit the size of a line in a file var output_fbs = std.io.fixedBufferStream(&output); const writer = output_fbs.writer(); var buffered = std.io.bufferedReader(file.reader()); var reader = buffered.reader(); var compare_value: ComparisonValue = undefined; switch (condition.data_type) { .int => compare_value = ComparisonValue{ .int = parseInt(condition.value) }, .str => compare_value = ComparisonValue{ .str = condition.value }, .float => compare_value = ComparisonValue{ .float = parseFloat(condition.value) }, .bool => compare_value = ComparisonValue{ .bool_ = parseBool(condition.value) }, .int_array => compare_value = ComparisonValue{ .int_array = parseArrayInt(self.allocator, condition.value) }, .str_array => compare_value = ComparisonValue{ .str_array = parseArrayStr(self.allocator, condition.value) }, .float_array => compare_value = ComparisonValue{ .float_array = parseArrayFloat(self.allocator, condition.value) }, .bool_array => compare_value = ComparisonValue{ .bool_array = parseArrayBool(self.allocator, condition.value) }, } defer { switch (condition.data_type) { .int_array => compare_value.int_array.deinit(), .str_array => compare_value.str_array.deinit(), .float_array => compare_value.float_array.deinit(), .bool_array => compare_value.bool_array.deinit(), else => {}, } } var token: FileEngine.Token = undefined; const column_index = schemaEngine.columnIndexOfMember(condition.struct_name, condition.member_name); while (true) { output_fbs.reset(); reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) { error.EndOfStream => { output_fbs.reset(); // clear buffer before exit if (current_index == max_file_index) break; current_index += 1; self.allocator.free(sub_path); sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator"); file.close(); // Do I need to close ? I think so file = std.fs.cwd().openFile(sub_path, .{}) catch { std.debug.print("Error trying to open {s}\n", .{sub_path}); @panic("Can't open first file to init a data iterator"); }; buffered = std.io.bufferedReader(file.reader()); reader = buffered.reader(); continue; }, // file read till the end else => { std.debug.print("Error while reading file: {any}\n", .{err}); break; }, }; // Maybe use the stream directly to prevent duplicate the data // But I would need to change the Tokenizer a lot... const null_terminated_string = try self.allocator.dupeZ(u8, output_fbs.getWritten()[37..]); defer self.allocator.free(null_terminated_string); var data_toker = Tokenizer.init(null_terminated_string); const uuid = try UUID.parse(output_fbs.getWritten()[0..36]); // Skip unwanted token for (0..column_index.?) |_| { _ = data_toker.next(); } token = data_toker.next(); // TODO: Add error for wrong condition like superior between 2 string or array switch (condition.operation) { .equal => { switch (condition.data_type) { .int => if (compare_value.int == parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .float => if (compare_value.float == parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .str => if (std.mem.eql(u8, compare_value.str, data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .bool => if (compare_value.bool_ == parseBool(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, .different => { switch (condition.data_type) { .int => if (compare_value.int != parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .float => if (compare_value.float != parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .str => if (!std.mem.eql(u8, compare_value.str, data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .bool => if (compare_value.bool_ != parseBool(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, .superior_or_equal => { switch (condition.data_type) { .int => if (compare_value.int <= parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .float => if (compare_value.float <= parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, .superior => { switch (condition.data_type) { .int => if (compare_value.int < parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .float => if (compare_value.float < parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, .inferior_or_equal => { switch (condition.data_type) { .int => if (compare_value.int >= parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .float => if (compare_value.float >= parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, .inferior => { switch (condition.data_type) { .int => if (compare_value.int > parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), .float => if (compare_value.float > parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid), // TODO: Implement for array too else => {}, } }, } } } // TODO: Clean a bit the code // Do I need multiple files too ? I mean it duplicate UUID a lot, if it's just to save a name like 'Bob', storing a long UUID is overkill // I could just use a tabular data format with separator using space - Or maybe I encode the uuid to take a minimum space as I always know it size pub fn writeEntity(self: FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) !UUID { const uuid = UUID.init(); const potential_file_index = try self.getFirstUsableIndexFile(struct_name); var file: std.fs.File = undefined; defer file.close(); var path: []const u8 = undefined; defer self.allocator.free(path); if (potential_file_index) |file_index| { path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, file_index }); file = std.fs.cwd().openFile(path, .{ .mode = .read_write }) catch @panic("=("); } else { const max_index = try self.maxFileIndex(struct_name); path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, max_index + 1 }); file = std.fs.cwd().createFile(path, .{}) catch @panic("Error creating new data file"); } try file.seekFromEnd(0); try file.writer().print("{s}", .{uuid.format_uuid()}); const member_names = schemaEngine.structName2structMembers(struct_name); // This need to be in the same order all the time tho for (member_names) |member_name| { try file.writer().print(" {s}", .{data_map.get(member_name).?}); } try file.writer().print("\n", .{}); return uuid; } /// Use a filename in the format 1.zippondata and return the 1 /// Note that if I change the extension of the data file, I need to update that as it use a fixed len for the extension fn fileName2Index(_: FileEngine, file_name: []const u8) usize { return std.fmt.parseInt(usize, file_name[0..(file_name.len - 11)], 10) catch @panic("Couln't parse the int of a zippondata file."); } /// Use the map of file stat to find the first file with under the bytes limit. /// return the name of the file. If none is found, return null. fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) !?usize { const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name }); defer self.allocator.free(path); var member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); defer member_dir.close(); var iter = member_dir.iterate(); while (try iter.next()) |entry| { const file_stat = try member_dir.statFile(entry.name); if (file_stat.size < self.max_file_size) return self.fileName2Index(entry.name); } return null; } /// Iter over all file and get the max name and return the value of it as usize /// So for example if there is 1.zippondata and 2.zippondata it return 2. fn maxFileIndex(self: FileEngine, struct_name: []const u8) !usize { const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name }); defer self.allocator.free(path); const member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true }); var count: usize = 0; var iter = member_dir.iterate(); while (try iter.next()) |entry| { if (entry.kind != std.fs.Dir.Entry.Kind.file) continue; count += 1; } return count - 1; } // TODO: Give the option to keep , dump or erase the data pub fn initDataFolder(self: FileEngine) !void { var data_dir = try std.fs.cwd().openDir(self.path_to_DATA_dir, .{}); defer data_dir.close(); for (schemaEngine.struct_name_list) |struct_name| { data_dir.makeDir(struct_name) catch |err| switch (err) { error.PathAlreadyExists => {}, else => return err, }; const struct_dir = try data_dir.openDir(struct_name, .{}); _ = struct_dir.createFile("0.zippondata", .{}) catch |err| switch (err) { error.PathAlreadyExists => {}, else => return err, }; } } }; test "Get list of UUID using condition" { const allocator = std.testing.allocator; var data_engine = FileEngine.init(allocator, null); var uuid_array = std.ArrayList(UUID).init(allocator); defer uuid_array.deinit(); const condition = FileEngine.Condition{ .struct_name = "User", .member_name = "email", .value = "adrien@mail.com", .operation = .equal, .data_type = .str }; try data_engine.getUUIDListUsingCondition(condition, &uuid_array); } test "Open dir" { const dir = std.fs.cwd(); const sub_dir = try dir.openDir("src/types", .{}); _ = sub_dir; } // Series of functions to use just before creating an entity. // Will transform the string of data into data of the right type./ pub fn parseInt(value_str: []const u8) i64 { return std.fmt.parseInt(i64, value_str, 10) catch return 0; } pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) { var array = std.ArrayList(i64).init(allocator); var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); while (it.next()) |x| { array.append(parseInt(x)) catch {}; } return array; } pub fn parseFloat(value_str: []const u8) f64 { return std.fmt.parseFloat(f64, value_str) catch return 0; } pub fn parseArrayFloat(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(f64) { var array = std.ArrayList(f64).init(allocator); var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); while (it.next()) |x| { array.append(parseFloat(x)) catch {}; } return array; } pub fn parseBool(value_str: []const u8) bool { return (value_str[0] != '0'); } pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) { var array = std.ArrayList(bool).init(allocator); var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); while (it.next()) |x| { array.append(parseBool(x)) catch {}; } return array; } // FIXME: This will not work if their is a space in one string. E.g ['Hello world'] will be split between Hello and world but it shouldn't pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) { var array = std.ArrayList([]const u8).init(allocator); var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " "); while (it.next()) |x| { const x_copy = allocator.dupe(u8, x) catch @panic("=("); array.append(x_copy) catch {}; } return array; } test "Data parsing" { const allocator = std.testing.allocator; // Int const in1: [3][]const u8 = .{ "1", "42", "Hello" }; const expected_out1: [3]i64 = .{ 1, 42, 0 }; for (in1, 0..) |value, i| { try std.testing.expect(parseInt(value) == expected_out1[i]); } // Int array const in2 = "[1 14 44 42 hello]"; const out2 = parseArrayInt(allocator, in2); defer out2.deinit(); const expected_out2: [5]i64 = .{ 1, 14, 44, 42, 0 }; try std.testing.expect(std.mem.eql(i64, out2.items, &expected_out2)); // Float const in3: [3][]const u8 = .{ "1.3", "65.991", "Hello" }; const expected_out3: [3]f64 = .{ 1.3, 65.991, 0 }; for (in3, 0..) |value, i| { try std.testing.expect(parseFloat(value) == expected_out3[i]); } // Float array const in4 = "[1.5 14.3 44.9999 42 hello]"; const out4 = parseArrayFloat(allocator, in4); defer out4.deinit(); const expected_out4: [5]f64 = .{ 1.5, 14.3, 44.9999, 42, 0 }; try std.testing.expect(std.mem.eql(f64, out4.items, &expected_out4)); // Bool const in5: [3][]const u8 = .{ "1", "Hello", "0" }; const expected_out5: [3]bool = .{ true, true, false }; for (in5, 0..) |value, i| { try std.testing.expect(parseBool(value) == expected_out5[i]); } // Bool array const in6 = "[1 0 0 1 1]"; const out6 = parseArrayBool(allocator, in6); defer out6.deinit(); const expected_out6: [5]bool = .{ true, false, false, true, true }; try std.testing.expect(std.mem.eql(bool, out6.items, &expected_out6)); // TODO: Test the string array } // Test tokenizer test "basic query" { try testTokenize("001 123 0185", &.{ .int_literal, .int_literal, .int_literal }); } fn testTokenize(source: [:0]const u8, expected_token_tags: []const FileEngine.Token.Tag) !void { var tokenizer = FileEngine.Tokenizer.init(source); for (expected_token_tags) |expected_token_tag| { const token = tokenizer.next(); try std.testing.expectEqual(expected_token_tag, token.tag); } }