diff --git a/lib/zid.zig b/lib/zid.zig
index 7d055ee..9afc6a8 100644
--- a/lib/zid.zig
+++ b/lib/zid.zig
@@ -509,7 +509,7 @@ pub const ArrayIterator = struct {
 /// Performance concern once again.
 pub const DataWriter = struct {
     file: std.fs.File,
-    writer: std.io.BufferedWriter(4096, std.fs.File.Writer), // TODO: Increase buffer size, this should speed up a bit
+    writer: std.io.BufferedWriter(4096, std.fs.File.Writer),

     pub fn init(name: []const u8, dir: ?std.fs.Dir) !DataWriter {
         const d_ = dir orelse std.fs.cwd();
diff --git a/src/entityWriter.zig b/src/entityWriter.zig
index d9d6510..62c78d4 100644
--- a/src/entityWriter.zig
+++ b/src/entityWriter.zig
@@ -10,8 +10,6 @@ const UUID = dtype.UUID;

 const ZipponError = @import("stuffs/errors.zig").ZipponError;

-// TODO: Try std.json
-
 pub const EntityWriter = struct {
     pub fn writeEntityTable(
         writer: anytype,
diff --git a/src/fileEngine.zig b/src/fileEngine.zig
index b1d67e0..2ab31c1 100644
--- a/src/fileEngine.zig
+++ b/src/fileEngine.zig
@@ -98,7 +98,6 @@ pub const FileEngine = struct {
     // --------------------Init folder and files--------------------

     /// Create the main folder. Including DATA, LOG and BACKUP
-    /// TODO: Maybe start using a fixed lenght buffer instead of free everytime, but that not that important
     pub fn createMainDirectories(self: *FileEngine) ZipponError!void {
         var path_buff = std.fmt.bufPrint(&path_buffer, "{s}", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError;

@@ -462,7 +461,7 @@ pub const FileEngine = struct {
         // I then call parseEntitiesRelationMap on each
         // This will update the buff items to be the same Json but with {|<[16]u8>|} replaced with the right Json
-        for (relation_maps) |*relation_map| try self.parseEntitiesRelationMap(relation_map.struct_name, relation_map, &buff);
+        for (relation_maps) |*relation_map| try self.parseEntitiesRelationMap(allocator, relation_map.struct_name, relation_map, &buff);

         return buff.toOwnedSlice() catch return ZipponError.MemoryError;
     }
@@ -522,11 +521,12 @@ pub const FileEngine = struct {
     // TODO: Use the new function in SchemaEngine to reduce the number of files to parse
     pub fn parseEntitiesRelationMap(
         self: *FileEngine,
+        parent_allocator: Allocator,
         struct_name: []const u8,
         relation_map: *RelationMap,
         buff: *std.ArrayList(u8),
     ) ZipponError!void {
-        var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+        var arena = std.heap.ArenaAllocator.init(parent_allocator);
         defer arena.deinit();
         const allocator = arena.allocator();

@@ -541,9 +541,7 @@ pub const FileEngine = struct {
         );

         const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
-        const max_file_index = try self.maxFileIndex(sstruct.name); // Chqnge to use a list of file index
-
-        log.debug("Max file index {d}", .{max_file_index});
+        const to_parse = try self.schema_engine.fileListToParse(allocator, struct_name, relation_map.map.*);

         // If there is no member to find, that mean we need to return all members, so let's populate additional data with all of them
         if (relation_map.additional_data.childrens.items.len == 0) {
@@ -563,21 +561,21 @@ pub const FileEngine = struct {
         // Multi thread stuffs
         var sync_context = ThreadSyncContext.init(
             relation_map.additional_data.limit,
-            max_file_index + 1,
+            to_parse.len,
         );

         // Do one writer for each thread otherwise it create error by writing at the same time
         var thread_map_list = allocator.alloc(
             std.AutoHashMap([16]u8, JsonString),
-            max_file_index + 1,
+            to_parse.len,
         ) catch return FileEngineError.MemoryError;

         // Start parsing all file in multiple thread
-        for (0..(max_file_index + 1)) |file_index| {
-            thread_map_list[file_index] = relation_map.map.cloneWithAllocator(allocator) catch return ZipponError.MemoryError;
+        for (to_parse, 0..) |file_index, i| {
+            thread_map_list[i] = relation_map.map.cloneWithAllocator(allocator) catch return ZipponError.MemoryError;

             self.thread_pool.spawn(parseEntitiesRelationMapOneFile, .{
-                &thread_map_list[file_index],
+                &thread_map_list[i],
                 file_index,
                 dir,
                 sstruct.zid_schema,
@@ -614,7 +612,7 @@ pub const FileEngine = struct {
         // I then call parseEntitiesRelationMap on each
         // This will update the buff items to be the same Json but with {|<[16]u8>|} replaced with the right Json
-        for (relation_maps) |*sub_relation_map| try self.parseEntitiesRelationMap(sub_relation_map.struct_name, sub_relation_map, buff);
+        for (relation_maps) |*sub_relation_map| try self.parseEntitiesRelationMap(allocator, sub_relation_map.struct_name, sub_relation_map, buff);
     }

     fn parseEntitiesRelationMapOneFile(
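The fileEngine.zig hunks make two related changes: `parseEntitiesRelationMap` now builds its arena on a caller-supplied `parent_allocator` instead of `std.heap.page_allocator`, and it only visits the files returned by `schema_engine.fileListToParse` rather than every file up to `maxFileIndex`. Below is a minimal, self-contained sketch of the allocator-threading part only; `parseWithArena` and the GPA-based caller are invented for illustration and are not ZipponDB code.

```zig
// Sketch of the allocator change, not ZipponDB code: scratch work goes through
// an arena whose backing memory comes from the caller, and only results that
// must outlive the call are duplicated with the parent allocator.
const std = @import("std");

fn parseWithArena(parent_allocator: std.mem.Allocator) ![]u8 {
    var arena = std.heap.ArenaAllocator.init(parent_allocator);
    defer arena.deinit(); // frees all scratch allocations at once
    const allocator = arena.allocator();

    const scratch = try allocator.alloc(u8, 64);
    @memset(scratch, 'x');

    // The returned slice must survive arena.deinit(), so the caller owns it.
    return parent_allocator.dupe(u8, scratch[0..8]);
}

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    const out = try parseWithArena(gpa.allocator());
    defer gpa.allocator().free(out);
    std.debug.print("{s}\n", .{out});
}
```

Compared to reaching for `page_allocator` inside the function, this keeps ownership explicit and lets callers pool, track, or test allocations with whatever allocator they already use.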
diff --git a/src/schemaEngine.zig b/src/schemaEngine.zig
index 0093167..867b95e 100644
--- a/src/schemaEngine.zig
+++ b/src/schemaEngine.zig
@@ -258,18 +258,19 @@ pub const SchemaEngine = struct {
         alloc: Allocator,
         struct_name: []const u8,
         map: std.AutoHashMap([16]u8, JsonString),
-    ) ![]usize {
+    ) ZipponError![]usize {
         const sstruct = try self.structName2SchemaStruct(struct_name);
         var unique_indices = std.AutoHashMap(usize, void).init(alloc);
+        defer unique_indices.deinit();

         var iter = map.keyIterator();
         while (iter.next()) |uuid| {
-            if (sstruct.uuid_file_index.get(uuid.*)) |file_index| {
-                try unique_indices.put(file_index, {});
+            if (sstruct.uuid_file_index.get(UUID{ .bytes = uuid.* })) |file_index| {
+                unique_indices.put(file_index, {}) catch return ZipponError.MemoryError;
             }
         }

-        var result = try alloc.alloc(usize, unique_indices.count());
+        var result = alloc.alloc(usize, unique_indices.count()) catch return ZipponError.MemoryError;
         var i: usize = 0;
         var index_iter = unique_indices.keyIterator();
         while (index_iter.next()) |index| {
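This is the `fileListToParse` side of the change: it deduplicates the file indices behind the UUIDs it is given, so `parseEntitiesRelationMap` above can spawn one task per relevant file instead of one per existing file. Note the lookup fix in the same hunk: `uuid_file_index` is keyed by `UUID`, so the raw `[16]u8` key is wrapped as `UUID{ .bytes = uuid.* }` before the `get`. A rough standalone version of the set-building pattern, with an invented `uniqueIndices` helper and a plain `[]const usize` input instead of the real schema types, looks like this:

```zig
// Standalone sketch of the AutoHashMap(usize, void)-as-set pattern used by
// fileListToParse; uniqueIndices and its input type are invented for the example.
const std = @import("std");

fn uniqueIndices(allocator: std.mem.Allocator, indices: []const usize) ![]usize {
    var seen = std.AutoHashMap(usize, void).init(allocator);
    defer seen.deinit();

    // Re-inserting an existing key is a no-op, so duplicates collapse.
    for (indices) |idx| try seen.put(idx, {});

    var result = try allocator.alloc(usize, seen.count());
    var i: usize = 0;
    var it = seen.keyIterator();
    while (it.next()) |idx| : (i += 1) result[i] = idx.*;
    return result;
}

test "uniqueIndices collapses duplicates" {
    const out = try uniqueIndices(std.testing.allocator, &.{ 0, 2, 2, 5, 0 });
    defer std.testing.allocator.free(out);
    try std.testing.expectEqual(@as(usize, 3), out.len);
}
```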
diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig
index 539c55d..bf134fc 100644
--- a/src/ziqlParser.zig
+++ b/src/ziqlParser.zig
@@ -70,9 +70,7 @@ pub const Parser = struct {
     file_engine: *FileEngine,
     schema_engine: *SchemaEngine,

-    // TODO: Improve memory management, stop using an alloc in init maybe
     pub fn init(toker: *Tokenizer, file_engine: *FileEngine, schema_engine: *SchemaEngine) Parser {
-        // Do I need to init a FileEngine at each Parser, can't I put it in the CLI parser instead ?
         return Parser{
             .toker = toker,
             .file_engine = file_engine,
@@ -309,7 +307,6 @@ pub const Parser = struct {
                 ),
             },

-            // TODO: Speed up batch by flushing one time and speed up how to find which file to use
             .parse_new_data_and_add_data => {
                 var order = std.ArrayList([]const u8).init(allocator);
                 defer order.deinit();
@@ -1048,13 +1045,11 @@ pub const Parser = struct {
                     }
                 }
             },
-            .link, .link_array => {}, // TODO: Check if next token is either [ or {
+            .link, .link_array => {},
             else => unreachable,
         }

         // And finally create the ConditionValue
-        // FIXME: This take the majority of time when ADD in big batch. Need serious speed up. I aim to be able to load a simple 10MB query in less then 0.1s
-        // Rn for 100_000 users for around 10Mb, it take 30s... I mean come on, 30s ? For 10MB ? That suck...
         switch (data_type) {
             .int => return ConditionValue.initInt(self.toker.buffer[start_index..token.loc.end]),
             .float => return ConditionValue.initFloat(self.toker.buffer[start_index..token.loc.end]),
@@ -1110,7 +1105,7 @@ pub const Parser = struct {
         additional_data.limit = 1;

         const link_sstruct = try self.schema_engine.linkedStructName(struct_name, member_name);
-        if (token.tag == .l_brace) filter = try self.parseFilter( // FIXME: Look like the filter is empty after that (root node is Empty)
+        if (token.tag == .l_brace) filter = try self.parseFilter(
             allocator,
             link_sstruct.name,
             false,
@@ -1126,7 +1121,6 @@ pub const Parser = struct {
             .empty => null,
             else => filter,
         };
-        std.debug.print("Filter: {any}\n", .{filter});

         // Here I have the filter and additionalData
         const map = allocator.create(std.AutoHashMap(UUID, void)) catch return ZipponError.MemoryError;
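A convention visible across these hunks (e.g. `allocator.create(...) catch return ZipponError.MemoryError` above, and the schemaEngine signature change to `ZipponError![]usize`): functions that declare the explicit `ZipponError` set cannot propagate `Allocator.Error` with `try`, so allocation failures are mapped with `catch return ZipponError.MemoryError`. A tiny sketch of that narrowing, with an invented `dupeName` helper and a deliberately reduced error set:

```zig
// Sketch of the error-set narrowing convention; ZipponError is reduced to one
// member here and dupeName is an invented helper, not project code.
const std = @import("std");

const ZipponError = error{MemoryError};

fn dupeName(allocator: std.mem.Allocator, name: []const u8) ZipponError![]u8 {
    // Allocator.Error.OutOfMemory is not part of ZipponError, so `try` would not
    // compile here; the failure is translated into the project error instead.
    return allocator.dupe(u8, name) catch return ZipponError.MemoryError;
}

test "dupeName copies its input" {
    const copy = try dupeName(std.testing.allocator, "zippon");
    defer std.testing.allocator.free(copy);
    try std.testing.expectEqualStrings("zippon", copy);
}
```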