Removed some TODOs and changed the parsing so it now parses all existing files
Before, I was parsing all files by getting the max index and then parsing from 0 to the max. But now that I delete empty files, I need to parse only the existing ones.
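For context, here is a minimal standalone sketch of the iteration change. It is illustrative only: the hard-coded index list and the simplified `allFileIndex` stand-in are assumptions, not the project's real implementation (which derives the indexes from the DATA directory), and it uses the managed `std.ArrayList` API that the rest of the codebase uses.

```zig
const std = @import("std");

// Stand-in for FileEngine.allFileIndex: the real engine scans the DATA
// directory; here we just return an example with gaps, as if files 1 and 3
// had been deleted because they were empty.
fn allFileIndex(allocator: std.mem.Allocator) ![]usize {
    return allocator.dupe(usize, &[_]usize{ 0, 2, 4 });
}

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const to_parse = try allFileIndex(allocator);
    defer allocator.free(to_parse);

    // Old way: `for (0..(max_file_index + 1)) |file_index|` touched every index,
    // including indexes whose file no longer exists.
    // New way: one writer slot per existing file, indexed by position `i`,
    // while `file_index` is still passed to the worker to open the right file.
    const writer_slots = try allocator.alloc(std.ArrayList(u8), to_parse.len);
    defer allocator.free(writer_slots);

    for (to_parse, 0..) |file_index, i| {
        writer_slots[i] = std.ArrayList(u8).init(allocator);
        std.debug.print("spawn worker: file_index={d} -> slot {d}\n", .{ file_index, i });
    }
    for (writer_slots) |*list| list.deinit();
}
```

Indexing the writer slots by the loop position `i` rather than by `file_index` keeps the slice dense even when the surviving file indexes have gaps, which is why the diff below replaces `thread_writer_list[file_index]` with `thread_writer_list[i]`.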
This commit is contained in:
parent a16c9b29c1
commit d1b430a3d5
@@ -2,9 +2,6 @@ const std = @import("std");
const UUID = @import("uuid.zig").UUID;
const DateTime = @import("date.zig").DateTime;

// FIXME: Stop returning arrayList and use toOwnedSlice instead

// TODO: Put those functions somewhere else
pub fn parseInt(value_str: []const u8) i32 {
return std.fmt.parseInt(i32, value_str, 10) catch return 0;
}

@@ -1,6 +1,6 @@
// A relation map is all data needed to add relationship at the during parsing
// How it work is that, the first time I parse the struct files, like User, I populate a map of UUID empty string
// And in the JSON string I write {|<[16]u8>|} inside. Then I can use this struct to parse the file again
// And in the JSON string I write {<|[16]u8|>} inside. Then I can use this struct to parse the file again
// And if the UUID is in the map, I write the JSON if in its value in the map
// It need to be recurcive as additional data can do stuff like [name, friends [name, best_friend]]
// I could use parseEntities in a recursive way. But that mean ready the file at each loop =/
@@ -13,7 +13,7 @@
// Ok so I need to go recursive on parseEntities
// So I parse one time, if additional data has relationship, I create a list of RelationMap
// When I parse, I populate RelationMap with UUID I want
// Then for each RelationMap, I parse the files again this time to update the first JSON that now have {|<>|}
// Then for each RelationMap, I parse the files again this time to update the first JSON that now have {<||>}
// With a sub additionalData. If there is an additional data relation, I recurcive.
// So I need an option in parseEntity to either write the first JSON or update the existing one
//
@@ -34,16 +34,16 @@ member_name: []const u8,
additional_data: AdditionalData,
map: *std.AutoHashMap([16]u8, JsonString),

/// Will use a string in the JSON format and look for {|<[16]u8>|}
/// Will use a string in the JSON format and look for {<|[16]u8|>}
/// It will then check if it is for the right member name and if so, add an empty JSON string at the key
pub fn populate(self: *RelationMap, input: []const u8) ZipponError!void {
var uuid_bytes: [16]u8 = undefined;
var start: usize = 0;
while (std.mem.indexOf(u8, input[start..], "{|<")) |pos| {
while (std.mem.indexOf(u8, input[start..], "{<|")) |pos| {
const pattern_start = start + pos + 3;
const pattern_end = pattern_start + 16;

const member_end = if (input[pattern_start - 4] == '[') pattern_start - 6 else pattern_start - 5; // This should be ": {|<"
const member_end = if (input[pattern_start - 4] == '[') pattern_start - 6 else pattern_start - 5; // This should be ": {<|"
var member_start = member_end - 1;
while (input[member_start] != ' ') : (member_start -= 1) {}
member_start += 1;
@@ -62,11 +62,11 @@ pub fn populate(self: *RelationMap, input: []const u8) ZipponError!void {
}
}

// Array are pack in format {|<[16]u8>|},{|<[16]u8>|},{|<[16]u8>|},{|<[16]u8>|},
// Array are pack in format {<|[16]u8|>},{<|[16]u8|>},{<|[16]u8|>},{<|[16]u8|>},
fn populateArray(self: *RelationMap, input: []const u8, origin: usize) ZipponError!usize {
var uuid_bytes: [16]u8 = undefined;
var start = origin;
while (input.len > start + 23 and std.mem.eql(u8, input[start .. start + 3], "{|<") and std.mem.eql(u8, input[start + 19 .. start + 23], ">|},")) : (start += 23) {
while (input.len > start + 23 and std.mem.eql(u8, input[start .. start + 3], "{<|") and std.mem.eql(u8, input[start + 19 .. start + 23], "|>},")) : (start += 23) {
for (start + 3..start + 19, 0..) |i, j| uuid_bytes[j] = input[i];
self.map.put(uuid_bytes, JsonString{}) catch return ZipponError.MemoryError;
}

@@ -76,7 +76,7 @@ fn writeValue(writer: anytype, value: zid.Data, data_type: DataType) !void {
}
const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000"); // Maybe pass that comptime to prevent parsing it everytime
if (!std.meta.eql(v, uuid.bytes)) {
try writer.print("{{|<{s}>|}}", .{v});
try writer.print("{{<|{s}|>}}", .{v});
} else {
try writer.print("{{}}", .{});
}
@@ -104,7 +104,7 @@ fn writeArray(writer: anytype, data: zid.Data, data_type: DataType) ZipponError!
.IntArray => while (iter.next()) |v| writer.print("{d}, ", .{v.Int}) catch return ZipponError.WriteError,
.FloatArray => while (iter.next()) |v| writer.print("{d}", .{v.Float}) catch return ZipponError.WriteError,
.StrArray => while (iter.next()) |v| writer.print("\"{s}\"", .{v.Str}) catch return ZipponError.WriteError,
.UUIDArray => while (iter.next()) |v| writer.print("{{|<{s}>|}},", .{v.UUID}) catch return ZipponError.WriteError,
.UUIDArray => while (iter.next()) |v| writer.print("{{<|{s}|>}},", .{v.UUID}) catch return ZipponError.WriteError,
.BoolArray => while (iter.next()) |v| writer.print("{any}", .{v.Bool}) catch return ZipponError.WriteError,
.UnixArray => while (iter.next()) |v| {
const datetime = DateTime.initUnix(v.Unix);
@@ -122,12 +122,12 @@ fn writeArray(writer: anytype, data: zid.Data, data_type: DataType) ZipponError!
writer.writeByte(']') catch return ZipponError.WriteError;
}

/// Take a string in the JSON format and look for {|<[16]u8>|}, then will look into the map and check if it can find this UUID
/// If it find it, it ill replace the {|<[16]u8>|} will the value
/// Take a string in the JSON format and look for {<|[16]u8|>}, then will look into the map and check if it can find this UUID
/// If it find it, it ill replace the {<|[16]u8|>} will the value
pub fn updateWithRelation(writer: anytype, input: []const u8, map: std.AutoHashMap([16]u8, JsonString)) ZipponError!void {
var uuid_bytes: [16]u8 = undefined;
var start: usize = 0;
while (std.mem.indexOf(u8, input[start..], "{|<")) |pos| {
while (std.mem.indexOf(u8, input[start..], "{<|")) |pos| {
const pattern_start = start + pos + 3;
const pattern_end = pattern_start + 16;

@@ -155,7 +155,7 @@ pub fn updateWithRelation(writer: anytype, input: []const u8, map: std.AutoHashM
fn updateArray(writer: anytype, input: []const u8, map: std.AutoHashMap([16]u8, JsonString), origin: usize) ZipponError!usize {
var uuid_bytes: [16]u8 = undefined;
var start = origin;
while (input.len > start + 23 and std.mem.eql(u8, input[start .. start + 3], "{|<") and std.mem.eql(u8, input[start + 19 .. start + 23], ">|},")) : (start += 23) {
while (input.len > start + 23 and std.mem.eql(u8, input[start .. start + 3], "{<|") and std.mem.eql(u8, input[start + 19 .. start + 23], "|>},")) : (start += 23) {
@memcpy(uuid_bytes[0..], input[start + 3 .. start + 19]);
if (map.get(uuid_bytes)) |json_string| {
writer.writeAll(json_string.slice) catch return ZipponError.WriteError;

@@ -50,18 +50,17 @@ pub fn populateFileIndexUUIDMap(
defer arena.deinit();
const allocator = arena.allocator();

const max_file_index = try self.maxFileIndex(sstruct.name);

const dir = try self.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{});
const to_parse = try self.allFileIndex(allocator, sstruct.name);

// Multi-threading setup
var sync_context = ThreadSyncContext.init(
0,
max_file_index + 1,
to_parse.len,
);

// Create a thread-safe writer for each file
var thread_writer_list = allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return ZipponError.MemoryError;
var thread_writer_list = allocator.alloc(std.ArrayList(UUID), to_parse.len) catch return ZipponError.MemoryError;
defer {
for (thread_writer_list) |list| list.deinit();
allocator.free(thread_writer_list);
@@ -72,10 +71,10 @@ pub fn populateFileIndexUUIDMap(
}

// Spawn threads for each file
for (0..(max_file_index + 1)) |file_index| {
for (to_parse, 0..) |file_index, i| {
self.thread_pool.spawn(populateFileIndexUUIDMapOneFile, .{
sstruct,
&thread_writer_list[file_index],
&thread_writer_list[i],
file_index,
dir,
&sync_context,
@@ -141,29 +140,29 @@ pub fn populateVoidUUIDMap(
const allocator = arena.allocator();

const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
const max_file_index = try self.maxFileIndex(sstruct.name);

const dir = try self.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{});
const to_parse = try self.allFileIndex(allocator, sstruct.name);

// Multi-threading setup
var sync_context = ThreadSyncContext.init(
additional_data.limit,
max_file_index + 1,
to_parse.len,
);

// Create a thread-safe writer for each file
var thread_writer_list = allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return ZipponError.MemoryError;
var thread_writer_list = allocator.alloc(std.ArrayList(UUID), to_parse.len + 1) catch return ZipponError.MemoryError;

for (thread_writer_list) |*list| {
list.* = std.ArrayList(UUID).init(allocator);
}

// Spawn threads for each file
for (0..(max_file_index + 1)) |file_index| {
for (to_parse, 0..) |file_index, i| {
self.thread_pool.spawn(populateVoidUUIDMapOneFile, .{
sstruct,
filter,
&thread_writer_list[file_index],
&thread_writer_list[i],
file_index,
dir,
&sync_context,
@@ -249,9 +248,7 @@ pub fn parseEntities(
const writer = buff.writer();

const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
const max_file_index = try self.maxFileIndex(sstruct.name);

log.debug("Max file index {d}", .{max_file_index});
const to_parse = try self.allFileIndex(allocator, struct_name);

// If there is no member to find, that mean we need to return all members, so let's populate additional data with all of them
if (additional_data.childrens.items.len == 0)
@@ -267,21 +264,21 @@ pub fn parseEntities(
// Multi thread stuffs
var sync_context = ThreadSyncContext.init(
additional_data.limit,
max_file_index + 1,
to_parse.len,
);

// Do an array of writer for each thread
// Could I create just the number of max cpu ? Because if I have 1000 files, I do 1000 list
// But at the end, only the number of use CPU/Thread will use list simultanously
// So I could pass list from a thread to another technicly
var thread_writer_list = allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return ZipponError.MemoryError;
var thread_writer_list = allocator.alloc(std.ArrayList(u8), to_parse.len) catch return ZipponError.MemoryError;

// Start parsing all file in multiple thread
for (0..(max_file_index + 1)) |file_index| {
for (to_parse, 0..) |file_index, i| {
thread_writer_list[file_index] = std.ArrayList(u8).init(allocator);

self.thread_pool.spawn(parseEntitiesOneFile, .{
thread_writer_list[file_index].writer(),
thread_writer_list[i].writer(),
file_index,
dir,
sstruct.zid_schema,
@@ -303,11 +300,11 @@ pub fn parseEntities(
// Now I need to do the relation stuff, meaning parsing new files to get the relationship value
// Without relationship to return, this function is basically finish here

// Here I take the JSON string and I parse it to find all {|<>|} and add them to the relation map with an empty JsonString
// Here I take the JSON string and I parse it to find all {<||>} and add them to the relation map with an empty JsonString
for (relation_maps) |*relation_map| try relation_map.populate(buff.items);

// I then call parseEntitiesRelationMap on each
// This will update the buff items to be the same Json but with {|<[16]u8>|} replaced with the right Json
// This will update the buff items to be the same Json but with {<|[16]u8|>} replaced with the right Json
for (relation_maps) |*relation_map| try self.parseEntitiesRelationMap(allocator, relation_map.struct_name, relation_map, &buff);

return buff.toOwnedSlice() catch return ZipponError.MemoryError;
@@ -362,9 +359,9 @@ fn parseEntitiesOneFile(

// Receive a map of UUID -> empty JsonString
// Will parse the files and update the value to the JSON string of the entity that represent the key
// Will then write the input with the JSON in the map looking for {|<>|}
// Once the new input received, call parseEntitiesRelationMap again the string still contain {|<>|} because of sub relationship
// The buffer contain the string with {|<>|} and need to be updated at the end
// Will then write the input with the JSON in the map looking for {<||>}
// Once the new input received, call parseEntitiesRelationMap again the string still contain {<||>} because of sub relationship
// The buffer contain the string with {<||>} and need to be updated at the end
pub fn parseEntitiesRelationMap(
self: *Self,
parent_allocator: Allocator,
@@ -449,13 +446,13 @@ pub fn parseEntitiesRelationMap(
buff.clearRetainingCapacity();
buff.writer().writeAll(new_buff.items) catch return ZipponError.WriteError;

// Now here I need to iterate if buff.items still have {|<>|}
// Now here I need to iterate if buff.items still have {<||>}

// Here I take the JSON string and I parse it to find all {|<>|} and add them to the relation map with an empty JsonString
// Here I take the JSON string and I parse it to find all {<||>} and add them to the relation map with an empty JsonString
for (relation_maps) |*sub_relation_map| try sub_relation_map.populate(buff.items);

// I then call parseEntitiesRelationMap on each
// This will update the buff items to be the same Json but with {|<[16]u8>|} replaced with the right Json
// This will update the buff items to be the same Json but with {<|[16]u8|>} replaced with the right Json
for (relation_maps) |*sub_relation_map| try parseEntitiesRelationMap(self, allocator, sub_relation_map.struct_name, sub_relation_map, buff);
}

@@ -111,7 +111,6 @@ pub fn string2Data(allocator: Allocator, value: ConditionValue) ZipponError!zid.
}

/// Take a map from the parseNewData and return an ordered array of Data to be use in a DataWriter
/// TODO: Optimize and maybe put it somewhere else than fileEngine
pub fn orderedNewData(
self: *Self,
allocator: Allocator,

@@ -76,18 +76,18 @@ pub fn updateEntities(
const allocator = arena.allocator();

const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
const max_file_index = try self.maxFileIndex(sstruct.name);

const dir = try self.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{});
const to_parse = try self.allFileIndex(allocator, struct_name);

// Multi-threading setup
var sync_context = ThreadSyncContext.init(
additional_data.limit,
max_file_index + 1,
to_parse.len,
);

// Create a thread-safe writer for each file
var thread_writer_list = allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return ZipponError.MemoryError;
var thread_writer_list = allocator.alloc(std.ArrayList(u8), to_parse.len) catch return ZipponError.MemoryError;
for (thread_writer_list) |*list| {
list.* = std.ArrayList(u8).init(allocator);
}
@@ -101,13 +101,13 @@ pub fn updateEntities(
}

// Spawn threads for each file
for (0..(max_file_index + 1)) |file_index| {
for (to_parse, 0..) |file_index, i| {
self.thread_pool.spawn(updateEntitiesOneFile, .{
new_data_buff,
sstruct,
filter,
&map,
thread_writer_list[file_index].writer(),
thread_writer_list[i].writer(),
file_index,
dir,
&sync_context,
@@ -239,28 +239,28 @@ pub fn deleteEntities(
const allocator = arena.allocator();

const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
const max_file_index = try self.maxFileIndex(sstruct.name);

const dir = try self.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{});
const to_parse = try self.allFileIndex(allocator, struct_name);

// Multi-threading setup
var sync_context = ThreadSyncContext.init(
additional_data.limit,
max_file_index + 1,
to_parse.len,
);

// Create a thread-safe writer for each file
var thread_writer_list = allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return ZipponError.MemoryError;
var thread_writer_list = allocator.alloc(std.ArrayList(u8), to_parse.len) catch return ZipponError.MemoryError;
for (thread_writer_list) |*list| {
list.* = std.ArrayList(u8).init(allocator);
}

// Spawn threads for each file
for (0..(max_file_index + 1)) |file_index| {
for (to_parse, 0..) |file_index, i| {
self.thread_pool.spawn(deleteEntitiesOneFile, .{
sstruct,
filter,
thread_writer_list[file_index].writer(),
thread_writer_list[i].writer(),
file_index,
dir,
&sync_context,
@@ -277,7 +277,8 @@ pub fn deleteEntities(
}
writer.writeByte(']') catch return ZipponError.WriteError;

// Update UUID file index map FIXME: Stop doing that and just remove UUID from the map itself instead of reparsing everything at the end
// FIXME: Stop doing that and just remove UUID from the map itself instead of reparsing everything at the end
// It's just that I can't do it in deleteEntitiesOneFile itself
sstruct.uuid_file_index.map.clearRetainingCapacity();
_ = sstruct.uuid_file_index.arena.reset(.free_all);
try self.populateFileIndexUUIDMap(sstruct, sstruct.uuid_file_index);

@@ -19,6 +19,7 @@ pub const Token = struct {
.{ "NOW", .keyword_now },
.{ "APPEND", .keyword_append },
.{ "POP", .keyword_pop },
.{ "CLEAR", .keyword_clear },
.{ "REMOVE", .keyword_remove },
.{ "REMOVEAT", .keyword_remove_at },
.{ "grab", .keyword_grab },
@@ -56,6 +57,7 @@ pub const Token = struct {
keyword_now,
keyword_append,
keyword_pop,
keyword_clear,
keyword_remove,
keyword_remove_at,