Error found!!

Got an error that I couldn't find for a while. It happened when parsing a lot of
files.

Turns out it was the thread pool or something like that, because if I run on 1 core,
it's ok.

This commit is about something different though: I just want to freeze a state where
the benchmark works.
Adrien Bouvais 2025-01-22 11:34:00 +01:00
parent ebf91bb61c
commit 7c34431702
11 changed files with 164 additions and 111 deletions
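The suspected race lines up with the src/file_engine.zig diff below: workers were started with thread_pool.spawn and completion was detected by polling sync_context.isComplete(), and this commit switches to std.Thread.Pool.spawnWg plus a WaitGroup. A minimal standalone sketch of that spawnWg/wait pattern, with an illustrative worker and counter that are not ZipponDB code:

const std = @import("std");

// Illustrative worker, standing in for parseEntitiesOneFile.
fn work(counter: *std.atomic.Value(u64)) void {
    _ = counter.fetchAdd(1, .monotonic);
}

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    var pool: std.Thread.Pool = undefined;
    try pool.init(.{ .allocator = gpa.allocator() });
    defer pool.deinit();

    var counter = std.atomic.Value(u64).init(0);

    // spawnWg registers each task on the WaitGroup, so wait() only
    // returns once every spawned task has actually finished.
    var wg: std.Thread.WaitGroup = .{};
    for (0..16) |_| pool.spawnWg(&wg, work, .{&counter});
    wg.wait();

    std.debug.print("all {d} tasks done\n", .{counter.load(.monotonic)});
}

This is the same shape the diff adopts in parseEntities and parseEntitiesRelationMap further down.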

View File

@@ -10,43 +10,81 @@ const times = [_][]const u8{ "12:04", "20:45:11", "03:11:13", "03:00:01.0152" };
 const datetimes = [_][]const u8{ "2000/01/01-12:04", "1954/04/02-20:45:11", "1998/01/21-03:11:13", "1977/12/31-03:00:01.0153" };
 const scores = [_]i32{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
 
-pub const std_options = .{
+pub const std_options = std.Options{
+    .log_level = .info,
     .logFn = myLog,
 };
 
+var date_buffer: [64]u8 = undefined;
+var date_fa = std.heap.FixedBufferAllocator.init(&date_buffer);
+const date_allocator = date_fa.allocator();
+
 pub fn myLog(
     comptime message_level: std.log.Level,
-    comptime scope: @Type(.EnumLiteral),
+    comptime scope: @Type(.enum_literal),
     comptime format: []const u8,
     args: anytype,
 ) void {
-    if (true) return;
-    _ = message_level;
-    _ = scope;
-    _ = format;
-    _ = args;
+    const level_txt = comptime message_level.asText();
+    const prefix = if (scope == .default) " - " else "(" ++ @tagName(scope) ++ ") - ";
+
+    const potential_file: ?std.fs.File = std.fs.cwd().openFile("benchmarkDB/LOG/log", .{ .mode = .write_only }) catch null;
+
+    if (potential_file) |file| {
+        date_fa.reset();
+        const now = @import("dtype").DateTime.now();
+        var date_format_buffer = std.ArrayList(u8).init(date_allocator);
+        defer date_format_buffer.deinit();
+        now.format("YYYY/MM/DD-HH:mm:ss.SSSS", date_format_buffer.writer()) catch return;
+
+        file.seekFromEnd(0) catch return;
+        const writer = file.writer();
+        writer.print("{s}{s}Time: {s} - ", .{ level_txt, prefix, date_format_buffer.items }) catch return;
+        writer.print(format, args) catch return;
+        writer.writeByte('\n') catch return;
+        file.close();
+    }
+}
+
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .safety = true }){};
+    defer {
+        const deinit_status = gpa.deinit();
+        switch (deinit_status) {
+            .leak => @panic("Oupsy"),
+            .ok => {},
+        }
+    }
+    try benchmark(gpa.allocator());
+}
+
+test "benchmark" {
+    const allocator = std.testing.allocator;
+    try benchmark(allocator);
 }
 
 // Maybe I can make it a test to use the testing alloc
-pub fn main() !void {
-    const allocator = std.heap.page_allocator;
-    const to_test = [_]usize{ 500, 50_000, 1_000_000 };
+pub fn benchmark(allocator: std.mem.Allocator) !void {
+    const to_test = [_]usize{ 5, 50, 500, 5_000, 50_000, 500_000, 5_000_000 };
     var line_buffer: [1024 * 1024]u8 = undefined;
     for (to_test) |users_count| {
         var db_engine = DBEngine.init(allocator, "benchmarkDB", "schema/benchmark");
         defer db_engine.deinit();
 
+        // Empty db
         {
             const null_term_query_str = try std.fmt.bufPrintZ(&line_buffer, "DELETE User {{}}", .{});
             db_engine.runQuery(null_term_query_str);
         }
 
         // Populate with random dummy value
+        // Need some speed up, spended times to find that it is the parsonConditionValue that take time, the last switch to be exact, that parse str to value
         {
             std.debug.print("\n=====================================\n\n", .{});
             std.debug.print("Populating with {d} users.\n", .{users_count});
-            var prng = std.rand.DefaultPrng.init(0);
+            var prng = std.Random.DefaultPrng.init(0);
             const rng = prng.random();
             const populate_start_time = std.time.nanoTimestamp();
@@ -62,7 +100,7 @@ pub fn main() !void {
                 },
             );
-            for (users_count - 1) |_| {
+            for (0..users_count - 1) |_| {
                 try writer.print(
                     "('{s}', '{s}', none)",
                     .{
@@ -82,7 +120,7 @@ pub fn main() !void {
             std.debug.print("Populate duration: {d:.6} seconds\n\n", .{populate_duration});
 
-            var buffer = std.ArrayList(u8).init(std.heap.page_allocator);
+            var buffer = std.ArrayList(u8).init(allocator);
             defer buffer.deinit();
             try db_engine.file_engine.writeDbMetrics(&buffer);
             std.debug.print("{s}\n", .{buffer.items});
@@ -99,10 +137,11 @@ pub fn main() !void {
             // }
             //}
 
-        // Define your benchmark queries
+        // Run query
         {
             const queries = [_][]const u8{
                 "GRAB User {}",
+                "GRAB User {name='asd'}",
                 "GRAB User [1] {}",
                 "GRAB User [name] {}",
                 "GRAB User {name = 'Charlie'}",
@@ -122,8 +161,7 @@ pub fn main() !void {
                 std.debug.print("Query: \t\t{s}\nDuration: \t{d:.6} ms\n\n", .{ query, duration });
             }
-            std.debug.print("=====================================\n\n", .{});
         }
     }
+    std.debug.print("=====================================\n\n", .{});
 }
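The new comment in the populate block names the hot spot: the condition-parsing helper (spelled "parsonConditionValue" there), whose last switch converts a string to a typed value for every row. A hedged, hypothetical illustration of that cost shape, not ZipponDB code: hoisting the string-to-value conversion out of the per-row loop.

const std = @import("std");

// Hypothetical names; only the slow-vs-fast shape matters here.
fn matchesSlow(rows: []const i64, literal: []const u8) !usize {
    var hits: usize = 0;
    for (rows) |row| {
        // String-to-value conversion inside the loop: one parse per row.
        const value = try std.fmt.parseInt(i64, literal, 10);
        if (row == value) hits += 1;
    }
    return hits;
}

fn matchesFast(rows: []const i64, literal: []const u8) !usize {
    // Parse once, compare many times.
    const value = try std.fmt.parseInt(i64, literal, 10);
    var hits: usize = 0;
    for (rows) |row| {
        if (row == value) hits += 1;
    }
    return hits;
}

test "both shapes agree" {
    const rows = [_]i64{ 1, 2, 3, 2 };
    try std.testing.expectEqual(try matchesSlow(&rows, "2"), try matchesFast(&rows, "2"));
}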

View File

@@ -103,6 +103,22 @@ pub fn build(b: *std.Build) void {
         test_step.dependOn(&run_tests6.step);
     }
 
+    // Test
+    // -----------------------------------------------
+    {
+        const tests1 = b.addTest(.{
+            .root_source_file = b.path("lib/zid.zig"),
+            .target = target,
+            .optimize = optimize,
+            .name = "File parsing",
+            .test_runner = b.path("test_runner.zig"),
+        });
+
+        const run_tests1 = b.addRunArtifact(tests1);
+        const test_step = b.step("benchmark-parsing", "Run the benchmark of ZipponData (Single thread).");
+        test_step.dependOn(&run_tests1.step);
+    }
+
     // Benchmark
     // -----------------------------------------------
     {
@@ -152,7 +168,7 @@ pub fn build(b: *std.Build) void {
             .name = exe_name,
             .root_source_file = b.path("src/main.zig"),
             .target = tar,
-            .optimize = .ReleaseSafe,
+            .optimize = .ReleaseFast,
         });
 
         // Add the same imports as your main executable

View File

@@ -1,6 +1,6 @@
 pub const BUFFER_SIZE = 1024 * 10; // Used a bit everywhere. The size for the schema for example. 10kB
 pub const MAX_FILE_SIZE = 1024 * 1024; // 1MB
-pub const CPU_CORE = 16;
+pub const CPU_CORE = 0;
 
 // Debug
 pub const PRINT_STATE = false;

View File

@@ -1,6 +1,6 @@
 pub const BUFFER_SIZE = 1024 * 1024; // Used a bit everywhere. The size for the schema for example. 10kB
 pub const MAX_FILE_SIZE = 1024 * 1024; // 1MB
-pub const CPU_CORE = 16;
+pub const CPU_CORE = 1;
 
 // Debug
 pub const PRINT_STATE = false;
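The two config variants above pin the thread count: presumably CPU_CORE = 0 lets the engine pick, while CPU_CORE = 1 forces the single-core mode in which the parsing error does not appear. A hypothetical sketch of how such a constant is commonly mapped to a pool size (threadCount is not ZipponDB's API):

const std = @import("std");

pub const CPU_CORE = 1; // mirrors the config above

// Hypothetical helper: 0 means autodetect, any other value is used as-is.
fn threadCount() u32 {
    if (CPU_CORE == 0) return @intCast(std.Thread.getCpuCount() catch 1);
    return CPU_CORE;
}

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    var pool: std.Thread.Pool = undefined;
    try pool.init(.{ .allocator = gpa.allocator(), .n_jobs = threadCount() });
    defer pool.deinit();

    std.debug.print("pool running with {d} thread(s)\n", .{threadCount()});
}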

View File

@@ -745,7 +745,7 @@ test "Write and Read" {
     try std.fs.cwd().deleteDir("tmp");
 }
 
-test "Benchmark Write and Read" {
+test "Benchmark Write and Read All" {
     const schema = &[_]DType{
         .Int,
         .Float,
@@ -767,6 +767,20 @@
     try benchmark(schema, data);
 }
 
+test "Benchmark Write and Read Simple User" {
+    const schema = &[_]DType{
+        .Int,
+        .Str,
+    };
+
+    const data = &[_]Data{
+        Data.initInt(1),
+        Data.initStr("Bob"),
+    };
+
+    try benchmark(schema, data);
+}
+
 fn benchmark(schema: []const DType, data: []const Data) !void {
     const allocator = std.testing.allocator;
     const sizes = [_]usize{ 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000 };

View File

@@ -27,15 +27,15 @@ file_engine: FileEngine = undefined,
 schema_engine: SchemaEngine = undefined,
 thread_engine: ThreadEngine = undefined,
 
-pub fn init(parent_allocator: Allocator, potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8) Self {
+pub fn init(allocator: Allocator, potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8) Self {
     var self = Self{};
 
-    const arena = parent_allocator.create(std.heap.ArenaAllocator) catch {
+    const arena = allocator.create(std.heap.ArenaAllocator) catch {
         log.err("Error when init Engine DB allocator", .{});
         self.state = .MissingAllocator;
         return self;
     };
-    arena.* = std.heap.ArenaAllocator.init(parent_allocator);
+    arena.* = std.heap.ArenaAllocator.init(allocator);
     self.arena = arena;
     self.allocator = arena.allocator();
@@ -152,7 +152,7 @@ pub fn getEnvVariable(allocator: Allocator, variable: []const u8) ?[]const u8 {
 pub fn runQuery(self: *Self, null_term_query_str: [:0]const u8) void {
     var parser = ziqlParser.init(&self.file_engine, &self.schema_engine);
-    parser.parse(null_term_query_str) catch |err| log.err("Error parsing: {any}", .{err});
+    parser.parse(self.allocator, null_term_query_str) catch |err| log.err("Error parsing: {any}", .{err});
 }
 
 pub fn deinit(self: *Self) void {

View File

@@ -8,6 +8,8 @@ const DataType = dtype.DataType;
 const DateTime = dtype.DateTime;
 const UUID = dtype.UUID;
 
+// TODO: Move this from FileEngine
 const ZipponError = @import("error").ZipponError;
 
 pub fn writeEntityTable(

View File

@@ -240,7 +240,6 @@ pub fn parseEntities(
     const allocator = arena.allocator();
 
     var buff = std.ArrayList(u8).init(entry_allocator);
-    defer buff.deinit();
     const writer = buff.writer();
 
     const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
@@ -268,30 +267,35 @@ pub fn parseEntities(
     // But at the end, only the number of use CPU/Thread will use list simultanously
     // So I could pass list from a thread to another technicly
     var thread_writer_list = allocator.alloc(std.ArrayList(u8), to_parse.len) catch return ZipponError.MemoryError;
+    const data_types = try self.schema_engine.structName2DataType(struct_name);
 
     // Start parsing all file in multiple thread
+    var wg: std.Thread.WaitGroup = .{};
     for (to_parse, 0..) |file_index, i| {
         thread_writer_list[file_index] = std.ArrayList(u8).init(allocator);
-        self.thread_pool.spawn(parseEntitiesOneFile, .{
-            thread_writer_list[i].writer(),
-            file_index,
-            dir,
-            sstruct.zid_schema,
-            filter,
-            additional_data.*,
-            try self.schema_engine.structName2DataType(struct_name),
-            &sync_context,
-        }) catch return ZipponError.ThreadError;
+        self.thread_pool.spawnWg(
+            &wg,
+            parseEntitiesOneFile,
+            .{
+                thread_writer_list[i].writer(),
+                file_index,
+                dir,
+                sstruct.zid_schema,
+                filter,
+                additional_data.*,
+                data_types,
+                &sync_context,
+            },
+        );
     }
+    wg.wait();
 
-    // Wait for all thread to either finish or return an error
-    while (!sync_context.isComplete()) std.time.sleep(100_000); // Check every 0.1ms
 
     // Append all writer to each other
     writer.writeByte('[') catch return ZipponError.WriteError;
     for (thread_writer_list) |list| writer.writeAll(list.items) catch return ZipponError.WriteError;
     writer.writeByte(']') catch return ZipponError.WriteError;
-    for (thread_writer_list) |list| list.deinit();
 
     // Now I need to do the relation stuff, meaning parsing new files to get the relationship value
     // Without relationship to return, this function is basically finish here
@@ -321,21 +325,12 @@ fn parseEntitiesOneFile(
     defer fa.reset();
     const allocator = fa.allocator();
 
-    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| {
-        sync_context.logError("Error creating file path", err);
-        return;
-    };
-
-    var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch |err| {
-        sync_context.logError("Error initializing DataIterator", err);
-        return;
-    };
-
-    while (iter.next() catch |err| {
-        sync_context.logError("Error in iter next", err);
-        return;
-    }) |row| {
-        if (sync_context.checkStructLimit()) break;
+    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch return;
+    var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch return;
+
+    while (iter.next() catch return) |row| {
+        fa.reset();
+        if (sync_context.checkStructLimit()) return;
         if (filter) |f| if (!f.evaluate(row)) continue;
 
         EntityWriter.writeEntityJSON(
@@ -343,14 +338,10 @@ fn parseEntitiesOneFile(
             row,
             additional_data,
             data_types,
-        ) catch |err| {
-            sync_context.logError("Error writing entity", err);
-            return;
-        };
-        if (sync_context.incrementAndCheckStructLimit()) break;
-    }
-
-    _ = sync_context.completeThread();
+        ) catch return;
+        if (sync_context.incrementAndCheckStructLimit()) return;
+    }
 }

// Receive a map of UUID -> empty JsonString
@@ -410,22 +401,25 @@ pub fn parseEntitiesRelationMap(
     ) catch return ZipponError.MemoryError;
 
     // Start parsing all file in multiple thread
+    var wg: std.Thread.WaitGroup = .{};
     for (to_parse, 0..) |file_index, i| {
         thread_map_list[i] = relation_map.map.cloneWithAllocator(allocator) catch return ZipponError.MemoryError;
-        self.thread_pool.spawn(parseEntitiesRelationMapOneFile, .{
-            &thread_map_list[i],
-            file_index,
-            dir,
-            sstruct.zid_schema,
-            relation_map.additional_data,
-            try self.schema_engine.structName2DataType(struct_name),
-            &sync_context,
-        }) catch return ZipponError.ThreadError;
+        self.thread_pool.spawnWg(
+            &wg,
+            parseEntitiesRelationMapOneFile,
+            .{
+                &thread_map_list[i],
+                file_index,
+                dir,
+                sstruct.zid_schema,
+                relation_map.additional_data,
+                try self.schema_engine.structName2DataType(struct_name),
+                &sync_context,
+            },
+        );
     }
+    wg.wait();
 
-    // Wait for all thread to either finish or return an error
-    while (!sync_context.isComplete()) std.time.sleep(100_000); // Check every 0.1ms
 
     // Now here I should have a list of copy of the map with all UUID a bit everywhere
@@ -470,21 +464,11 @@ fn parseEntitiesRelationMapOneFile(
     var string_list = std.ArrayList(u8).init(allocator);
     const writer = string_list.writer();
 
-    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| {
-        sync_context.logError("Error creating file path", err);
-        return;
-    };
-
-    var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch |err| {
-        sync_context.logError("Error initializing DataIterator", err);
-        return;
-    };
-
-    while (iter.next() catch |err| {
-        sync_context.logError("Error in iter next", err);
-        return;
-    }) |row| {
-        if (sync_context.checkStructLimit()) break;
+    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch return;
+    var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch return;
+
+    while (iter.next() catch return) |row| {
+        if (sync_context.checkStructLimit()) return;
         if (!map.contains(row[0].UUID)) continue;
         defer string_list.clearRetainingCapacity();
@@ -493,23 +477,12 @@ fn parseEntitiesRelationMapOneFile(
             row,
             additional_data,
             data_types,
-        ) catch |err| {
-            sync_context.logError("Error writing entity", err);
-            return;
-        };
+        ) catch return;
 
         map.put(row[0].UUID, JsonString{
-            .slice = parent_alloc.dupe(u8, string_list.items) catch |err| {
-                sync_context.logError("Error duping data", err);
-                return;
-            },
+            .slice = parent_alloc.dupe(u8, string_list.items) catch return,
             .init = true,
-        }) catch |err| {
-            sync_context.logError("Error writing entity", err);
-            return;
-        };
-        if (sync_context.incrementAndCheckStructLimit()) break;
+        }) catch return;
+        if (sync_context.incrementAndCheckStructLimit()) return;
     }
-
-    _ = sync_context.completeThread();
 }

View File

@@ -11,13 +11,14 @@ var date_buffer: [64]u8 = undefined;
 var date_fa = std.heap.FixedBufferAllocator.init(&date_buffer);
 const date_allocator = date_fa.allocator();
 
-pub const std_options = .{
+pub const std_options = std.Options{
+    .log_level = .info,
     .logFn = myLog,
 };
 
 pub fn myLog(
     comptime message_level: std.log.Level,
-    comptime scope: @Type(.EnumLiteral),
+    comptime scope: @Type(.enum_literal),
     comptime format: []const u8,
     args: anytype,
 ) void {
@@ -48,10 +49,17 @@ pub fn setLogPath(path: []const u8) void {
 }
 
 pub fn main() !void {
-    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
-    defer arena.deinit();
+    var gpa = std.heap.GeneralPurposeAllocator(.{ .safety = true }){};
+    const allocator = gpa.allocator();
+    defer {
+        const deinit_status = gpa.deinit();
+        switch (deinit_status) {
+            .leak => std.debug.print("Leak...\n", .{}),
+            .ok => {},
+        }
+    }
 
-    var cli = Cli.init(arena.allocator(), null, null);
+    var cli = Cli.init(allocator, null, null);
     defer cli.deinit();
 
     try cli.start();

View File

@@ -2,6 +2,8 @@ const std = @import("std");
 const log = std.log.scoped(.thread);
 const U64 = std.atomic.Value(u64);
 
+// Remove the use waitgroup instead
+
 pub const Self = @This();
 
 processed_struct: U64 = U64.init(0),
@@ -27,13 +29,13 @@ pub fn completeThread(self: *Self) void {
 pub fn incrementAndCheckStructLimit(self: *Self) bool {
     if (self.max_struct == 0) return false;
-    const new_count = self.processed_struct.fetchAdd(1, .monotonic);
+    const new_count = self.processed_struct.fetchAdd(1, .acquire);
     return (new_count + 1) >= self.max_struct;
 }
 
 pub fn checkStructLimit(self: *Self) bool {
     if (self.max_struct == 0) return false;
-    const count = self.processed_struct.load(.monotonic);
+    const count = self.processed_struct.load(.acquire);
     return (count) >= self.max_struct;
 }

View File

@@ -80,8 +80,8 @@ pub fn init(file_engine: *FileEngine, schema_engine: *SchemaEngine) Self {
     };
 }
 
-pub fn parse(self: *Self, buffer: [:0]const u8) ZipponError!void {
-    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+pub fn parse(self: *Self, parent_allocator: Allocator, buffer: [:0]const u8) ZipponError!void {
+    var arena = std.heap.ArenaAllocator.init(parent_allocator);
     defer arena.deinit();
 
     const allocator = arena.allocator();