diff --git a/src/fileEngine.zig b/src/fileEngine.zig
index 24b68b4..c2bfdee 100644
--- a/src/fileEngine.zig
+++ b/src/fileEngine.zig
@@ -104,29 +104,28 @@ pub const FileEngine = struct {
         const writer = buffer.writer();
         writer.print("Database path: {s}\n", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.WriteError;
         const main_size = utils.getDirTotalSize(main_dir) catch 0;
-        writer.print("Total size: {d:.2}Mb\n", .{@as(f64, @floatFromInt(main_size)) / 1e6}) catch return FileEngineError.WriteError;
+        writer.print("Total size: {d:.2}Mb\n", .{@as(f64, @floatFromInt(main_size)) / 1024.0 / 1024.0}) catch return FileEngineError.WriteError;
         const log_dir = main_dir.openDir("LOG", .{ .iterate = true }) catch return FileEngineError.CantOpenDir;
         const log_size = utils.getDirTotalSize(log_dir) catch 0;
-        writer.print("LOG: {d:.2}Mb\n", .{@as(f64, @floatFromInt(log_size)) / 1e6}) catch return FileEngineError.WriteError;
+        writer.print("LOG: {d:.2}Mb\n", .{@as(f64, @floatFromInt(log_size)) / 1024.0 / 1024.0}) catch return FileEngineError.WriteError;
         const backup_dir = main_dir.openDir("BACKUP", .{ .iterate = true }) catch return FileEngineError.CantOpenDir;
         const backup_size = utils.getDirTotalSize(backup_dir) catch 0;
-        writer.print("BACKUP: {d:.2}Mb\n", .{@as(f64, @floatFromInt(backup_size)) / 1e6}) catch return FileEngineError.WriteError;
+        writer.print("BACKUP: {d:.2}Mb\n", .{@as(f64, @floatFromInt(backup_size)) / 1024.0 / 1024.0}) catch return FileEngineError.WriteError;
         const data_dir = main_dir.openDir("DATA", .{ .iterate = true }) catch return FileEngineError.CantOpenDir;
         const data_size = utils.getDirTotalSize(data_dir) catch 0;
-        writer.print("DATA: {d:.2}Mb\n", .{@as(f64, @floatFromInt(data_size)) / 1e6}) catch return FileEngineError.WriteError;
+        writer.print("DATA: {d:.2}Mb\n", .{@as(f64, @floatFromInt(data_size)) / 1024.0 / 1024.0}) catch return FileEngineError.WriteError;

         var iter = data_dir.iterate();
         while (iter.next() catch return FileEngineError.DirIterError) |entry| {
             if (entry.kind != .directory) continue;
             const sub_dir = data_dir.openDir(entry.name, .{ .iterate = true }) catch return FileEngineError.CantOpenDir;
             const size = utils.getDirTotalSize(sub_dir) catch 0;
-            // FIXME: This is not really MB
             writer.print("  {s}: {d:.}Mb {d} entities\n", .{
                 entry.name,
-                @as(f64, @floatFromInt(size)) / 1e6,
+                @as(f64, @floatFromInt(size)) / 1024.0 / 1024.0,
                 try self.getNumberOfEntity(entry.name),
             }) catch return FileEngineError.WriteError;
         }
@@ -224,12 +223,109 @@ pub const FileEngine = struct {
         return count;
     }

+    /// Populate a map with all UUIDs as keys and file indexes as values.
+    /// This map is stored in the SchemaStruct so that a list of UUIDs can be resolved to the list of file indexes to parse.
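+    ///
+    /// Illustrative usage (a sketch, not code from this PR; `file_engine`,
+    /// `allocator` and the "User" struct are hypothetical):
+    ///
+    ///     var map = std.AutoHashMap(UUID, usize).init(allocator);
+    ///     defer map.deinit();
+    ///     try file_engine.populateFileIndexUUIDMap("User", &map);
+    ///     // map.get(some_uuid) now gives the index of the .zid file holding it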
+    pub fn populateFileIndexUUIDMap(
+        self: *FileEngine,
+        struct_name: []const u8,
+        map: *std.AutoHashMap(UUID, usize),
+    ) ZipponError!void {
+        const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
+        const max_file_index = try self.maxFileIndex(sstruct.name);
+
+        const dir = try utils.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{});
+
+        // Multi-threading setup
+        var arena = std.heap.ThreadSafeAllocator{
+            .child_allocator = self.allocator,
+        };
+
+        var pool: std.Thread.Pool = undefined;
+        defer pool.deinit();
+        pool.init(std.Thread.Pool.Options{
+            .allocator = arena.allocator(),
+            .n_jobs = CPU_CORE,
+        }) catch return ZipponError.ThreadError;
+
+        var sync_context = ThreadSyncContext.init(
+            0,
+            max_file_index + 1,
+        );
+
+        // Create one list of UUIDs per file; each thread writes only to its own list
+        var thread_writer_list = self.allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return ZipponError.MemoryError;
+        defer {
+            for (thread_writer_list) |list| list.deinit();
+            self.allocator.free(thread_writer_list);
+        }
+
+        for (thread_writer_list) |*list| {
+            list.* = std.ArrayList(UUID).init(self.allocator);
+        }
+
+        // Spawn threads for each file
+        for (0..(max_file_index + 1)) |file_index| {
+            pool.spawn(populateFileIndexUUIDMapOneFile, .{
+                sstruct,
+                &thread_writer_list[file_index],
+                file_index,
+                dir,
+                &sync_context,
+            }) catch return ZipponError.ThreadError;
+        }
+
+        // Wait for all threads to complete
+        while (!sync_context.isComplete()) {
+            std.time.sleep(10_000_000);
+        }
+
+        // Combine results
+        for (thread_writer_list, 0..) |list, file_index| {
+            for (list.items) |uuid| map.put(uuid, file_index) catch return ZipponError.MemoryError;
+        }
+    }
+
+    fn populateFileIndexUUIDMapOneFile(
+        sstruct: SchemaStruct,
+        list: *std.ArrayList(UUID),
+        file_index: u64,
+        dir: std.fs.Dir,
+        sync_context: *ThreadSyncContext,
+    ) void {
+        var data_buffer: [BUFFER_SIZE]u8 = undefined;
+        var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
+        defer fa.reset();
+        const allocator = fa.allocator();
+
+        var path_buffer: [128]u8 = undefined;
+        const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| {
+            sync_context.logError("Error creating file path", err);
+            return;
+        };
+
+        var iter = zid.DataIterator.init(allocator, path, dir, sstruct.zid_schema) catch |err| {
+            sync_context.logError("Error initializing DataIterator", err);
+            return;
+        };
+        defer iter.deinit();
+
+        while (iter.next() catch return) |row| {
+            list.*.append(UUID{ .bytes = row[0].UUID }) catch |err| {
+                sync_context.logError("Error appending UUID to list", err);
+                return;
+            };
+        }
+
+        _ = sync_context.completeThread();
+    }
+
     /// Use a struct name and filter to populate a map with all UUID bytes as key and void as value
-    pub fn populateUUIDMap(
+    /// This map is used as the value for the link array, so I can do a `contains` on it.
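+    ///
+    /// Illustrative usage (a sketch mirroring the call site in `ziqlParser.zig`;
+    /// `file_engine`, `filter` and `additional_data` are stand-ins):
+    ///
+    ///     var map = std.AutoHashMap(UUID, void).init(allocator);
+    ///     defer map.deinit();
+    ///     try file_engine.populateVoidUUIDMap(struct_name, filter, &map, &additional_data);
+    ///     const value = ConditionValue.initLinkArray(&map);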
+    pub fn populateVoidUUIDMap(
         self: *FileEngine,
         struct_name: []const u8,
         filter: ?Filter,
-        map: *std.AutoHashMap([16]u8, void),
+        map: *std.AutoHashMap(UUID, void),
         additional_data: *AdditionalData,
     ) ZipponError!void {
         const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
@@ -255,19 +351,19 @@
         );

         // Create a thread-safe writer for each file
-        var thread_writer_list = self.allocator.alloc(std.ArrayList([16]u8), max_file_index + 1) catch return FileEngineError.MemoryError;
+        var thread_writer_list = self.allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return FileEngineError.MemoryError;
         defer {
             for (thread_writer_list) |list| list.deinit();
             self.allocator.free(thread_writer_list);
         }

         for (thread_writer_list) |*list| {
-            list.* = std.ArrayList([16]u8).init(self.allocator);
+            list.* = std.ArrayList(UUID).init(self.allocator);
         }

         // Spawn threads for each file
         for (0..(max_file_index + 1)) |file_index| {
-            pool.spawn(populateUUIDMapOneFile, .{
+            pool.spawn(populateVoidUUIDMapOneFile, .{
                 sstruct,
                 filter,
                 &thread_writer_list[file_index],
@@ -288,10 +384,10 @@
         }
     }

-    fn populateUUIDMapOneFile(
+    fn populateVoidUUIDMapOneFile(
         sstruct: SchemaStruct,
         filter: ?Filter,
-        list: *std.ArrayList([16]u8),
+        list: *std.ArrayList(UUID),
         file_index: u64,
         dir: std.fs.Dir,
         sync_context: *ThreadSyncContext,
@@ -315,7 +411,7 @@

         while (iter.next() catch return) |row| {
             if (filter == null or filter.?.evaluate(row)) {
-                list.*.append(row[0].UUID) catch |err| {
+                list.*.append(UUID{ .bytes = row[0].UUID }) catch |err| {
                     sync_context.logError("Error initializing DataIterator", err);
                     return;
                 };
diff --git a/src/schemaParser.zig b/src/schemaParser.zig
index bf0c49d..bf21d06 100644
--- a/src/schemaParser.zig
+++ b/src/schemaParser.zig
@@ -2,9 +2,11 @@
 const std = @import("std");
 const zid = @import("ZipponData");
 const Allocator = std.mem.Allocator;
 const DataType = @import("dtype").DataType;
+const UUID = @import("dtype").UUID;
 const Toker = @import("tokenizers/schema.zig").Tokenizer;
 const Token = @import("tokenizers/schema.zig").Token;
 const Loc = @import("tokenizers/shared/loc.zig").Loc;
+const UUIDFileIndex = @import("stuffs/UUIDFileIndex.zig").UUIDIndexMap;
 const send = @import("stuffs/utils.zig").send;
 const printError = @import("stuffs/utils.zig").printError;
@@ -43,7 +45,7 @@
     types: []DataType,
     zid_schema: []zid.DType,
     links: std.StringHashMap([]const u8), // Map key as member_name and value as struct_name, like a dtype
-    // uuid_file_index: std.AutoHashMap([16]u8, u64), // Map UUID to the index of the file they are store in
+    uuid_file_index: UUIDFileIndex, // Map UUID to the index of the file it is stored in

     pub fn init(
         allocator: Allocator,
@@ -59,7 +61,7 @@
             .types = types,
             .zid_schema = SchemaStruct.fileDataSchema(allocator, types) catch return SchemaParserError.MemoryError,
             .links = links,
-            //.uuid_file_index = std.AutoHashMap([16]u8, u64).init(allocator),
+            .uuid_file_index = UUIDFileIndex.init(allocator) catch return SchemaParserError.MemoryError,
         };
     }

@@ -68,7 +70,7 @@
         self.allocator.free(self.types);
         self.allocator.free(self.zid_schema);
         self.links.deinit();
-        //self.uuid_file_index.deinit();
+        self.uuid_file_index.deinit();
     }

     fn fileDataSchema(allocator: Allocator, dtypes: []DataType) SchemaParserError![]zid.DType {
diff --git a/src/stuffs/RadixTrie.zig.old b/src/stuffs/RadixTrie.zig.old
new file mode 100644
index 0000000..b7d3895
--- /dev/null
+++ b/src/stuffs/RadixTrie.zig.old
@@ -0,0 +1,361 @@
+const std = @import("std");
+const UUID = @import("dtype").UUID;
+const ArenaAllocator = std.heap.ArenaAllocator;
+
+// TODO:
+// 1. Basic RadixTrie and Node - OK
+// 2. Add one UUID
+// 3. Get one file index using one UUID
+// 4. Get a list of file index using a list of UUID
+
+const Node = union(enum) {
+    branch: *std.StringHashMap(*Node),
+    leaf: usize,
+
+    fn contains(self: Node, id: []const u8) bool {
+        return switch (self) {
+            .leaf => id.len == 0,
+            .branch => |branch| {
+                var longest_prefix: usize = 0;
+                var longest_key: ?[]const u8 = null;
+
+                var it = branch.iterator();
+                while (it.next()) |entry| {
+                    const key = entry.key_ptr.*;
+                    const common_prefix = commonPrefix(key, id);
+                    if (common_prefix > longest_prefix) {
+                        longest_prefix = common_prefix;
+                        longest_key = key;
+                    }
+                }
+
+                if (longest_prefix == 0) {
+                    return false;
+                } else if (longest_prefix == id.len and longest_key.?.len == id.len) {
+                    return true;
+                } else if (longest_prefix < id.len) {
+                    const next_node = branch.get(longest_key.?).?;
+                    return next_node.contains(id[longest_prefix..]);
+                } else {
+                    return false;
+                }
+            },
+        };
+    }
+
+    fn get(self: Node, id: []const u8) ?usize {
+        switch (self) {
+            .leaf => |leaf| {
+                if (id.len == 0) return leaf;
+                return null;
+            },
+            .branch => |branch| {
+                var longest_prefix: usize = 0;
+                var longest_key: ?[]const u8 = null;
+
+                var it = branch.iterator();
+                while (it.next()) |entry| {
+                    const key = entry.key_ptr.*;
+                    const common_prefix = commonPrefix(key, id);
+                    if (common_prefix > longest_prefix) {
+                        longest_prefix = common_prefix;
+                        longest_key = key;
+                    }
+                }
+
+                if (longest_prefix == 0) {
+                    return null;
+                } else if (longest_prefix == id.len and longest_key.?.len == id.len) {
+                    return branch.get(id).?.leaf;
+                } else if (longest_prefix < id.len) {
+                    const next_node = branch.get(longest_key.?).?;
+                    return next_node.get(id[longest_prefix..]);
+                } else {
+                    return null;
+                }
+            },
+        }
+    }
+
+    fn insert(self: *Node, arena: *ArenaAllocator, id: []const u8, file_index: usize) !void {
+        const allocator = arena.allocator();
+        switch (self.*) {
+            .leaf => {
+                // If we're at a leaf, we need to create a new branch
+                const new_branch = try allocator.create(std.StringHashMap(*Node));
+                new_branch.* = std.StringHashMap(*Node).init(allocator);
+
+                // Copy the current leaf into a fresh node before `self` is
+                // overwritten below, otherwise the branch would point at itself
+                const old_leaf = try allocator.create(Node);
+                old_leaf.* = self.*;
+                try new_branch.put("", old_leaf);
+
+                // Create a new leaf for the new UUID
+                const new_leaf = try allocator.create(Node);
+                new_leaf.* = Node{ .leaf = file_index };
+                try new_branch.put(id, new_leaf);
+
+                // Update the current node to be a branch
+                self.* = Node{ .branch = new_branch };
+            },
+            .branch => |branch| {
+                var longest_prefix: usize = 0;
+                var longest_key: ?[]const u8 = null;
+
+                // Find the longest common prefix
+                var it = branch.iterator();
+                while (it.next()) |entry| {
+                    const key = entry.key_ptr.*;
+                    const common_prefix = commonPrefix(key, id);
+                    if (common_prefix > longest_prefix) {
+                        longest_prefix = common_prefix;
+                        longest_key = key;
+                    }
+                }
+
+                if (longest_prefix == 0) {
+                    // No common prefix, add a new leaf
+                    const new_leaf = try allocator.create(Node);
+                    new_leaf.* = Node{ .leaf = file_index };
+                    try branch.put(try allocator.dupe(u8, id), new_leaf);
+                } else if (longest_prefix == id.len and longest_key.?.len == id.len) {
+                    // Exact match, update the leaf
+                    const existing_node = branch.get(longest_key.?).?;
+                    existing_node.* = Node{ .leaf = file_index };
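+                } else {
+                    // Partial match
+                    const common = id[0..longest_prefix];
+                    const existing_suffix = longest_key.?[longest_prefix..];
+                    const new_suffix = id[longest_prefix..];
+
+                    if (!branch.contains(common)) {
+                        // The partial match doesn't exist yet, split the node
+
+                        // In the comments below, assume 1000 is already in the branch and we add 1011
+                        //
+                        // Sketch of the split (keys shortened for readability; real keys are slices of UUID bytes):
+                        //
+                        //   before:  branch --"1000"--> leaf(old)
+                        //   after:   branch --"10"--> new_branch --"00"--> leaf(old)
+                        //                                        \--"11"--> leaf(new)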
+                        // 1. Create a new Node branch with the common part of the UUID. This will be 10 in our situation
+                        const new_branch = try allocator.create(std.StringHashMap(*Node));
+                        new_branch.* = std.StringHashMap(*Node).init(allocator);
+
+                        const new_node = try allocator.create(Node);
+                        new_node.* = Node{ .branch = new_branch };
+                        try branch.put(try allocator.dupe(u8, common), new_node);
+
+                        // 2. Re-attach the existing leaf under the new branch, keyed by the rest of its key. E.g. 00, the remaining bits of the existing key
+                        const existing_node = branch.get(longest_key.?).?;
+                        try new_branch.put(try allocator.dupe(u8, existing_suffix), existing_node);
+
+                        // 3. Also add the new key, here 11
+                        const new_leaf = try allocator.create(Node);
+                        new_leaf.* = Node{ .leaf = file_index };
+                        try new_branch.put(try allocator.dupe(u8, new_suffix), new_leaf);
+
+                        // 4. Delete the previously existing key
+                        const kv = branch.fetchRemove(longest_key.?);
+                        allocator.free(kv.?.key);
+                        allocator.destroy(kv.?.value);
+                    } else {
+                        // The partial match exists, add a leaf
+                        const new_leaf = try allocator.create(Node);
+                        new_leaf.* = Node{ .leaf = file_index };
+
+                        var existing_node = branch.get(common).?;
+                        try existing_node.branch.put(try allocator.dupe(u8, new_suffix), new_leaf);
+                    }
+                }
+            },
+        }
+    }
+};
+
+const RadixTrie = struct {
+    arena: *ArenaAllocator,
+    root_node: *Node,
+
+    fn init(allocator: std.mem.Allocator) !RadixTrie {
+        const arena = try allocator.create(ArenaAllocator);
+        errdefer allocator.destroy(arena);
+        arena.* = ArenaAllocator.init(allocator);
+
+        const map = try arena.allocator().create(std.StringHashMap(*Node));
+        map.* = std.StringHashMap(*Node).init(arena.allocator());
+
+        const node = try arena.allocator().create(Node);
+        node.* = Node{ .branch = map };
+
+        return RadixTrie{
+            .root_node = node,
+            .arena = arena,
+        };
+    }
+
+    fn deinit(self: *RadixTrie) void {
+        const allocator = self.arena.child_allocator;
+        self.arena.deinit();
+        allocator.destroy(self.arena);
+    }
+
+    fn insert(self: *RadixTrie, uuid: UUID, file_index: usize) !void {
+        try self.root_node.*.insert(self.arena, uuid.bytes[0..], file_index);
+    }
+
+    fn contains(self: RadixTrie, uuid: UUID) bool {
+        return self.root_node.contains(uuid.bytes[0..]);
+    }
+
+    fn get(self: RadixTrie, uuid: UUID) ?usize {
+        return self.root_node.get(uuid.bytes[0..]);
+    }
+};
+
+fn commonPrefix(a: []const u8, b: []const u8) usize {
+    var i: usize = 0;
+    while (i < a.len and i < b.len and a[i] == b[i]) : (i += 1) {}
+    return i;
+}
+
+test "Create empty RadixTrie" {
+    const allocator = std.testing.allocator;
+
+    var radix_trie = try RadixTrie.init(allocator);
+    defer radix_trie.deinit();
+}
+
+test "Get UUID in RadixTrie" {
+    const allocator = std.testing.allocator;
+
+    var radix_trie = try RadixTrie.init(allocator);
+    defer radix_trie.deinit();
+
+    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");
+
+    try radix_trie.insert(uuid, 0);
+    const expected: usize = 0;
+    try std.testing.expectEqual(radix_trie.get(uuid), expected);
+}
+
+test "Update UUID in RadixTrie" {
+    const allocator = std.testing.allocator;
+
+    var radix_trie = try RadixTrie.init(allocator);
+    defer radix_trie.deinit();
+
+    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");
+
+    for (0..1000) |i| {
+        try radix_trie.insert(uuid, i);
+        try std.testing.expectEqual(radix_trie.get(uuid), i);
+    }
+}
+
+test "Split Node RadixTrie" {
+    const allocator = std.testing.allocator;
+
+    var radix_trie = try RadixTrie.init(allocator);
+    defer radix_trie.deinit();
+
+    const uuid0 = try UUID.parse("00000000-0000-0000-0000-000000000000");
+    const uuid1 = try UUID.parse("00000000-0000-0000-0000-000000000001");
+    const uuid2 = try UUID.parse("00000000-0000-0000-0000-000000000002");
+    try radix_trie.insert(uuid0, 0);
+    try radix_trie.insert(uuid1, 1);
+    try radix_trie.insert(uuid2, 2);
+
+    try std.testing.expect(radix_trie.contains(uuid0));
+    try std.testing.expect(radix_trie.contains(uuid1));
+    try std.testing.expect(radix_trie.contains(uuid2));
+
+    const expected_values = [_]usize{ 0, 1, 2 };
+    try std.testing.expectEqual(radix_trie.get(uuid0), expected_values[0]);
+    try std.testing.expectEqual(radix_trie.get(uuid1), expected_values[1]);
+    try std.testing.expectEqual(radix_trie.get(uuid2), expected_values[2]);
+}
+
+test "Multiple Node RadixTrie with Deep Subdivisions" {
+    const allocator = std.testing.allocator;
+
+    var radix_trie = try RadixTrie.init(allocator);
+    defer radix_trie.deinit();
+
+    const uuids = [_][]const u8{
+        "00000000-0000-0000-0000-000000000000",
+        "00000000-0000-0000-0000-000000000001",
+        "00000000-0000-0000-0000-000000000002",
+        "10000000-0000-0000-0000-000000000000",
+        "11000000-0000-0000-0000-000000000000",
+        "11100000-0000-0000-0000-000000000000",
+        "11110000-0000-0000-0000-000000000000",
+        "11111000-0000-0000-0000-000000000000",
+        "11111100-0000-0000-0000-000000000000",
+        "11111110-0000-0000-0000-000000000000",
+        "11111111-0000-0000-0000-000000000000",
+    };
+
+    // Insert UUIDs
+    for (uuids, 0..) |uuid_str, i| {
+        const uuid = try UUID.parse(uuid_str);
+        try radix_trie.insert(uuid, i);
+    }
+
+    // Test contains and get
+    for (uuids, 0..) |uuid_str, i| {
+        const uuid = try UUID.parse(uuid_str);
+        try std.testing.expect(radix_trie.contains(uuid));
+        try std.testing.expectEqual(radix_trie.get(uuid).?, i);
+    }
+
+    // Test non-existent UUIDs
+    const non_existent_uuids = [_][]const u8{
+        "ffffffff-ffff-ffff-ffff-ffffffffffff",
+        "22222222-2222-2222-2222-222222222222",
+        "11111111-1111-1111-1111-111111111111",
+    };
+
+    for (non_existent_uuids) |uuid_str| {
+        const uuid = try UUID.parse(uuid_str);
+        std.debug.print("{s}\n", .{uuid_str});
+        try std.testing.expect(!radix_trie.contains(uuid));
+        try std.testing.expectEqual(radix_trie.get(uuid), null);
+    }
+
+    // Test partial matches
+    const partial_matches = [_]struct { uuid: []const u8, expected_value: ?usize }{
+        .{ .uuid = "00000000-0000-0000-0000-000000000003", .expected_value = null },
+        .{ .uuid = "10000000-0000-0000-0000-000000000001", .expected_value = null },
+        .{ .uuid = "11100000-0000-0000-0000-000000000001", .expected_value = null },
+        .{ .uuid = "11111111-1000-0000-0000-000000000000", .expected_value = null },
+    };
+
+    for (partial_matches) |pm| {
+        const uuid = try UUID.parse(pm.uuid);
+        try std.testing.expectEqual(pm.expected_value, radix_trie.get(uuid));
+    }
+}
+
+test "Radix benchmark insert" {
+    const allocator = std.testing.allocator;
+
+    var radix_trie = try RadixTrie.init(allocator);
+    defer radix_trie.deinit();
+
+    for (0..10_000) |_| {
+        const uuid = UUID.init();
+        try radix_trie.insert(uuid, 0);
+        _ = radix_trie.contains(uuid);
+    }
+
+    std.debug.print("Memory use: {d}\n", .{radix_trie.arena.queryCapacity()});
+}
+
+test "Hashmap benchmark" {
+    const allocator = std.testing.allocator;
+    var arena = ArenaAllocator.init(allocator);
+    defer arena.deinit();
+
+    var map = std.AutoHashMap(UUID, usize).init(arena.allocator());
+
+    for (0..10_000) |_| {
+        const uuid = UUID.init();
+        try map.put(uuid, 0);
+        _ = map.contains(uuid);
+    }
+
+    std.debug.print("Memory use: {d}\n", .{arena.queryCapacity()});
+}
diff --git a/src/stuffs/UUIDFileIndex.zig b/src/stuffs/UUIDFileIndex.zig
new file mode 100644
index 0000000..dc85757
--- /dev/null
+++ b/src/stuffs/UUIDFileIndex.zig
@@ -0,0 +1,181 @@
+const std = @import("std");
+const UUID = @import("dtype").UUID;
+const ArenaAllocator = std.heap.ArenaAllocator;
+
+pub const UUIDIndexMap = struct {
+    arena: *ArenaAllocator,
+    map: *std.AutoHashMap(UUID, usize),
+
+    pub fn init(allocator: std.mem.Allocator) !UUIDIndexMap {
+        const arena = try allocator.create(ArenaAllocator);
+        errdefer allocator.destroy(arena);
+        arena.* = ArenaAllocator.init(allocator);
+
+        const map = try arena.allocator().create(std.AutoHashMap(UUID, usize));
+        map.* = std.AutoHashMap(UUID, usize).init(arena.allocator());
+
+        return UUIDIndexMap{
+            .map = map,
+            .arena = arena,
+        };
+    }
+
+    pub fn deinit(self: *UUIDIndexMap) void {
+        const allocator = self.arena.child_allocator;
+        self.arena.deinit();
+        allocator.destroy(self.arena);
+    }
+
+    pub fn put(self: *UUIDIndexMap, uuid: UUID, file_index: usize) !void {
+        // UUID and usize are plain values and the map stores copies of them,
+        // so no extra arena allocation is needed here
+        try self.map.put(uuid, file_index);
+    }
+
+    pub fn contains(self: UUIDIndexMap, uuid: UUID) bool {
+        return self.map.contains(uuid);
+    }
+
+    pub fn get(self: UUIDIndexMap, uuid: UUID) ?usize {
+        return self.map.get(uuid);
+    }
+};
+
+test "Create empty UUIDIndexMap" {
+    const allocator = std.testing.allocator;
+
+    var imap = try UUIDIndexMap.init(allocator);
+    defer imap.deinit();
+}
+
+test "Get UUID in UUIDIndexMap" {
+    const allocator = std.testing.allocator;
+
+    var imap = try UUIDIndexMap.init(allocator);
+    defer imap.deinit();
+
+    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");
+
+    try imap.put(uuid, 0);
+    const expected: usize = 0;
+    try std.testing.expectEqual(imap.get(uuid), expected);
+}
+
+test "Update UUID in UUIDIndexMap" {
+    const allocator = std.testing.allocator;
+
+    var imap = try UUIDIndexMap.init(allocator);
+    defer imap.deinit();
+
+    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");
+
+    for (0..1000) |i| {
+        try imap.put(uuid, i);
+        try std.testing.expectEqual(imap.get(uuid), i);
+    }
+}
+
+test "UUIDIndexMap multiple keys" {
+    const allocator = std.testing.allocator;
+
+    var imap = try UUIDIndexMap.init(allocator);
+    defer imap.deinit();
+
+    const uuid0 = try UUID.parse("00000000-0000-0000-0000-000000000000");
+    const uuid1 = try UUID.parse("00000000-0000-0000-0000-000000000001");
+    const uuid2 = try UUID.parse("00000000-0000-0000-0000-000000000002");
+    try imap.put(uuid0, 0);
+    try imap.put(uuid1, 1);
+    try imap.put(uuid2, 2);
+
+    try std.testing.expect(imap.contains(uuid0));
+    try std.testing.expect(imap.contains(uuid1));
+    try std.testing.expect(imap.contains(uuid2));
+
+    const expected_values = [_]usize{ 0, 1, 2 };
+    try std.testing.expectEqual(imap.get(uuid0), expected_values[0]);
+    try std.testing.expectEqual(imap.get(uuid1), expected_values[1]);
+    try std.testing.expectEqual(imap.get(uuid2), expected_values[2]);
+}
+
+test "UUIDIndexMap with many similar keys" {
+    const allocator = std.testing.allocator;
+
+    var imap = try UUIDIndexMap.init(allocator);
+    defer imap.deinit();
+
+    const uuids = [_][]const u8{
+        "00000000-0000-0000-0000-000000000000",
+        "00000000-0000-0000-0000-000000000001",
+        "00000000-0000-0000-0000-000000000002",
+        "10000000-0000-0000-0000-000000000000",
+        "11000000-0000-0000-0000-000000000000",
+        "11100000-0000-0000-0000-000000000000",
+        "11110000-0000-0000-0000-000000000000",
+        "11111000-0000-0000-0000-000000000000",
+        "11111100-0000-0000-0000-000000000000",
+        "11111110-0000-0000-0000-000000000000",
+        "11111111-0000-0000-0000-000000000000",
+    };
+
+    // Insert UUIDs
+    for (uuids, 0..) |uuid_str, i| {
+        const uuid = try UUID.parse(uuid_str);
+        try imap.put(uuid, i);
+    }
+
+    // Test contains and get
+    for (uuids, 0..) |uuid_str, i| {
+        const uuid = try UUID.parse(uuid_str);
+        try std.testing.expect(imap.contains(uuid));
+        try std.testing.expectEqual(imap.get(uuid).?, i);
+    }
+
+    // Test non-existent UUIDs
+    const non_existent_uuids = [_][]const u8{
+        "ffffffff-ffff-ffff-ffff-ffffffffffff",
+        "22222222-2222-2222-2222-222222222222",
+        "11111111-1111-1111-1111-111111111111",
+    };
+
+    for (non_existent_uuids) |uuid_str| {
+        const uuid = try UUID.parse(uuid_str);
+        try std.testing.expect(!imap.contains(uuid));
+        try std.testing.expectEqual(imap.get(uuid), null);
+    }
+
+    // Keys that only share a prefix with stored keys (partial matches in the old RadixTrie) must also miss
+    const partial_matches = [_]struct { uuid: []const u8, expected_value: ?usize }{
+        .{ .uuid = "00000000-0000-0000-0000-000000000003", .expected_value = null },
+        .{ .uuid = "10000000-0000-0000-0000-000000000001", .expected_value = null },
+        .{ .uuid = "11100000-0000-0000-0000-000000000001", .expected_value = null },
+        .{ .uuid = "11111111-1000-0000-0000-000000000000", .expected_value = null },
+    };
+
+    for (partial_matches) |pm| {
+        const uuid = try UUID.parse(pm.uuid);
+        try std.testing.expectEqual(pm.expected_value, imap.get(uuid));
+    }
+}
+
+test "UUIDIndexMap benchmark insert" {
+    const allocator = std.testing.allocator;
+
+    var imap = try UUIDIndexMap.init(allocator);
+    defer imap.deinit();
+
+    for (0..1_000_000) |_| {
+        const uuid = UUID.init();
+        try imap.put(uuid, 0);
+        _ = imap.contains(uuid);
+    }
+
+    const mb: f64 = @as(f64, @floatFromInt(imap.arena.queryCapacity())) / 1024.0 / 1024.0;
+    std.debug.print("Memory use for 1 000 000: {d}MB\n", .{mb});
+}
diff --git a/src/stuffs/UUIDTree.zig b/src/stuffs/UUIDTree.zig
deleted file mode 100644
index 533b93e..0000000
--- a/src/stuffs/UUIDTree.zig
+++ /dev/null
@@ -1,77 +0,0 @@
-const std = @import("std");
-const UUID = @import("dtype").UUID;
-const ArenaAllocator = std.heap.ArenaAllocator;
-
-// Maube use that later, the point is that it take only 16 comparison per UUID and save a lot of memory
-// But now that I think about it, 16 comparison vs 1, you get me
-pub const UUIDTree = struct {
-    arena: *ArenaAllocator,
-    root_node: Node,
-    len: usize,
-
-    pub fn init(allocator: std.mem.Allocator) UUIDTree {
-        var arena = ArenaAllocator.init(allocator);
-        return UUIDTree{ .arena = &arena, .root_node = Node.init(&arena, 0), .len = 0 };
-    }
-
-    pub fn deinit(self: *UUIDTree) void {
-        self.arena.deinit();
-    }
-
-    pub fn add(self: *UUIDTree, uuid: UUID) void {
-        if (self.root_node.add(uuid, self.arena)) self.len += 1;
-    }
-
-    pub fn isIn(self: UUIDTree, uuid: UUID) bool {
-        return self.root_node.evaluate(uuid);
-    }
-};
-
-const Node = struct {
-    depth: u4, // Because a UUID is 16 len and u4 have 16 different value
-    map: std.AutoHashMap(u8, ?Node),
-
-    fn init(arena: *ArenaAllocator, depth: u4) Node {
-        const allocator = arena.*.allocator();
-        return Node{
-            .depth = depth,
-            .map = std.AutoHashMap(u8, ?Node).init(allocator),
-        };
-    }
-    fn evaluate(self: Node, _: UUID) bool {
-        return switch (self.depth) {
-            15 => true,
-            else => false,
-        };
-    }
-
-    fn add(self: *Node, uuid: UUID, arena: *ArenaAllocator) bool {
-        switch (self.depth) {
-            15 => {
-                const c = uuid.bytes[self.depth];
-                std.debug.print("{b}\n", .{c});
-
-                if (self.map.get(c)) |_| {
-                    std.debug.print("UUID already in map\n", .{});
-                    return false;
-                } else {
-                    self.map.put(c, null) catch return false;
-                    return true;
-                }
-            },
-            else => {
-                const c = uuid.bytes[self.depth];
-                std.debug.print("{b}\n", .{c});
-
-                // Could use getOrPut for perf I think
-                if (self.map.getPtr(c)) 
|next_node| { - return next_node.*.?.add(uuid, arena); - } else { - var new_node = Node.init(arena, self.depth + 1); - self.map.put(c, new_node) catch return false; - return new_node.add(uuid, arena); - } - }, - } - } -}; diff --git a/src/stuffs/filter.zig b/src/stuffs/filter.zig index d3eaa9c..360f782 100644 --- a/src/stuffs/filter.zig +++ b/src/stuffs/filter.zig @@ -65,7 +65,7 @@ pub const ConditionValue = union(enum) { float_array: std.ArrayList(f64), bool_array: std.ArrayList(bool), unix_array: std.ArrayList(u64), - link_array: *std.AutoHashMap([16]u8, void), + link_array: *std.AutoHashMap(UUID, void), pub fn deinit(self: ConditionValue) void { switch (self) { @@ -148,7 +148,7 @@ pub const ConditionValue = union(enum) { return ConditionValue{ .unix_array = s2t.parseArrayDatetimeUnix(allocator, value) }; } - pub fn initLinkArray(value: *std.AutoHashMap([16]u8, void)) ConditionValue { + pub fn initLinkArray(value: *std.AutoHashMap(UUID, void)) ConditionValue { return ConditionValue{ .link_array = value }; } }; @@ -384,7 +384,7 @@ test "ConditionValue: link" { const allocator = std.testing.allocator; // Create a hash map for storing UUIDs - var hash_map = std.AutoHashMap([16]u8, void).init(allocator); + var hash_map = std.AutoHashMap(UUID, void).init(allocator); defer hash_map.deinit(); // Create a UUID to add to the hash map @@ -392,8 +392,8 @@ test "ConditionValue: link" { const uuid2 = try UUID.parse("223e4567-e89b-12d3-a456-426614174000"); // Add UUIDs to the hash map - try hash_map.put(uuid1.bytes, {}); - try hash_map.put(uuid2.bytes, {}); + try hash_map.put(uuid1, {}); + try hash_map.put(uuid2, {}); // Create a ConditionValue with the link var value = ConditionValue.initLinkArray(&hash_map); @@ -402,6 +402,6 @@ test "ConditionValue: link" { try std.testing.expectEqual(@as(usize, 2), value.link_array.count()); // Check that specific UUIDs are in the hash map - try std.testing.expect(value.link_array.contains(uuid1.bytes)); - try std.testing.expect(value.link_array.contains(uuid2.bytes)); + try std.testing.expect(value.link_array.contains(uuid1)); + try std.testing.expect(value.link_array.contains(uuid2)); } diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig index b3f9cc8..8d842f6 100644 --- a/src/ziqlParser.zig +++ b/src/ziqlParser.zig @@ -669,8 +669,8 @@ pub const Parser = struct { .datetime => condition.value = ConditionValue.initDateTime(self.toker.buffer[start_index..token.loc.end]), .bool => condition.value = ConditionValue.initBool(self.toker.buffer[start_index..token.loc.end]), .link_array => { - var map = std.AutoHashMap([16]u8, void).init(self.allocator); - try self.file_engine.populateUUIDMap( + var map = std.AutoHashMap(UUID, void).init(self.allocator); + try self.file_engine.populateVoidUUIDMap( struct_name, filter, &map,