const std = @import("std");
const UUID = @import("dtype").UUID;
const ArenaAllocator = std.heap.ArenaAllocator;

// Roadmap:
// 1. Basic RadixTrie and Node - OK
// 2. Add one UUID
// 3. Get one file index using one UUID
// 4. Get a list of file index using a list of UUID

/// Children of a branch node, keyed by the edge label leading to each child.
const ChildMap = std.StringHashMap(*Node);

/// The child edge of a branch that shares the longest non-empty prefix with a
/// searched id.
const Edge = struct {
    /// Edge label as stored in the branch map.
    key: []const u8,
    /// Child node reached through `key`.
    node: *Node,
    /// Number of leading bytes `key` has in common with the searched id (>= 1).
    shared: usize,
};

/// A radix-trie node: either a branch holding labelled edges to child nodes,
/// or a leaf holding the file index stored for the id that ends here.
///
/// Invariant maintained by `insert`: inside one branch no two edge labels
/// start with the same byte. The empty label "" is reserved for an id that
/// ends exactly at that branch.
const Node = union(enum) {
    branch: *std.StringHashMap(*Node),
    leaf: usize,

    /// Returns true when `id` has a file index stored below this node.
    fn contains(self: Node, id: []const u8) bool {
        return self.get(id) != null;
    }

    /// Returns the file index stored for `id` below this node, or null when
    /// `id` was never inserted.
    fn get(self: Node, id: []const u8) ?usize {
        switch (self) {
            .leaf => |file_index| return if (id.len == 0) file_index else null,
            .branch => |children| {
                if (id.len == 0) {
                    // The id ends at this branch; its value, if any, lives
                    // under the reserved empty label.
                    const terminal = children.get("") orelse return null;
                    return terminal.get(id);
                }
                const edge = longestSharedEdge(children, id) orelse return null;
                // The id diverges in the middle of the edge label: not stored.
                if (edge.shared < edge.key.len) return null;
                return edge.node.get(id[edge.shared..]);
            },
        }
    }

    /// Stores `file_index` for `id` below this node, replacing any previous
    /// value for the same id.
    ///
    /// Every allocation comes from `arena` and `id` is copied, so the caller
    /// keeps ownership of `id`. Nothing is freed individually: labels and
    /// nodes that become unused after a split stay in the arena until it is
    /// deinitialized.
    fn insert(self: *Node, arena: *ArenaAllocator, id: []const u8, file_index: usize) std.mem.Allocator.Error!void {
        const allocator = arena.allocator();
        switch (self.*) {
            .leaf => |existing_index| {
                if (id.len == 0) {
                    // Same id as this leaf: plain update.
                    self.* = Node{ .leaf = file_index };
                    return;
                }
                // The id continues past this leaf. Turn the leaf into a
                // branch that keeps the old value under "" and stores the new
                // one under the remaining bytes.
                const children = try createBranch(allocator);
                try children.put("", try createLeaf(allocator, existing_index));
                try children.put(try allocator.dupe(u8, id), try createLeaf(allocator, file_index));
                self.* = Node{ .branch = children };
            },
            .branch => |children| {
                if (id.len == 0) {
                    if (children.get("")) |terminal| {
                        try terminal.insert(arena, id, file_index);
                        return;
                    }
                    try children.put("", try createLeaf(allocator, file_index));
                    return;
                }

                const edge = longestSharedEdge(children, id) orelse {
                    // No edge starts with the same byte: add a fresh leaf.
                    try children.put(try allocator.dupe(u8, id), try createLeaf(allocator, file_index));
                    return;
                };

                if (edge.shared == edge.key.len) {
                    // The whole edge label matches: continue in the child
                    // with the unmatched remainder of the id.
                    try edge.node.insert(arena, id[edge.shared..], file_index);
                    return;
                }

                // Partial match: split the edge. With "1000" stored and
                // "1011" inserted, the edge "1000" becomes "10" leading to a
                // new branch with the edges "00" (old child) and "11" (new
                // leaf).
                const shared_label = try allocator.dupe(u8, id[0..edge.shared]);
                const old_suffix = try allocator.dupe(u8, edge.key[edge.shared..]);
                const new_suffix = try allocator.dupe(u8, id[edge.shared..]);

                const split = try createBranch(allocator);
                // The old child is re-linked, not copied, so it must stay alive.
                try split.put(old_suffix, edge.node);
                try split.put(new_suffix, try createLeaf(allocator, file_index));

                const split_node = try allocator.create(Node);
                split_node.* = Node{ .branch = split };

                // Link the new branch before unlinking the old edge so a
                // failed allocation cannot drop the existing subtree.
                try children.put(shared_label, split_node);
                _ = children.remove(edge.key);
            },
        }
    }

    /// Returns the edge of `children` sharing the longest non-empty prefix
    /// with `id`, or null when no label starts with the first byte of `id`.
    fn longestSharedEdge(children: *ChildMap, id: []const u8) ?Edge {
        var best: ?Edge = null;
        var it = children.iterator();
        while (it.next()) |entry| {
            const key = entry.key_ptr.*;
            const shared = commonPrefix(key, id);
            if (shared == 0) continue;
            if (best) |current| {
                if (shared <= current.shared) continue;
            }
            best = Edge{ .key = key, .node = entry.value_ptr.*, .shared = shared };
        }
        return best;
    }

    /// Allocates a leaf node holding `file_index`.
    fn createLeaf(allocator: std.mem.Allocator, file_index: usize) std.mem.Allocator.Error!*Node {
        const node = try allocator.create(Node);
        node.* = Node{ .leaf = file_index };
        return node;
    }

    /// Allocates an empty child map whose storage also comes from `allocator`.
    fn createBranch(allocator: std.mem.Allocator) std.mem.Allocator.Error!*ChildMap {
        const children = try allocator.create(ChildMap);
        children.* = ChildMap.init(allocator);
        return children;
    }
};

/// Radix trie mapping a UUID (through `uuid.bytes`) to a file index.
///
/// All nodes and labels live in one heap-allocated arena that `deinit`
/// releases in a single step.
const RadixTrie = struct {
    arena: *ArenaAllocator,
    root_node: *Node,

    /// Creates an empty trie. `allocator` backs the arena and the arena
    /// object itself; call `deinit` to release both.
    fn init(allocator: std.mem.Allocator) !RadixTrie {
        const arena = try allocator.create(ArenaAllocator);
        errdefer allocator.destroy(arena);
        arena.* = ArenaAllocator.init(allocator);
        // Release arena buffers too if building the root fails below.
        errdefer arena.deinit();

        const arena_allocator = arena.allocator();
        const children = try arena_allocator.create(ChildMap);
        children.* = ChildMap.init(arena_allocator);

        const root = try arena_allocator.create(Node);
        root.* = Node{ .branch = children };

        return RadixTrie{
            .root_node = root,
            .arena = arena,
        };
    }

    /// Frees every node and label, then the arena object itself.
    fn deinit(self: *RadixTrie) void {
        const allocator = self.arena.child_allocator;
        self.arena.deinit();
        allocator.destroy(self.arena);
    }

    /// Stores `file_index` for `uuid`, replacing any previous value.
    fn insert(self: *RadixTrie, uuid: UUID, file_index: usize) !void {
        try self.root_node.insert(self.arena, uuid.bytes[0..], file_index);
    }

    /// Returns true when `uuid` was inserted.
    fn contains(self: RadixTrie, uuid: UUID) bool {
        return self.root_node.contains(uuid.bytes[0..]);
    }

    /// Returns the file index stored for `uuid`, or null when absent.
    fn get(self: RadixTrie, uuid: UUID) ?usize {
        return self.root_node.get(uuid.bytes[0..]);
    }
};

/// Returns the number of leading bytes that `a` and `b` have in common.
fn commonPrefix(a: []const u8, b: []const u8) usize {
    const limit = @min(a.len, b.len);
    var i: usize = 0;
    while (i < limit and a[i] == b[i]) : (i += 1) {}
    return i;
}

test "Create empty RadixTrie" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();
}

test "Get UUID in RadixTrie" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");
    try radix_trie.insert(uuid, 0);

    try std.testing.expectEqual(@as(?usize, 0), radix_trie.get(uuid));
}

test "Update UUID in RadixTrie" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");

    for (0..1000) |i| {
        try radix_trie.insert(uuid, i);
        try std.testing.expectEqual(@as(?usize, i), radix_trie.get(uuid));
    }
}

test "Split Node RadixTrie" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid0 = try UUID.parse("00000000-0000-0000-0000-000000000000");
    const uuid1 = try UUID.parse("00000000-0000-0000-0000-000000000001");
    const uuid2 = try UUID.parse("00000000-0000-0000-0000-000000000002");

    try radix_trie.insert(uuid0, 0);
    try radix_trie.insert(uuid1, 1);
    try radix_trie.insert(uuid2, 2);

    try std.testing.expect(radix_trie.contains(uuid0));
    try std.testing.expect(radix_trie.contains(uuid1));
    try std.testing.expect(radix_trie.contains(uuid2));

    try std.testing.expectEqual(@as(?usize, 0), radix_trie.get(uuid0));
    try std.testing.expectEqual(@as(?usize, 1), radix_trie.get(uuid1));
    try std.testing.expectEqual(@as(?usize, 2), radix_trie.get(uuid2));
}

test "Multiple Node RadixTrie with Deep Subdivisions" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuids = [_][]const u8{
        "00000000-0000-0000-0000-000000000000",
        "00000000-0000-0000-0000-000000000001",
        "00000000-0000-0000-0000-000000000002",
        "10000000-0000-0000-0000-000000000000",
        "11000000-0000-0000-0000-000000000000",
        "11100000-0000-0000-0000-000000000000",
        "11110000-0000-0000-0000-000000000000",
        "11111000-0000-0000-0000-000000000000",
        "11111100-0000-0000-0000-000000000000",
        "11111110-0000-0000-0000-000000000000",
        "11111111-0000-0000-0000-000000000000",
    };

    // Insert UUIDs
    for (uuids, 0..) |uuid_str, i| {
        const uuid = try UUID.parse(uuid_str);
        try radix_trie.insert(uuid, i);
    }

    // Test contains and get
    for (uuids, 0..) |uuid_str, i| {
        const uuid = try UUID.parse(uuid_str);
        try std.testing.expect(radix_trie.contains(uuid));
        try std.testing.expectEqual(@as(?usize, i), radix_trie.get(uuid));
    }

    // Test non-existent UUIDs
    const non_existent_uuids = [_][]const u8{
        "ffffffff-ffff-ffff-ffff-ffffffffffff",
        "22222222-2222-2222-2222-222222222222",
        "11111111-1111-1111-1111-111111111111",
    };

    for (non_existent_uuids) |uuid_str| {
        const uuid = try UUID.parse(uuid_str);
        try std.testing.expect(!radix_trie.contains(uuid));
        try std.testing.expectEqual(@as(?usize, null), radix_trie.get(uuid));
    }

    // Test partial matches
    const partial_matches = [_]struct { uuid: []const u8, expected_value: ?usize }{
        .{ .uuid = "00000000-0000-0000-0000-000000000003", .expected_value = null },
        .{ .uuid = "10000000-0000-0000-0000-000000000001", .expected_value = null },
        .{ .uuid = "11100000-0000-0000-0000-000000000001", .expected_value = null },
        .{ .uuid = "11111111-1000-0000-0000-000000000000", .expected_value = null },
    };

    for (partial_matches) |pm| {
        const uuid = try UUID.parse(pm.uuid);
        try std.testing.expectEqual(pm.expected_value, radix_trie.get(uuid));
    }
}

// Regression: inserting below an existing prefix edge must split nested edges
// instead of adding a sibling that shares a first byte, otherwise lookups
// depend on hash-map iteration order.
test "Node keeps sibling edges disjoint" {
    var arena = ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const children = try allocator.create(ChildMap);
    children.* = ChildMap.init(allocator);
    var root = Node{ .branch = children };

    const ids = [_][]const u8{ "ABC", "ABD", "AEX", "ABF", "ABG", "AEY" };
    for (ids, 0..) |id, i| {
        try root.insert(&arena, id, i);
    }
    for (ids, 0..) |id, i| {
        try std.testing.expect(root.contains(id));
        try std.testing.expectEqual(@as(?usize, i), root.get(id));
    }

    try std.testing.expectEqual(@as(?usize, null), root.get("AB"));
    try std.testing.expectEqual(@as(?usize, null), root.get("ABH"));
    try std.testing.expectEqual(@as(?usize, null), root.get(""));
    try std.testing.expect(!root.contains("AB"));
}

// Regression: an id that is a strict prefix of another id (and the reverse)
// must be stored without corrupting the trie.
test "Node supports ids that are prefixes of other ids" {
    var arena = ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const children = try allocator.create(ChildMap);
    children.* = ChildMap.init(allocator);
    var root = Node{ .branch = children };

    try root.insert(&arena, "AB", 0);
    try root.insert(&arena, "ABCD", 1);
    try root.insert(&arena, "A", 2);

    try std.testing.expectEqual(@as(?usize, 0), root.get("AB"));
    try std.testing.expectEqual(@as(?usize, 1), root.get("ABCD"));
    try std.testing.expectEqual(@as(?usize, 2), root.get("A"));
    try std.testing.expectEqual(@as(?usize, null), root.get("ABC"));

    try root.insert(&arena, "AB", 7);
    try std.testing.expectEqual(@as(?usize, 7), root.get("AB"));
    try std.testing.expectEqual(@as(?usize, 1), root.get("ABCD"));
}

test "Radix benchmark insert" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    for (0..10_000) |_| {
        const uuid = UUID.init();
        try radix_trie.insert(uuid, 0);
        _ = radix_trie.contains(uuid);
    }

    std.debug.print("Memory use: {d}\n", .{radix_trie.arena.queryCapacity()});
}

test "Hashmap benchmark" {
    const allocator = std.testing.allocator;
    var arena = ArenaAllocator.init(allocator);
    defer arena.deinit();

    var map = std.AutoHashMap(UUID, usize).init(arena.allocator());

    for (0..10_000) |_| {
        const uuid = UUID.init();
        try map.put(uuid, 0);
        _ = map.contains(uuid);
    }

    std.debug.print("Memory use: {d}\n", .{arena.queryCapacity()});
}