361 lines
13 KiB
Zig
361 lines
13 KiB
Zig
const std = @import("std");
|
|
const UUID = @import("dtype").UUID;
|
|
const ArenaAllocator = std.heap.ArenaAllocator;
|
|
|
|
// 1. Basic RadixTrie and Node - OK
|
|
// 2. Add one UUID
|
|
// 3. Get one file index using one UUID
|
|
// 4. Get a list of file indexes using a list of UUIDs
|
|
|
|
/// One node of the radix trie.
///
/// A `branch` maps key fragments to child nodes; a `leaf` stores the file
/// index of a complete key. `insert` keeps every branch in a shape where no
/// two keys start with the same byte, and uses the empty key `""` for a value
/// whose key ends exactly at that branch.
///
/// Maps, nodes and key bytes are all allocated from the arena handed to
/// `insert`; nothing is released individually.
const Node = union(enum) {
    branch: *std.StringHashMap(*Node),
    leaf: usize,

    /// A child of a branch that shares a non-empty prefix with a searched id.
    const Match = struct {
        /// The child's key as stored in the branch map.
        key: []const u8,
        /// The child node itself.
        node: *Node,
        /// Number of leading bytes `key` has in common with the searched id.
        shared: usize,
    };

    /// Returns the child of `branch` whose key shares the longest non-empty
    /// prefix with `id`, or null when no key shares even its first byte.
    fn bestMatch(branch: *std.StringHashMap(*Node), id: []const u8) ?Match {
        var best: ?Match = null;
        var it = branch.iterator();
        while (it.next()) |entry| {
            const key = entry.key_ptr.*;
            // indexOfDiff is null only when both slices are equal.
            const shared = std.mem.indexOfDiff(u8, key, id) orelse key.len;
            if (shared == 0) continue;
            if (best) |current| {
                if (shared <= current.shared) continue;
            }
            best = .{ .key = key, .node = entry.value_ptr.*, .shared = shared };
        }
        return best;
    }

    /// Allocates a leaf node holding `file_index`.
    fn newLeaf(allocator: std.mem.Allocator, file_index: usize) !*Node {
        const node = try allocator.create(Node);
        node.* = .{ .leaf = file_index };
        return node;
    }

    /// Allocates an empty child map for a branch node.
    fn newBranchMap(allocator: std.mem.Allocator) !*std.StringHashMap(*Node) {
        const map = try allocator.create(std.StringHashMap(*Node));
        map.* = std.StringHashMap(*Node).init(allocator);
        return map;
    }

    /// Returns true when a value is stored for exactly `id` below this node.
    fn contains(self: Node, id: []const u8) bool {
        return self.get(id) != null;
    }

    /// Returns the file index stored for exactly `id` below this node, or
    /// null when there is none.
    fn get(self: Node, id: []const u8) ?usize {
        var node = self;
        var rest: []const u8 = id;
        while (true) {
            switch (node) {
                // A leaf only matches once the whole id has been consumed.
                .leaf => |value| return if (rest.len == 0) value else null,
                .branch => |branch| {
                    if (rest.len == 0) {
                        // The id ends at this branch: its value, if any, is
                        // stored under the empty key.
                        const terminal = branch.get("") orelse return null;
                        return switch (terminal.*) {
                            .leaf => |value| value,
                            .branch => null,
                        };
                    }
                    const match = bestMatch(branch, rest) orelse return null;
                    // Only descend when the child's key is entirely a prefix
                    // of the remaining id; otherwise bytes of a stored key
                    // would be skipped and an unrelated id could match.
                    if (match.shared < match.key.len) return null;
                    node = match.node.*;
                    rest = rest[match.shared..];
                },
            }
        }
    }

    /// Stores `file_index` for `id` below this node, replacing any value
    /// already stored for the same id.
    ///
    /// Every allocation is taken from `arena`. Returns `error.OutOfMemory`
    /// when an allocation fails.
    fn insert(self: *Node, arena: *ArenaAllocator, id: []const u8, file_index: usize) !void {
        const allocator = arena.allocator();
        var node = self;
        var rest: []const u8 = id;
        while (true) {
            switch (node.*) {
                .leaf => |stored| {
                    if (rest.len == 0) {
                        // Same key: overwrite the value in place.
                        node.* = .{ .leaf = file_index };
                        return;
                    }
                    // The stored key is a proper prefix of the new one. Turn
                    // this leaf into a branch, keeping the old value in a
                    // fresh leaf under the empty key. Linking `node` itself
                    // would create a cycle once it is overwritten below.
                    const children = try newBranchMap(allocator);
                    try children.put("", try newLeaf(allocator, stored));
                    try children.put(try allocator.dupe(u8, rest), try newLeaf(allocator, file_index));
                    node.* = .{ .branch = children };
                    return;
                },
                .branch => |branch| {
                    if (rest.len == 0) {
                        // The key ends at this branch.
                        if (branch.get("")) |terminal| {
                            terminal.* = .{ .leaf = file_index };
                        } else {
                            try branch.put("", try newLeaf(allocator, file_index));
                        }
                        return;
                    }

                    const match = bestMatch(branch, rest) orelse {
                        // Nothing shares a prefix: add a new leaf. The key is
                        // copied because the map does not own key memory.
                        try branch.put(try allocator.dupe(u8, rest), try newLeaf(allocator, file_index));
                        return;
                    };

                    if (match.shared == match.key.len) {
                        // The child's key is fully consumed: keep descending.
                        node = match.node;
                        rest = rest[match.shared..];
                        continue;
                    }

                    // Partial match: split the child's key at the shared
                    // prefix. E.g. with 1000 stored and 1011 inserted, the
                    // entry 1000 becomes 10 -> { 00, 11 }.
                    const children = try newBranchMap(allocator);
                    // The old key's bytes stay alive in the arena, so both of
                    // its halves are reused as keys instead of being copied.
                    try children.put(match.key[match.shared..], match.node);

                    const tail = rest[match.shared..];
                    const tail_key: []const u8 = if (tail.len == 0) "" else try allocator.dupe(u8, tail);
                    try children.put(tail_key, try newLeaf(allocator, file_index));

                    const inner = try allocator.create(Node);
                    inner.* = .{ .branch = children };

                    // Link the new branch first (this can fail), then drop
                    // the old entry (this cannot). The old child node is now
                    // owned by `children` and must not be destroyed.
                    try branch.put(match.key[0..match.shared], inner);
                    _ = branch.remove(match.key);
                    return;
                },
            }
        }
    }
};
|
|
|
|
/// Radix trie that maps the bytes of a UUID to a file index.
///
/// The trie owns a heap-allocated arena; the root node is allocated from it
/// and the arena is handed to every `Node.insert` call. `deinit` releases the
/// arena and everything allocated from it.
const RadixTrie = struct {
    arena: *ArenaAllocator,
    root_node: *Node,

    /// Creates an empty trie whose root is an empty branch.
    ///
    /// `allocator` backs the internal arena and the arena object itself.
    /// Returns an error if any of the initial allocations fails; nothing is
    /// leaked in that case. The caller must call `deinit` on success.
    fn init(allocator: std.mem.Allocator) !RadixTrie {
        const arena = try allocator.create(ArenaAllocator);
        errdefer allocator.destroy(arena);

        arena.* = ArenaAllocator.init(allocator);
        // errdefers unwind in reverse order, so on a later failure the arena's
        // buffers are released before the arena object itself is destroyed.
        errdefer arena.deinit();

        const map = try arena.allocator().create(std.StringHashMap(*Node));
        map.* = std.StringHashMap(*Node).init(arena.allocator());

        const node = try arena.allocator().create(Node);
        node.* = Node{ .branch = map };

        return RadixTrie{
            .root_node = node,
            .arena = arena,
        };
    }

    /// Frees every allocation made through the arena, then the arena object.
    fn deinit(self: *RadixTrie) void {
        // Read the backing allocator first: it lives inside the arena that is
        // about to be torn down.
        const allocator = self.arena.child_allocator;
        self.arena.deinit();
        allocator.destroy(self.arena);
    }

    /// Stores `file_index` under the bytes of `uuid`.
    fn insert(self: *RadixTrie, uuid: UUID, file_index: usize) !void {
        try self.root_node.*.insert(self.arena, uuid.bytes[0..], file_index);
    }

    /// Reports whether the root node contains the bytes of `uuid`.
    fn contains(self: RadixTrie, uuid: UUID) bool {
        return self.root_node.contains(uuid.bytes[0..]);
    }

    /// Returns the file index the root node holds for the bytes of `uuid`,
    /// or null when there is none.
    fn get(self: RadixTrie, uuid: UUID) ?usize {
        return self.root_node.get(uuid.bytes[0..]);
    }
};
|
|
|
|
/// Returns the number of leading bytes that `a` and `b` have in common.
/// The result never exceeds the length of the shorter slice.
fn commonPrefix(a: []const u8, b: []const u8) usize {
    const limit = @min(a.len, b.len);
    for (a[0..limit], b[0..limit], 0..) |left, right, idx| {
        // The first mismatch position is the length of the shared prefix.
        if (left != right) return idx;
    }
    return limit;
}
|
|
|
|
// Smoke test: a trie can be created and torn down; the testing allocator
// reports any leak.
test "Create empty RadixTrie" {
    var radix_trie = try RadixTrie.init(std.testing.allocator);
    defer radix_trie.deinit();
}
|
|
|
|
// Inserting a single UUID makes its file index retrievable.
test "Get UUID in RadixTrie" {
    const allocator = std.testing.allocator;

    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");

    try radix_trie.insert(uuid, 0);

    // expectEqual takes (expected, actual); `get` yields `?usize`, so the
    // expected value is given that type explicitly.
    try std.testing.expectEqual(@as(?usize, 0), radix_trie.get(uuid));
}
|
|
|
|
// Re-inserting the same UUID replaces the stored file index each time.
test "Update UUID in RadixTrie" {
    const allocator = std.testing.allocator;

    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");

    for (0..1000) |i| {
        try radix_trie.insert(uuid, i);
        // expectEqual takes (expected, actual).
        try std.testing.expectEqual(@as(?usize, i), radix_trie.get(uuid));
    }
}
|
|
|
|
// Three UUIDs that differ only in their last byte must all stay retrievable
// once they share a node.
test "Splite Node RadixTrie" {
    const allocator = std.testing.allocator;

    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid0 = try UUID.parse("00000000-0000-0000-0000-000000000000");
    const uuid1 = try UUID.parse("00000000-0000-0000-0000-000000000001");
    const uuid2 = try UUID.parse("00000000-0000-0000-0000-000000000002");
    try radix_trie.insert(uuid0, 0);
    try radix_trie.insert(uuid1, 1);
    try radix_trie.insert(uuid2, 2);

    try std.testing.expect(radix_trie.contains(uuid0));
    try std.testing.expect(radix_trie.contains(uuid1));
    try std.testing.expect(radix_trie.contains(uuid2));

    // expectEqual takes (expected, actual); `get` yields `?usize`.
    const expected_values = [_]?usize{ 0, 1, 2 };
    try std.testing.expectEqual(expected_values[0], radix_trie.get(uuid0));
    try std.testing.expectEqual(expected_values[1], radix_trie.get(uuid1));
    try std.testing.expectEqual(expected_values[2], radix_trie.get(uuid2));
}
|
|
|
|
// Inserts UUIDs that share progressively longer prefixes, then checks that
// every inserted UUID is found and that unknown or near-miss UUIDs are not.
test "Multiple Node RadixTrie with Deep Subdivisions" {
    const allocator = std.testing.allocator;

    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuids = [_][]const u8{
        "00000000-0000-0000-0000-000000000000",
        "00000000-0000-0000-0000-000000000001",
        "00000000-0000-0000-0000-000000000002",
        "10000000-0000-0000-0000-000000000000",
        "11000000-0000-0000-0000-000000000000",
        "11100000-0000-0000-0000-000000000000",
        "11110000-0000-0000-0000-000000000000",
        "11111000-0000-0000-0000-000000000000",
        "11111100-0000-0000-0000-000000000000",
        "11111110-0000-0000-0000-000000000000",
        "11111111-0000-0000-0000-000000000000",
    };

    // Insert UUIDs; each one is stored with its position in `uuids`.
    for (uuids, 0..) |uuid_str, i| {
        const uuid = try UUID.parse(uuid_str);
        try radix_trie.insert(uuid, i);
    }

    // Test contains and get
    for (uuids, 0..) |uuid_str, i| {
        const uuid = try UUID.parse(uuid_str);
        try std.testing.expect(radix_trie.contains(uuid));
        // Compare the optional itself: a missing entry then fails the
        // assertion instead of panicking on `.?`.
        try std.testing.expectEqual(@as(?usize, i), radix_trie.get(uuid));
    }

    // Test non-existent UUIDs
    const non_existent_uuids = [_][]const u8{
        "ffffffff-ffff-ffff-ffff-ffffffffffff",
        "22222222-2222-2222-2222-222222222222",
        "11111111-1111-1111-1111-111111111111",
    };

    for (non_existent_uuids) |uuid_str| {
        const uuid = try UUID.parse(uuid_str);
        std.debug.print("{s}\n", .{uuid_str});
        try std.testing.expect(!radix_trie.contains(uuid));
        // expectEqual takes (expected, actual).
        try std.testing.expectEqual(@as(?usize, null), radix_trie.get(uuid));
    }

    // Test partial matches: UUIDs that share a long prefix with an inserted
    // one but were never inserted themselves.
    const partial_matches = [_]struct { uuid: []const u8, expected_value: ?usize }{
        .{ .uuid = "00000000-0000-0000-0000-000000000003", .expected_value = null },
        .{ .uuid = "10000000-0000-0000-0000-000000000001", .expected_value = null },
        .{ .uuid = "11100000-0000-0000-0000-000000000001", .expected_value = null },
        .{ .uuid = "11111111-1000-0000-0000-000000000000", .expected_value = null },
    };

    for (partial_matches) |pm| {
        const uuid = try UUID.parse(pm.uuid);
        try std.testing.expectEqual(pm.expected_value, radix_trie.get(uuid));
    }
}
|
|
|
|
// Rough benchmark: inserts and looks up 10_000 UUIDs produced by
// `UUID.init()`, then prints the capacity reported by the trie's arena.
test "Radix benchmark insert" {
    var radix_trie = try RadixTrie.init(std.testing.allocator);
    defer radix_trie.deinit();

    var round: usize = 0;
    while (round < 10_000) : (round += 1) {
        const uuid = UUID.init();
        try radix_trie.insert(uuid, 0);
        // The result is discarded; only the lookup itself is exercised.
        _ = radix_trie.contains(uuid);
    }

    const capacity = radix_trie.arena.queryCapacity();
    std.debug.print("Memory use: {d}\n", .{capacity});
}
|
|
|
|
// Baseline for the radix benchmark: the same 10_000 insert/lookup rounds on
// an arena-backed `std.AutoHashMap`, followed by the arena's capacity.
test "Hashmap benchmark" {
    var arena = ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    var map = std.AutoHashMap(UUID, usize).init(arena.allocator());

    var round: usize = 0;
    while (round < 10_000) : (round += 1) {
        const uuid = UUID.init();
        try map.put(uuid, 0);
        // The result is discarded; only the lookup itself is exercised.
        _ = map.contains(uuid);
    }

    const capacity = arena.queryCapacity();
    std.debug.print("Memory use: {d}\n", .{capacity});
}
|