// ZipponDB/src/dataStructure/RadixTrie.zig.old
//
// 361 lines
// 13 KiB
// Zig

const std = @import("std");
const UUID = @import("dtype").UUID;
const ArenaAllocator = std.heap.ArenaAllocator;
// 1. Basic RadixTrie and Node - OK
// 2. Add one UUID
// 3. Get one file index using one UUID
// 4. Get a list of file indexes using a list of UUIDs
/// One node of the radix trie.
///
/// A `branch` maps key fragments to child nodes; a `leaf` holds the file index
/// registered for the key spelled out by the fragments on the path leading to
/// it. Inside a branch the empty fragment "" marks a key that ends exactly at
/// that branch. `insert` keeps the invariant that no two non-empty fragments of
/// the same branch start with the same byte.
const Node = union(enum) {
    branch: *std.StringHashMap(*Node),
    leaf: usize,

    const Branch = std.StringHashMap(*Node);

    /// The fragment of a branch that shares the most leading bytes with an id.
    const Match = struct {
        /// Fragment as stored in the branch.
        key: []const u8,
        /// Child node stored under `key`.
        node: *Node,
        /// Number of leading bytes `key` has in common with the id.
        shared: usize,
    };

    /// Scans every fragment of `branch` and returns the one sharing the longest
    /// non-empty prefix with `id`, or null when none shares even one byte.
    fn longestMatch(branch: *const Branch, id: []const u8) ?Match {
        var best: ?Match = null;
        var it = branch.iterator();
        while (it.next()) |entry| {
            const key = entry.key_ptr.*;
            // indexOfDiff yields null for identical slices: everything is shared.
            const shared = std.mem.indexOfDiff(u8, key, id) orelse id.len;
            const best_so_far = if (best) |b| b.shared else 0;
            if (shared > best_so_far) {
                best = .{ .key = key, .node = entry.value_ptr.*, .shared = shared };
            }
        }
        return best;
    }

    /// Returns true when a file index is stored for exactly `id`.
    fn contains(self: Node, id: []const u8) bool {
        // Delegating keeps `contains` and `get` in agreement by construction.
        return self.get(id) != null;
    }

    /// Returns the file index stored for exactly `id`, or null when absent.
    fn get(self: Node, id: []const u8) ?usize {
        switch (self) {
            .leaf => |file_index| return if (id.len == 0) file_index else null,
            .branch => |branch| {
                if (id.len == 0) {
                    // The id ends at this branch: only a "" terminator can hold it.
                    const terminal = branch.get("") orelse return null;
                    return switch (terminal.*) {
                        .leaf => |file_index| file_index,
                        .branch => null,
                    };
                }
                const match = longestMatch(branch, id) orelse return null;
                // Descend only when the stored fragment is matched in full;
                // a partially matched fragment means `id` was never inserted.
                if (match.shared < match.key.len) return null;
                return match.node.get(id[match.shared..]);
            },
        }
    }

    /// Stores `file_index` under `id`, replacing any value already stored for it.
    ///
    /// Every node and key fragment is allocated from `arena`; `id` is copied, so
    /// the caller's slice does not need to outlive the call. Nothing is freed
    /// individually: memory is reclaimed when the arena is deinitialized.
    /// Fails only when an allocation fails, in which case the trie is left
    /// as it was before the call.
    fn insert(self: *Node, arena: *ArenaAllocator, id: []const u8, file_index: usize) !void {
        const allocator = arena.allocator();
        var node = self;
        var rest = id;
        while (true) {
            switch (node.*) {
                .leaf => |existing_index| {
                    if (rest.len == 0) {
                        // Same key again: overwrite the stored index.
                        node.* = .{ .leaf = file_index };
                        return;
                    }
                    // The new key extends the key ending at this leaf: turn the
                    // leaf into a branch. The old value moves to a fresh node
                    // under ""; `node` itself must not be stored in the branch,
                    // because it is about to become that branch.
                    const new_branch = try allocator.create(Branch);
                    new_branch.* = Branch.init(allocator);
                    const moved_leaf = try allocator.create(Node);
                    moved_leaf.* = .{ .leaf = existing_index };
                    try new_branch.put("", moved_leaf);
                    const new_leaf = try allocator.create(Node);
                    new_leaf.* = .{ .leaf = file_index };
                    try new_branch.put(try allocator.dupe(u8, rest), new_leaf);
                    node.* = .{ .branch = new_branch };
                    return;
                },
                .branch => |branch| {
                    if (rest.len == 0) {
                        // The key ends at this branch: use the "" terminator.
                        if (branch.get("")) |terminal| {
                            terminal.* = .{ .leaf = file_index };
                        } else {
                            const new_leaf = try allocator.create(Node);
                            new_leaf.* = .{ .leaf = file_index };
                            try branch.put("", new_leaf);
                        }
                        return;
                    }
                    const match = longestMatch(branch, rest) orelse {
                        // No fragment shares a leading byte: add a new leaf.
                        const new_leaf = try allocator.create(Node);
                        new_leaf.* = .{ .leaf = file_index };
                        try branch.put(try allocator.dupe(u8, rest), new_leaf);
                        return;
                    };
                    if (match.shared == match.key.len) {
                        // Fragment consumed in full: continue inside the child.
                        node = match.node;
                        rest = rest[match.shared..];
                        continue;
                    }
                    // Partial match: split the fragment. With 1000 stored and
                    // 1011 inserted, "10" becomes a branch holding "00" and "11".
                    const split_branch = try allocator.create(Branch);
                    split_branch.* = Branch.init(allocator);
                    const split_node = try allocator.create(Node);
                    split_node.* = .{ .branch = split_branch };
                    // The existing child is re-linked, not copied, so it must
                    // stay alive after its old fragment is unlinked below.
                    try split_branch.put(try allocator.dupe(u8, match.key[match.shared..]), match.node);
                    const new_leaf = try allocator.create(Node);
                    new_leaf.* = .{ .leaf = file_index };
                    // An empty remainder becomes the "" terminator of the new branch.
                    try split_branch.put(try allocator.dupe(u8, rest[match.shared..]), new_leaf);
                    // Link the shared prefix first: it is the last step that can
                    // fail, so an allocation failure never loses the old entry.
                    try branch.put(try allocator.dupe(u8, rest[0..match.shared]), split_node);
                    _ = branch.remove(match.key);
                    return;
                },
            }
        }
    }
};
/// Radix trie associating a UUID with a file index.
///
/// Every node and key fragment is allocated from a single heap-allocated arena
/// that `deinit` releases in one step.
const RadixTrie = struct {
    /// Arena backing the whole trie; created in `init`, destroyed in `deinit`.
    arena: *ArenaAllocator,
    /// Root of the trie; always created as a branch by `init`.
    root_node: *Node,

    /// Creates an empty trie whose arena is backed by `allocator`.
    /// Fails only when an allocation fails; nothing is leaked in that case.
    fn init(allocator: std.mem.Allocator) !RadixTrie {
        const arena = try allocator.create(ArenaAllocator);
        errdefer allocator.destroy(arena);
        arena.* = ArenaAllocator.init(allocator);
        // Registered after the destroy above, so on failure it runs first and
        // releases the arena's buffers before the arena object itself is freed.
        errdefer arena.deinit();

        const arena_allocator = arena.allocator();
        const map = try arena_allocator.create(std.StringHashMap(*Node));
        map.* = std.StringHashMap(*Node).init(arena_allocator);
        const node = try arena_allocator.create(Node);
        node.* = Node{ .branch = map };

        return RadixTrie{
            .root_node = node,
            .arena = arena,
        };
    }

    /// Releases every allocation made by the trie, then the arena object itself.
    fn deinit(self: *RadixTrie) void {
        // Read the backing allocator first: it is stored inside the arena.
        const allocator = self.arena.child_allocator;
        self.arena.deinit();
        allocator.destroy(self.arena);
    }

    /// Stores `file_index` under the bytes of `uuid`.
    /// NOTE(review): `uuid` is a by-value parameter, so the slice handed to
    /// `Node.insert` is only valid for this call — confirm keys keep being copied there.
    fn insert(self: *RadixTrie, uuid: UUID, file_index: usize) !void {
        try self.root_node.insert(self.arena, uuid.bytes[0..], file_index);
    }

    /// Reports whether the bytes of `uuid` are found by `Node.contains`.
    fn contains(self: RadixTrie, uuid: UUID) bool {
        return self.root_node.contains(uuid.bytes[0..]);
    }

    /// Returns the file index stored for `uuid`, or null when there is none.
    fn get(self: RadixTrie, uuid: UUID) ?usize {
        return self.root_node.get(uuid.bytes[0..]);
    }
};
/// Returns how many leading bytes `a` and `b` have in common.
/// The result never exceeds the length of the shorter slice.
fn commonPrefix(a: []const u8, b: []const u8) usize {
    const limit = @min(a.len, b.len);
    for (a[0..limit], b[0..limit], 0..) |lhs, rhs, index| {
        if (lhs != rhs) return index;
    }
    return limit;
}
// Smoke test: an empty trie can be created and released again.
// std.testing.allocator fails the test if anything is leaked.
test "Create empty RadixTrie" {
    var trie = try RadixTrie.init(std.testing.allocator);
    defer trie.deinit();
}
// A single inserted UUID can be read back with the index it was stored under.
test "Get UUID in RadixTrie" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");
    try radix_trie.insert(uuid, 0);

    // expectEqual takes (expected, actual); the cast matches the ?usize result of get.
    try std.testing.expectEqual(@as(?usize, 0), radix_trie.get(uuid));
}
// Inserting the same UUID repeatedly overwrites the stored index each time.
test "Update UUID in RadixTrie" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid = try UUID.parse("00000000-0000-0000-0000-000000000000");
    for (0..1000) |i| {
        try radix_trie.insert(uuid, i);
        // expectEqual takes (expected, actual).
        try std.testing.expectEqual(@as(?usize, i), radix_trie.get(uuid));
    }
}
// Three UUIDs that differ only in their last byte force a node split;
// all three must stay reachable afterwards.
test "Splite Node RadixTrie" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuid0 = try UUID.parse("00000000-0000-0000-0000-000000000000");
    const uuid1 = try UUID.parse("00000000-0000-0000-0000-000000000001");
    const uuid2 = try UUID.parse("00000000-0000-0000-0000-000000000002");

    try radix_trie.insert(uuid0, 0);
    try radix_trie.insert(uuid1, 1);
    try radix_trie.insert(uuid2, 2);

    try std.testing.expect(radix_trie.contains(uuid0));
    try std.testing.expect(radix_trie.contains(uuid1));
    try std.testing.expect(radix_trie.contains(uuid2));

    // expectEqual takes (expected, actual).
    try std.testing.expectEqual(@as(?usize, 0), radix_trie.get(uuid0));
    try std.testing.expectEqual(@as(?usize, 1), radix_trie.get(uuid1));
    try std.testing.expectEqual(@as(?usize, 2), radix_trie.get(uuid2));
}
// Inserts UUIDs with overlapping prefixes, then checks that every inserted
// UUID is found and that unrelated or merely prefix-sharing UUIDs are not.
test "Multiple Node RadixTrie with Deep Subdivisions" {
    const allocator = std.testing.allocator;
    var radix_trie = try RadixTrie.init(allocator);
    defer radix_trie.deinit();

    const uuids = [_][]const u8{
        "00000000-0000-0000-0000-000000000000",
        "00000000-0000-0000-0000-000000000001",
        "00000000-0000-0000-0000-000000000002",
        "10000000-0000-0000-0000-000000000000",
        "11000000-0000-0000-0000-000000000000",
        "11100000-0000-0000-0000-000000000000",
        "11110000-0000-0000-0000-000000000000",
        "11111000-0000-0000-0000-000000000000",
        "11111100-0000-0000-0000-000000000000",
        "11111110-0000-0000-0000-000000000000",
        "11111111-0000-0000-0000-000000000000",
    };

    // Insert UUIDs; each one is stored under its position in the list.
    for (uuids, 0..) |uuid_str, i| {
        const uuid = try UUID.parse(uuid_str);
        try radix_trie.insert(uuid, i);
    }

    // Every inserted UUID must be found with its own index.
    for (uuids, 0..) |uuid_str, i| {
        const uuid = try UUID.parse(uuid_str);
        try std.testing.expect(radix_trie.contains(uuid));
        // Comparing the optional itself reports a missing entry as a test
        // failure instead of panicking on `.?`.
        try std.testing.expectEqual(@as(?usize, i), radix_trie.get(uuid));
    }

    // UUIDs that were never inserted must not be found.
    const non_existent_uuids = [_][]const u8{
        "ffffffff-ffff-ffff-ffff-ffffffffffff",
        "22222222-2222-2222-2222-222222222222",
        "11111111-1111-1111-1111-111111111111",
    };
    for (non_existent_uuids) |uuid_str| {
        const uuid = try UUID.parse(uuid_str);
        try std.testing.expect(!radix_trie.contains(uuid));
        try std.testing.expectEqual(@as(?usize, null), radix_trie.get(uuid));
    }

    // UUIDs sharing a long prefix with a stored one must not match it.
    const partial_matches = [_]struct { uuid: []const u8, expected_value: ?usize }{
        .{ .uuid = "00000000-0000-0000-0000-000000000003", .expected_value = null },
        .{ .uuid = "10000000-0000-0000-0000-000000000001", .expected_value = null },
        .{ .uuid = "11100000-0000-0000-0000-000000000001", .expected_value = null },
        .{ .uuid = "11111111-1000-0000-0000-000000000000", .expected_value = null },
    };
    for (partial_matches) |pm| {
        const uuid = try UUID.parse(pm.uuid);
        try std.testing.expectEqual(pm.expected_value, radix_trie.get(uuid));
    }
}
// Rough benchmark: inserts and looks up 10_000 UUIDs produced by UUID.init(),
// then prints the capacity reported by the trie's arena.
test "Radix benchmark insert" {
    var trie = try RadixTrie.init(std.testing.allocator);
    defer trie.deinit();

    var remaining: usize = 10_000;
    while (remaining > 0) : (remaining -= 1) {
        const id = UUID.init();
        try trie.insert(id, 0);
        _ = trie.contains(id);
    }

    const arena_capacity = trie.arena.queryCapacity();
    std.debug.print("Memory use: {d}\n", .{arena_capacity});
}
// Baseline for the radix benchmark: the same 10_000 insert/lookup round trips
// against a std.AutoHashMap backed by an arena, then prints the arena capacity.
test "Hashmap benchmark" {
    var arena = ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();

    var index_by_uuid = std.AutoHashMap(UUID, usize).init(arena.allocator());

    var remaining: usize = 10_000;
    while (remaining > 0) : (remaining -= 1) {
        const id = UUID.init();
        try index_by_uuid.put(id, 0);
        _ = index_by_uuid.contains(id);
    }

    const arena_capacity = arena.queryCapacity();
    std.debug.print("Memory use: {d}\n", .{arena_capacity});
}