const std = @import("std");
const config = @import("config");
const utils = @import("../utils.zig");
const zid = @import("ZipponData");
const Allocator = std.mem.Allocator;
const ZipponError = @import("error").ZipponError;

const SchemaStruct = @import("../schema/struct.zig");
const Filter = @import("../dataStructure/filter.zig").Filter;
const AdditionalData = @import("../dataStructure/additionalData.zig");
const RelationMap = @import("../dataStructure/relationMap.zig");
const UUIDFileIndex = @import("../dataStructure/UUIDFileIndex.zig").UUIDIndexMap;
const JsonString = @import("../dataStructure/relationMap.zig").JsonString;
const EntityWriter = @import("entityWriter.zig");
const ThreadSyncContext = @import("../thread/context.zig");

const dtype = @import("dtype");
const s2t = dtype.s2t;
const UUID = dtype.UUID;
const DateTime = dtype.DateTime;
const DataType = dtype.DataType;
const log = std.log.scoped(.fileEngine);

const Self = @import("core.zig").Self;

/// Use a struct name to get the number of entities of this struct and the number of data files that store them.
/// TODO: Multi thread that too
pub fn getNumberOfEntityAndFile(self: *Self, struct_name: []const u8) ZipponError!struct { entity: usize, file: usize } {
    const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
    const to_parse = try self.allFileIndex(self.allocator, struct_name);
    defer self.allocator.free(to_parse);

    return .{ .entity = sstruct.uuid_file_index.map.count(), .file = to_parse.len };
}

/// Populate a map with all UUID bytes as keys and file indexes as values.
/// This map is stored in the SchemaStruct so that, from a list of UUIDs, we can get the list of file indexes to parse.
pub fn populateFileIndexUUIDMap(
    self: *Self,
    sstruct: SchemaStruct,
    map: *UUIDFileIndex,
) ZipponError!void {
    var arena = std.heap.ArenaAllocator.init(self.allocator);
    defer arena.deinit();
    var safe_allocator = std.heap.ThreadSafeAllocator{ .child_allocator = arena.allocator() };
    const allocator = safe_allocator.allocator();

    const dir = try self.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{});
    const to_parse = try self.allFileIndex(allocator, sstruct.name);

    // Create a thread-safe writer for each file
    const thread_writer_list = allocator.alloc(std.ArrayList(UUID), to_parse.len) catch return ZipponError.MemoryError;
    defer {
        for (thread_writer_list) |list| list.deinit();
        allocator.free(thread_writer_list);
    }

    for (thread_writer_list) |*list| {
        list.* = std.ArrayList(UUID).init(allocator);
    }

    // Spawn threads for each file
    var wg: std.Thread.WaitGroup = .{};
    for (to_parse, 0..) |file_index, i| self.thread_pool.spawnWg(&wg, populateFileIndexUUIDMapOneFile, .{
        sstruct,
        &thread_writer_list[i],
        file_index,
        dir,
    });
    wg.wait();

    // Combine results
    for (thread_writer_list, 0..) |list, file_index| {
        for (list.items) |uuid| map.put(uuid, file_index) catch return ZipponError.MemoryError;
    }
}

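/// Worker for populateFileIndexUUIDMap: reads one data file and appends every UUID it contains to this thread's list.
/// Errors are swallowed with `catch return` because this runs inside the thread pool.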
fn populateFileIndexUUIDMapOneFile(
    sstruct: SchemaStruct,
    list: *std.ArrayList(UUID),
    file_index: u64,
    dir: std.fs.Dir,
) void {
    var path_buffer: [1024 * 10]u8 = undefined;
    var data_buffer: [config.BUFFER_SIZE]u8 = undefined;
    var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
    defer fa.reset();
    const allocator = fa.allocator();

    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch return;
    var iter = zid.DataIterator.init(allocator, path, dir, sstruct.zid_schema) catch return;
    defer iter.deinit();

    while (iter.next() catch return) |row| {
        list.append(UUID{ .bytes = row[0].UUID }) catch return;
    }
}

/// Use a struct name and filter to populate a map with all UUID bytes as keys and void as values.
/// This map is used as the value of the ConditionValue of links, so I can do a `contains` on it.
pub fn populateVoidUUIDMap(
    self: *Self,
    struct_name: []const u8,
    filter: ?Filter,
    map: *std.AutoHashMap(UUID, void),
    additional_data: *AdditionalData,
) ZipponError!void {
    var arena = std.heap.ArenaAllocator.init(self.allocator);
    defer arena.deinit();
    var safe_allocator = std.heap.ThreadSafeAllocator{ .child_allocator = arena.allocator() };
    const allocator = safe_allocator.allocator();

    const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);

    const dir = try self.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{});
    const to_parse = try self.allFileIndex(allocator, sstruct.name);

    // Multi-threading setup
    var sync_context = ThreadSyncContext.init(additional_data.limit);

    // Create a thread-safe writer for each file
    const thread_writer_list = allocator.alloc(std.ArrayList(UUID), to_parse.len + 1) catch return ZipponError.MemoryError;

    for (thread_writer_list) |*list| {
        list.* = std.ArrayList(UUID).init(allocator);
    }

    // Spawn threads for each file
    var wg: std.Thread.WaitGroup = .{};
    for (to_parse, 0..) |file_index, i| self.thread_pool.spawnWg(
        &wg,
        populateVoidUUIDMapOneFile,
        .{
            sstruct,
            filter,
            &thread_writer_list[i],
            file_index,
            dir,
            &sync_context,
        },
    );
    wg.wait();

    // Combine results
    for (thread_writer_list) |list| {
        for (list.items) |uuid| map.put(uuid, {}) catch return ZipponError.MemoryError;
    }
}

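/// Worker for populateVoidUUIDMap: reads one data file and appends the UUID of every row that the filter validates
/// (or every row when the filter is null), stopping once the shared entity limit is reached.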
fn populateVoidUUIDMapOneFile(
    sstruct: SchemaStruct,
    filter: ?Filter,
    list: *std.ArrayList(UUID),
    file_index: u64,
    dir: std.fs.Dir,
    sync_context: *ThreadSyncContext,
) void {
    var path_buffer: [1024 * 10]u8 = undefined;
    var data_buffer: [config.BUFFER_SIZE]u8 = undefined;
    var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
    defer fa.reset();
    const allocator = fa.allocator();

    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch return;

    var iter = zid.DataIterator.init(allocator, path, dir, sstruct.zid_schema) catch return;
    defer iter.deinit();

    while (iter.next() catch return) |row| {
        if (sync_context.checkStructLimit()) break;
        if (filter == null or filter.?.evaluate(row)) {
            list.append(UUID{ .bytes = row[0].UUID }) catch return;

            if (sync_context.incrementAndCheckStructLimit()) break;
        }
    }
}

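// Example usage (a sketch only; `engine`, `my_filter`, and the way AdditionalData
// is built are hypothetical, not defined in this file):
//
//     var additional_data = AdditionalData.init(allocator);
//     defer additional_data.deinit();
//     const json = try parseEntities(&engine, "User", my_filter, &additional_data, allocator);
//     defer allocator.free(json);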
/// Take a filter, parse all files, and for every entity that the filter validates, write it in JSON format to the writer.
/// The filter can be null; in that case all entities are returned.
pub fn parseEntities(
    self: *Self,
    struct_name: []const u8,
    filter: ?Filter,
    additional_data: *AdditionalData,
    entry_allocator: Allocator,
) ZipponError![]const u8 {
    var arena = std.heap.ArenaAllocator.init(self.allocator);
    defer arena.deinit();
    var safe_allocator = std.heap.ThreadSafeAllocator{ .child_allocator = arena.allocator() };
    const allocator = safe_allocator.allocator();

    var buff = std.ArrayList(u8).init(entry_allocator);
    const writer = buff.writer();

    const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
    const to_parse = try self.allFileIndex(allocator, struct_name);

    // If there is no member to find, that means we need to return all members, so let's populate additional data with all of them
    if (additional_data.childrens.items.len == 0)
        additional_data.populateWithEverythingExceptLink(sstruct.members, sstruct.types) catch return ZipponError.MemoryError;

    // Do I populate the relationMap directly in the thread or do I do it on the string at the end?
    // I think it is better at the end; that way I don't need to create a duplicate of each map for the number of files
    const relation_maps = try self.schema_engine.relationMapArrayInit(allocator, struct_name, additional_data.*);

    // Open the dir that contains all files
    const dir = try self.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{ .access_sub_paths = false });

    // Multi-threading setup
    var sync_context = ThreadSyncContext.init(additional_data.limit);

    // Make an array of writers, one for each thread
    const thread_writer_list = allocator.alloc(std.ArrayList(u8), to_parse.len) catch return ZipponError.MemoryError;

    // Start parsing all files in multiple threads
    var wg: std.Thread.WaitGroup = .{};
    for (to_parse, 0..) |file_index, i| {
        thread_writer_list[i] = std.ArrayList(u8).init(allocator);
        self.thread_pool.spawnWg(
            &wg,
            parseEntitiesOneFile,
            .{
                thread_writer_list[i].writer(),
                file_index,
                dir,
                sstruct.zid_schema,
                filter,
                additional_data.*,
                sstruct.types,
                &sync_context,
            },
        );
    }
    wg.wait();

    // Append all writers to each other
    writer.writeByte('[') catch return ZipponError.WriteError;
    for (thread_writer_list) |list| writer.writeAll(list.items) catch return ZipponError.WriteError;
    writer.writeByte(']') catch return ZipponError.WriteError;
    for (thread_writer_list) |list| list.deinit();

    // Now I need to do the relation stuff, meaning parsing new files to get the relationship values.
    // Without relationships to return, this function is basically finished here.

    // Here I take the JSON string and I parse it to find all {<||>} and add them to the relation map with an empty JsonString
    for (relation_maps) |*relation_map| try relation_map.populate(buff.items);

    // I then call parseEntitiesRelationMap on each.
    // This will update the buff items to be the same JSON but with {<|[16]u8|>} replaced with the right JSON.
    for (relation_maps) |*relation_map| try self.parseEntitiesRelationMap(allocator, relation_map.struct_name, relation_map, &buff);

    return buff.toOwnedSlice() catch return ZipponError.MemoryError;
}

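/// Worker for parseEntities: iterates over one data file and writes every entity that the filter validates
/// as JSON to this thread's writer, stopping once the shared entity limit is reached.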
fn parseEntitiesOneFile(
    writer: anytype,
    file_index: u64,
    dir: std.fs.Dir,
    zid_schema: []zid.DType,
    filter: ?Filter,
    additional_data: AdditionalData,
    data_types: []const DataType,
    sync_context: *ThreadSyncContext,
) void {
    var path_buffer: [1024 * 10]u8 = undefined;
    var data_buffer: [config.BUFFER_SIZE]u8 = undefined;
    var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
    defer fa.reset();
    const allocator = fa.allocator();

    var buffered_writer = std.io.bufferedWriter(writer);

    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch return;
    var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch return;
    defer iter.deinit();

    while (iter.next() catch return) |row| {
        // Break, don't return: we still need to flush what was already written
        if (sync_context.checkStructLimit()) break;
        if (filter) |f| if (!f.evaluate(row)) continue;

        EntityWriter.writeEntityJSON(
            buffered_writer.writer(),
            row,
            additional_data,
            data_types,
        ) catch return;

        if (sync_context.incrementAndCheckStructLimit()) break;
    }
    buffered_writer.flush() catch return;
}

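// Illustrative sketch of the placeholder flow (values here are made up):
//   before: [{"id": "...", "friends": [{<|<16 UUID bytes>|>}]}]
//   after:  [{"id": "...", "friends": [{"id": "...", "name": "Bob"}]}]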
/// Receive a map of UUID -> empty JsonString.
/// Will parse the files and update each value to the JSON string of the entity that the key represents.
/// Will then rewrite the input, replacing each {<||>} with the JSON found in the map.
/// Once the new input is written, call parseEntitiesRelationMap again if the string still contains {<||>} because of sub-relationships.
/// The buffer contains the string with {<||>} and needs to be updated at the end.
pub fn parseEntitiesRelationMap(
    self: *Self,
    parent_allocator: Allocator,
    struct_name: []const u8,
    relation_map: *RelationMap,
    buff: *std.ArrayList(u8),
) ZipponError!void {
    var arena = std.heap.ArenaAllocator.init(parent_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var new_buff = std.ArrayList(u8).init(allocator);
    defer new_buff.deinit();
    const writer = new_buff.writer();

    const relation_maps = try self.schema_engine.relationMapArrayInit(
        allocator,
        struct_name,
        relation_map.additional_data,
    );

    const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
    const to_parse = try self.schema_engine.fileListToParse(allocator, struct_name, relation_map.map.*);

    // If there is no member to find, that means we need to return all members, so let's populate additional data with all of them
    if (relation_map.additional_data.childrens.items.len == 0) {
        relation_map.additional_data.populateWithEverythingExceptLink(
            sstruct.members,
            sstruct.types,
        ) catch return ZipponError.MemoryError;
    }

    // Open the dir that contains all files
    const dir = try self.printOpenDir(
        "{s}/DATA/{s}",
        .{ self.path_to_ZipponDB_dir, sstruct.name },
        .{ .access_sub_paths = false },
    );

    // Multi-threading setup
    var sync_context = ThreadSyncContext.init(relation_map.additional_data.limit);

    // Do one map for each thread, otherwise it creates errors by writing at the same time
    const thread_map_list = allocator.alloc(
        std.AutoHashMap([16]u8, JsonString),
        to_parse.len,
    ) catch return ZipponError.MemoryError;

    // Start parsing all files in multiple threads
    var wg: std.Thread.WaitGroup = .{};
    for (to_parse, 0..) |file_index, i| {
        thread_map_list[i] = relation_map.map.cloneWithAllocator(allocator) catch return ZipponError.MemoryError;
        self.thread_pool.spawnWg(
            &wg,
            parseEntitiesRelationMapOneFile,
            .{
                &thread_map_list[i],
                file_index,
                dir,
                sstruct.zid_schema,
                relation_map.additional_data,
                sstruct.types,
                &sync_context,
            },
        );
    }
    wg.wait();

    // Now here I should have a list of copies of the map, with the UUIDs spread a bit everywhere

    // Put all in the same map
    for (thread_map_list) |map| {
        var iter = map.iterator();
        while (iter.next()) |entry| {
            if (entry.value_ptr.init) relation_map.map.put(entry.key_ptr.*, entry.value_ptr.*) catch return ZipponError.MemoryError;
        }
    }

    // Here I write the new string and update the buff to have the new version
    try EntityWriter.updateWithRelation(writer, buff.items, relation_map.map.*);
    buff.clearRetainingCapacity();
    buff.writer().writeAll(new_buff.items) catch return ZipponError.WriteError;

    // Now I need to iterate again if buff.items still contains {<||>}

    // Here I take the JSON string and I parse it to find all {<||>} and add them to the relation maps with an empty JsonString
    for (relation_maps) |*sub_relation_map| try sub_relation_map.populate(buff.items);

    // I then call parseEntitiesRelationMap on each.
    // This will update the buff items to be the same JSON but with {<|[16]u8|>} replaced with the right JSON.
    for (relation_maps) |*sub_relation_map| try parseEntitiesRelationMap(self, allocator, sub_relation_map.struct_name, sub_relation_map, buff);
}

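/// Worker for parseEntitiesRelationMap: iterates over one data file and, for every row whose UUID is a key in the map,
/// writes its JSON into this thread's copy of the map, duplicated with the parent allocator so it outlives this frame.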
fn parseEntitiesRelationMapOneFile(
    map: *std.AutoHashMap([16]u8, JsonString),
    file_index: u64,
    dir: std.fs.Dir,
    zid_schema: []zid.DType,
    additional_data: AdditionalData,
    data_types: []const DataType,
    sync_context: *ThreadSyncContext,
) void {
    var path_buffer: [1024 * 10]u8 = undefined;
    var data_buffer: [config.BUFFER_SIZE]u8 = undefined;
    var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
    defer fa.reset();
    const allocator = fa.allocator();

    const parent_alloc = map.allocator;
    var string_list = std.ArrayList(u8).init(allocator);
    const writer = string_list.writer();

    const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch return;
    var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch return;
    defer iter.deinit();

    while (iter.next() catch return) |row| {
        if (sync_context.checkStructLimit()) return;
        if (!map.contains(row[0].UUID)) continue;
        defer string_list.clearRetainingCapacity();

        EntityWriter.writeEntityJSON(
            writer,
            row,
            additional_data,
            data_types,
        ) catch return;

        map.put(row[0].UUID, JsonString{
            .slice = parent_alloc.dupe(u8, string_list.items) catch return,
            .init = true,
        }) catch return;
        if (sync_context.incrementAndCheckStructLimit()) return;
    }
}