diff --git a/benchmark.zig b/benchmark.zig index 2692b93..18af7a8 100644 --- a/benchmark.zig +++ b/benchmark.zig @@ -1,7 +1,6 @@ const std = @import("std"); const dtype = @import("dtype"); const DBEngine = @import("src/cli/core.zig"); -const ziqlParser = @import("src/ziql/parser.zig"); const ZipponError = @import("error").ZipponError; const names = [_][]const u8{ "Alice", "Bob", "Charlie", "Dave", "Eve" }; @@ -28,17 +27,18 @@ pub fn myLog( _ = args; } +// Maybe I can make it a test to use the testing alloc pub fn main() !void { + const allocator = std.heap.page_allocator; const to_test = [_]usize{ 500, 50_000, 1_000_000 }; var line_buffer: [1024 * 1024]u8 = undefined; for (to_test) |users_count| { - var db_engine = DBEngine.init("benchmarkDB", "schema/benchmark"); + var db_engine = DBEngine.init(allocator, "benchmarkDB", "schema/benchmark"); defer db_engine.deinit(); { const null_term_query_str = try std.fmt.bufPrintZ(&line_buffer, "DELETE User {{}}", .{}); - var parser = ziqlParser.init(&db_engine.file_engine, &db_engine.schema_engine); - try parser.parse(null_term_query_str); + db_engine.runQuery(null_term_query_str); } // Populate with random dummy value // Need some speed up, spended times to find that it is the parsonConditionValue that take time, the last switch to be exact, that parse str to value @@ -46,8 +46,6 @@ pub fn main() !void { std.debug.print("\n=====================================\n\n", .{}); std.debug.print("Populating with {d} users.\n", .{users_count}); - const allocator = std.heap.page_allocator; - var prng = std.rand.DefaultPrng.init(0); const rng = prng.random(); const populate_start_time = std.time.nanoTimestamp(); @@ -77,8 +75,7 @@ pub fn main() !void { const null_term_query_str = try std.fmt.allocPrintZ(allocator, "{s}", .{array.items}); defer allocator.free(null_term_query_str); - var parser = ziqlParser.init(&db_engine.file_engine, &db_engine.schema_engine); - try parser.parse(null_term_query_str); + 
db_engine.runQuery(null_term_query_str); const populate_end_time = std.time.nanoTimestamp(); const populate_duration = @as(f64, @floatFromInt(populate_end_time - populate_start_time)) / 1e9; @@ -118,8 +115,7 @@ pub fn main() !void { // Execute the query here const null_term_query_str = try std.fmt.bufPrintZ(&line_buffer, "{s}", .{query}); - var parser = ziqlParser.init(&db_engine.file_engine, &db_engine.schema_engine); - try parser.parse(null_term_query_str); + db_engine.runQuery(null_term_query_str); const end_time = std.time.nanoTimestamp(); const duration = @as(f64, @floatFromInt(end_time - start_time)) / 1e6; diff --git a/build.zig b/build.zig index c11dea8..4a1e75c 100644 --- a/build.zig +++ b/build.zig @@ -2,7 +2,7 @@ const std = @import("std"); pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); - const optimize = b.standardOptimizeOption(.{}); + const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseFast }); // Run // ----------------------------------------------- @@ -126,6 +126,7 @@ pub fn build(b: *std.Build) void { } // Release + // TODO: Make a small, fast and safe release // ----------------------------------------------- { const release_step = b.step("release", "Create release binaries for multiple platforms"); diff --git a/lib/zid.zig b/lib/zid.zig index 9afc6a8..7c7bc5c 100644 --- a/lib/zid.zig +++ b/lib/zid.zig @@ -213,10 +213,16 @@ pub const Data = union(DType) { // I know, I know I use @sizeOf too much, but I like it. Allow me to understand what it represent +const empty_buff: [4]u8 = .{ 0, 0, 0, 0 }; + /// Take an array of zig type and return an encoded version to use with Data.initType /// Like that: Data.initIntArray(try allocEncodArray.Int(my_array)) /// Don't forget to free it! 
allocator.free(data.IntArray) pub const allocEncodArray = struct { + pub fn Empty() []const u8 { + return empty_buff[0..]; + } + pub fn Int(allocator: std.mem.Allocator, items: []const i32) ![]const u8 { // Create a buffer of the right size var buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(i32) * items.len); diff --git a/src/cli/core.zig b/src/cli/core.zig index 80c9fba..4a408e8 100644 --- a/src/cli/core.zig +++ b/src/cli/core.zig @@ -9,38 +9,48 @@ const ziqlParser = @import("../ziql/parser.zig"); const setLogPath = @import("../main.zig").setLogPath; const log = std.log.scoped(.cli); -const DBEngineState = enum { MissingFileEngine, MissingSchemaEngine, Ok, Init }; +const DBEngineState = enum { MissingFileEngine, MissingSchemaEngine, MissingAllocator, MissingThreadEngine, Ok, Init }; pub const Self = @This(); var path_buffer: [1024]u8 = undefined; var line_buffer: [config.BUFFER_SIZE]u8 = undefined; var in_buffer: [config.BUFFER_SIZE]u8 = undefined; -var out_buffer: [config.BUFFER_SIZE]u8 = undefined; - var value_buffer: [1024]u8 = undefined; -var buffer: [1024 * 1024]u8 = undefined; // For env var -var fa = std.heap.FixedBufferAllocator.init(&buffer); -const allocator = fa.allocator(); usingnamespace @import("parser.zig"); +arena: *std.heap.ArenaAllocator = undefined, +allocator: Allocator = undefined, state: DBEngineState = .Init, file_engine: FileEngine = undefined, schema_engine: SchemaEngine = undefined, thread_engine: ThreadEngine = undefined, -pub fn init(potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8) Self { +pub fn init(parent_allocator: Allocator, potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8) Self { var self = Self{}; - self.thread_engine = ThreadEngine.init(); + const arena = parent_allocator.create(std.heap.ArenaAllocator) catch { + log.err("Error when init Engine DB allocator", .{}); + self.state = .MissingAllocator; + return self; + }; + arena.* = std.heap.ArenaAllocator.init(parent_allocator); + 
self.arena = arena; + self.allocator = arena.allocator(); - const potential_main_path_or_environment_variable = potential_main_path orelse getEnvVariable("ZIPPONDB_PATH"); + self.thread_engine = ThreadEngine.init(self.allocator) catch { + log.err("Error initializing thread engine", .{}); + self.state = .MissingThreadEngine; + return self; + }; + + const potential_main_path_or_environment_variable = potential_main_path orelse getEnvVariable(self.allocator, "ZIPPONDB_PATH"); if (potential_main_path_or_environment_variable) |main_path| { setLogPath(main_path); log.info("Found ZIPPONDB_PATH: {s}.", .{main_path}); - self.file_engine = FileEngine.init(main_path, self.thread_engine.thread_pool) catch { + self.file_engine = FileEngine.init(self.allocator, main_path, self.thread_engine.thread_pool) catch { log.err("Error when init FileEngine", .{}); self.state = .MissingFileEngine; return self; @@ -65,7 +75,7 @@ pub fn init(potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8 }; log.info("Schema founded in the database directory.", .{}); - self.schema_engine = SchemaEngine.init(schema_path, &self.file_engine) catch |err| { + self.schema_engine = SchemaEngine.init(self.allocator, schema_path, &self.file_engine) catch |err| { log.err("Error when init SchemaEngine: {any}", .{err}); self.state = .MissingSchemaEngine; return self; @@ -84,10 +94,10 @@ pub fn init(potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8 } log.info("Database don't have any schema yet, trying to add one.", .{}); - const potential_schema_path_or_environment_variable = potential_schema_path orelse getEnvVariable("ZIPPONDB_SCHEMA"); + const potential_schema_path_or_environment_variable = potential_schema_path orelse getEnvVariable(self.allocator, "ZIPPONDB_SCHEMA"); if (potential_schema_path_or_environment_variable) |schema_path| { log.info("Found schema path {s}.", .{schema_path}); - self.schema_engine = SchemaEngine.init(schema_path, &self.file_engine) catch |err| { + 
self.schema_engine = SchemaEngine.init(self.allocator, schema_path, &self.file_engine) catch |err| { log.err("Error when init SchemaEngine: {any}", .{err}); self.state = .MissingSchemaEngine; return self; @@ -129,9 +139,7 @@ pub fn start(self: *Self) !void { } } -pub fn getEnvVariable(variable: []const u8) ?[]const u8 { - fa.reset(); - +pub fn getEnvVariable(allocator: Allocator, variable: []const u8) ?[]const u8 { var env_map = std.process.getEnvMap(allocator) catch return null; var iter = env_map.iterator(); @@ -150,4 +158,8 @@ pub fn runQuery(self: *Self, null_term_query_str: [:0]const u8) void { pub fn deinit(self: *Self) void { self.thread_engine.deinit(); self.schema_engine.deinit(); + self.file_engine.deinit(); + const parent_allocator = self.arena.child_allocator; + self.arena.deinit(); + parent_allocator.destroy(self.arena); } diff --git a/src/cli/parser.zig b/src/cli/parser.zig index 751d87c..dc1299a 100644 --- a/src/cli/parser.zig +++ b/src/cli/parser.zig @@ -20,16 +20,14 @@ const State = enum { const Self = @import("core.zig"); -var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); -var allocator = arena.allocator(); - pub fn parse(self: *Self, null_term_line_str: [:0]const u8) !bool { var toker = cliTokenizer.init(null_term_line_str); var token = toker.next(); var state = State.expect_main_command; - defer _ = arena.reset(.free_all); - errdefer arena.deinit(); + var arena = std.heap.ArenaAllocator.init(self.allocator); + var allocator = arena.allocator(); + defer arena.deinit(); var last_token: cliToken = undefined; @@ -137,7 +135,7 @@ pub fn parse(self: *Self, null_term_line_str: [:0]const u8) !bool { .expect_path_to_db => switch (token.tag) { .identifier => { self.deinit(); - self.* = Self.init(toker.getTokenSlice(token), null); + self.* = Self.init(self.arena.child_allocator, toker.getTokenSlice(token), null); state = .end; }, else => { @@ -196,7 +194,7 @@ pub fn parse(self: *Self, null_term_line_str: [:0]const u8) !bool { .identifier 
=> { const main_path = try allocator.dupe(u8, self.file_engine.path_to_ZipponDB_dir); self.deinit(); - self.* = Self.init(main_path, toker.getTokenSlice(token)); + self.* = Self.init(self.arena.child_allocator, main_path, toker.getTokenSlice(token)); try self.file_engine.writeSchemaFile(self.schema_engine.null_terminated); state = .end; }, diff --git a/src/dataStructure/RadixTrie.zig.old b/src/dataStructure/RadixTrie.zig.old index b7d3895..40b8279 100644 --- a/src/dataStructure/RadixTrie.zig.old +++ b/src/dataStructure/RadixTrie.zig.old @@ -2,7 +2,6 @@ const std = @import("std"); const UUID = @import("dtype").UUID; const ArenaAllocator = std.heap.ArenaAllocator; -// TODO: // 1. Basic RadixTrie and Node - OK // 2. Add one UUID // 3. Get one file index using one UUID diff --git a/src/dataStructure/filter.zig b/src/dataStructure/filter.zig index 5f9fe2c..733e9e0 100644 --- a/src/dataStructure/filter.zig +++ b/src/dataStructure/filter.zig @@ -245,7 +245,6 @@ pub const Filter = struct { } } - // TODO: Use []Data and make it work pub fn evaluate(self: Filter, row: []Data) bool { return self.evaluateNode(self.root, row); } diff --git a/src/file/array.zig b/src/file/array.zig new file mode 100644 index 0000000..0567c6a --- /dev/null +++ b/src/file/array.zig @@ -0,0 +1,272 @@ +const std = @import("std"); +const zid = @import("ZipponData"); +const dtype = @import("dtype"); +const ConditionValue = @import("../dataStructure/filter.zig").ConditionValue; +const ArrayCondition = @import("../ziql/parts//newData.zig").ArrayCondition; + +pub fn updateData(allocator: std.mem.Allocator, condition: ArrayCondition, input: *zid.Data, data: []ConditionValue) !void { + switch (condition) { + .append => try append(allocator, input, data), + .pop => pop(input), + .clear => clear(input), + .remove => try remove(allocator, input, data), + .removeat => try removeat(allocator, input, data), + } +} + +fn pop(input: *zid.Data) void { + switch (input.*) { + .IntArray => |v| if (v.len > 4) { + 
input.*.IntArray = v[0 .. v.len - input.size()]; + }, + .FloatArray => |v| if (v.len > 4) { + input.*.FloatArray = v[0 .. v.len - input.size()]; + }, + .UnixArray => |v| if (v.len > 4) { + input.*.UnixArray = v[0 .. v.len - input.size()]; + }, + .UUIDArray => |v| if (v.len > 4) { + input.*.UUIDArray = v[0 .. v.len - input.size()]; + }, + .BoolArray => |v| if (v.len > 4) { + input.*.BoolArray = v[0 .. v.len - input.size()]; + }, + .StrArray => |v| if (v.len > 4) { + input.*.StrArray = v[0 .. v.len - input.size()]; + }, + else => unreachable, + } +} + +fn clear(input: *zid.Data) void { + switch (input.*) { + .IntArray => input.*.IntArray = zid.allocEncodArray.Empty(), + .FloatArray => input.*.FloatArray = zid.allocEncodArray.Empty(), + .UnixArray => input.*.UnixArray = zid.allocEncodArray.Empty(), + .UUIDArray => input.*.UUIDArray = zid.allocEncodArray.Empty(), + .BoolArray => input.*.BoolArray = zid.allocEncodArray.Empty(), + .StrArray => input.*.StrArray = zid.allocEncodArray.Empty(), + else => unreachable, + } +} + +fn allocForAppend(allocator: std.mem.Allocator, input: *zid.Data, data: []ConditionValue) []zid.Data { + switch (input.*) { + .UUIDArray => { + var total: usize = 0; + for (data) |d| total += d.link_array.count(); + return try allocator.alloc(zid.Data, total); + }, + else => return try allocator.alloc(zid.Data, data.len), + } +} +// I think I could use meta programming here by adding the type as argument +fn append(allocator: std.mem.Allocator, input: *zid.Data, data: []ConditionValue) !void { + switch (input.*) { + .IntArray => { + // 1. Make a list of the right type from ConditionValue + var array = std.ArrayList(i32).init(allocator); + defer array.deinit(); + for (data) |d| try array.append(d.int); + + // 2. Encode the new array + const new_array = try zid.allocEncodArray.Int(allocator, array.items); + + // 3. 
Add the new array at the end of the old one without the first 4 bytes that are the number of value in the array + var updated_array = std.ArrayList(u8).init(allocator); + try updated_array.appendSlice(input.IntArray); + try updated_array.appendSlice(new_array[4..]); + + // 4. Update the number of value in the array + const new_len = input.size() + data.len; + @memcpy(updated_array.items[0..@sizeOf(u64)], std.mem.asBytes(&new_len)); + + // 5. Update the input + input.*.IntArray = try updated_array.toOwnedSlice(); + }, + .FloatArray => { + var array = std.ArrayList(f64).init(allocator); + defer array.deinit(); + for (data) |d| try array.append(d.float); + const new_array = try zid.allocEncodArray.Float(allocator, array.items); + var updated_array = std.ArrayList(u8).init(allocator); + try updated_array.appendSlice(input.FloatArray); + try updated_array.appendSlice(new_array[4..]); + const new_len = input.size() + data.len; + @memcpy(updated_array.items[0..@sizeOf(u64)], std.mem.asBytes(&new_len)); + input.*.FloatArray = try updated_array.toOwnedSlice(); + }, + .UnixArray => { + var array = std.ArrayList(u64).init(allocator); + defer array.deinit(); + for (data) |d| try array.append(d.unix); + const new_array = try zid.allocEncodArray.Unix(allocator, array.items); + var updated_array = std.ArrayList(u8).init(allocator); + try updated_array.appendSlice(input.UnixArray); + try updated_array.appendSlice(new_array[4..]); + const new_len = input.size() + data.len; + @memcpy(updated_array.items[0..@sizeOf(u64)], std.mem.asBytes(&new_len)); + input.*.UnixArray = try updated_array.toOwnedSlice(); + }, + .BoolArray => { + var array = std.ArrayList(bool).init(allocator); + defer array.deinit(); + for (data) |d| try array.append(d.bool_); + const new_array = try zid.allocEncodArray.Bool(allocator, array.items); + var updated_array = std.ArrayList(u8).init(allocator); + try updated_array.appendSlice(input.BoolArray); + try updated_array.appendSlice(new_array[4..]); + const 
new_len = input.size() + data.len; + @memcpy(updated_array.items[0..@sizeOf(u64)], std.mem.asBytes(&new_len)); + input.*.BoolArray = try updated_array.toOwnedSlice(); + }, + .StrArray => { + var array = std.ArrayList([]const u8).init(allocator); + defer array.deinit(); + for (data) |d| try array.append(d.str); + const new_array = try zid.allocEncodArray.Str(allocator, array.items); + var updated_array = std.ArrayList(u8).init(allocator); + try updated_array.appendSlice(input.StrArray); + try updated_array.appendSlice(new_array[4..]); + const new_len = input.size() + data.len; + @memcpy(updated_array.items[0..@sizeOf(u64)], std.mem.asBytes(&new_len)); + input.*.StrArray = try updated_array.toOwnedSlice(); + }, + .UUIDArray => { // If input is a UUID array, that mean all data are also UUIDArray. There should be only one UUIDArray in data as it is use like that "friends APPEND {name = 'Bob'}" + var array = std.ArrayList([16]u8).init(allocator); + defer array.deinit(); + for (data) |d| { + var iter = d.link_array.keyIterator(); + while (iter.next()) |uuid| try array.append(uuid.bytes); + } + const new_array = try zid.allocEncodArray.UUID(allocator, array.items); + var updated_array = std.ArrayList(u8).init(allocator); + try updated_array.appendSlice(input.UUIDArray); + try updated_array.appendSlice(new_array[4..]); + const new_len = input.size() + array.items.len; + @memcpy(updated_array.items[0..@sizeOf(u64)], std.mem.asBytes(&new_len)); + input.*.UUIDArray = try updated_array.toOwnedSlice(); + }, + else => unreachable, + } +} + +// TODO: Change the array for a map to speed up thing +// And also I dont really need to realoc anything, only append need because here it can only go lower +// So I could just memcopy the remaining of the bytes at the current position, so it overwrite the value to remove +// Like if I want to re;ove 3 in [1 2 3 4 5], it would become [1 2 4 5 5]. Then I dont take the last value when I return. 
+// But that mean I keep in memory useless data, so maybe not +fn remove(allocator: std.mem.Allocator, input: *zid.Data, data: []ConditionValue) !void { + var iter = try zid.ArrayIterator.init(input.*); + switch (input.*) { + .IntArray => { + var array = std.ArrayList(i32).init(allocator); + defer array.deinit(); + while (iter.next()) |v| if (!in(v, data)) try array.append(v.Int); + input.*.IntArray = try zid.allocEncodArray.Int(allocator, array.items); + }, + .FloatArray => { + var array = std.ArrayList(f64).init(allocator); + defer array.deinit(); + while (iter.next()) |v| if (!in(v, data)) try array.append(v.Float); + input.*.FloatArray = try zid.allocEncodArray.Float(allocator, array.items); + }, + .UnixArray => { + var array = std.ArrayList(u64).init(allocator); + defer array.deinit(); + while (iter.next()) |v| if (!in(v, data)) try array.append(v.Unix); + input.*.UnixArray = try zid.allocEncodArray.Unix(allocator, array.items); + }, + .BoolArray => { + var array = std.ArrayList(bool).init(allocator); + defer array.deinit(); + while (iter.next()) |v| if (!in(v, data)) try array.append(v.Bool); + input.*.BoolArray = try zid.allocEncodArray.Bool(allocator, array.items); + }, + .StrArray => { + var array = std.ArrayList([]const u8).init(allocator); + defer array.deinit(); + while (iter.next()) |v| if (!in(v, data)) try array.append(v.Str); + input.*.StrArray = try zid.allocEncodArray.Str(allocator, array.items); + }, + .UUIDArray => { + var array = std.ArrayList([16]u8).init(allocator); + defer array.deinit(); + while (iter.next()) |v| if (!in(v, data)) try array.append(v.UUID); + input.*.UUIDArray = try zid.allocEncodArray.UUID(allocator, array.items); + }, + else => unreachable, + } +} + +fn removeat(allocator: std.mem.Allocator, input: *zid.Data, data: []ConditionValue) !void { + var iter = try zid.ArrayIterator.init(input.*); + switch (input.*) { + .IntArray => { + var array = std.ArrayList(i32).init(allocator); + defer array.deinit(); + var i: i32 = 0; // 
Maybe use usize because here it limite the size of the array + while (iter.next()) |v| { + defer i += 1; + if (!in(zid.Data{ .Int = i }, data)) try array.append(v.Int); + } + input.*.IntArray = try zid.allocEncodArray.Int(allocator, array.items); + }, + .FloatArray => { + var array = std.ArrayList(f64).init(allocator); + defer array.deinit(); + var i: i32 = 0; // Maybe use usize because here it limite the size of the array + while (iter.next()) |v| { + defer i += 1; + if (!in(zid.Data{ .Int = i }, data)) try array.append(v.Float); + } + input.*.FloatArray = try zid.allocEncodArray.Float(allocator, array.items); + }, + .UnixArray => { + var array = std.ArrayList(u64).init(allocator); + defer array.deinit(); + var i: i32 = 0; // Maybe use usize because here it limite the size of the array + while (iter.next()) |v| { + defer i += 1; + if (!in(zid.Data{ .Int = i }, data)) try array.append(v.Unix); + } + input.*.UnixArray = try zid.allocEncodArray.Unix(allocator, array.items); + }, + .BoolArray => { + var array = std.ArrayList(bool).init(allocator); + defer array.deinit(); + var i: i32 = 0; // Maybe use usize because here it limite the size of the array + while (iter.next()) |v| { + defer i += 1; + if (!in(zid.Data{ .Int = i }, data)) try array.append(v.Bool); + } + input.*.BoolArray = try zid.allocEncodArray.Bool(allocator, array.items); + }, + .StrArray => { + var array = std.ArrayList([]const u8).init(allocator); + defer array.deinit(); + var i: i32 = 0; // Maybe use usize because here it limite the size of the array + while (iter.next()) |v| { + defer i += 1; + if (!in(zid.Data{ .Int = i }, data)) try array.append(v.Str); + } + input.*.StrArray = try zid.allocEncodArray.Str(allocator, array.items); + }, + .UUIDArray => unreachable, // I cant do that for removeat because link don't really have order + else => unreachable, + } +} + +// Should just use a map.contain +fn in(x: zid.Data, y: []ConditionValue) bool { + switch (x) { + .Int => |v| for (y) |z| if (v == z.int) 
return true, + .Float => |v| for (y) |z| if (v == z.float) return true, + .Unix => |v| for (y) |z| if (v == z.unix) return true, + .Bool => |v| for (y) |z| if (v == z.bool_) return true, + .Str => |v| for (y) |z| if (std.mem.eql(u8, z.str, v)) return true, + .UUID => |v| for (y) |z| if (z.link_array.contains(dtype.UUID{ .bytes = v })) return true, + else => unreachable, + } + return false; +} diff --git a/src/file/core.zig b/src/file/core.zig index d399bc6..20d0e66 100644 --- a/src/file/core.zig +++ b/src/file/core.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const config = @import("config"); const Pool = std.Thread.Pool; const SchemaEngine = @import("../schema/core.zig"); @@ -6,6 +7,7 @@ const ZipponError = @import("error").ZipponError; const log = std.log.scoped(.fileEngine); var path_to_ZipponDB_dir_buffer: [1024]u8 = undefined; +pub var data_buffer: [config.BUFFER_SIZE]u8 = undefined; /// Manage everything that is relate to read or write in files /// Or even get stats, whatever. If it touch files, it's here @@ -19,14 +21,25 @@ pub usingnamespace @import("read.zig"); pub usingnamespace @import("write.zig"); pub usingnamespace @import("dump.zig"); -allocator: std.mem.Allocator = std.heap.page_allocator, +arena: *std.heap.ArenaAllocator, +allocator: std.mem.Allocator, path_to_ZipponDB_dir: []const u8, thread_pool: *Pool, // same pool as the ThreadEngine schema_engine: SchemaEngine = undefined, // This is init after the FileEngine and I attach after. Do I need to init after tho ? 
-pub fn init(path: []const u8, thread_pool: *Pool) ZipponError!Self { +pub fn init(allocator: std.mem.Allocator, path: []const u8, thread_pool: *Pool) ZipponError!Self { + const arena = allocator.create(std.heap.ArenaAllocator) catch return ZipponError.MemoryError; + arena.* = std.heap.ArenaAllocator.init(allocator); return Self{ + .arena = arena, + .allocator = arena.allocator(), .path_to_ZipponDB_dir = std.fmt.bufPrint(&path_to_ZipponDB_dir_buffer, "{s}", .{path}) catch return ZipponError.MemoryError, .thread_pool = thread_pool, }; } + +pub fn deinit(self: *Self) void { + const parent_allocator = self.arena.child_allocator; + self.arena.deinit(); + parent_allocator.destroy(self.arena); +} diff --git a/src/file/read.zig b/src/file/read.zig index 803993b..db60fb5 100644 --- a/src/file/read.zig +++ b/src/file/read.zig @@ -28,13 +28,9 @@ var path_buffer: [1024]u8 = undefined; /// Use a struct name to populate a list with all UUID of this struct /// TODO: Multi thread that too pub fn getNumberOfEntityAndFile(self: *Self, struct_name: []const u8) ZipponError!struct { entity: usize, file: usize } { - var arena = std.heap.ArenaAllocator.init(self.allocator); - defer arena.deinit(); - const allocator = arena.allocator(); - const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name); - const to_parse = try self.allFileIndex(allocator, struct_name); - defer allocator.free(to_parse); + const to_parse = try self.allFileIndex(self.allocator, struct_name); + defer self.allocator.free(to_parse); return .{ .entity = sstruct.uuid_file_index.map.count(), .file = to_parse.len }; } diff --git a/src/file/utils.zig b/src/file/utils.zig index e0020f2..f5573a6 100644 --- a/src/file/utils.zig +++ b/src/file/utils.zig @@ -5,6 +5,7 @@ const Self = @import("core.zig").Self; const ZipponError = @import("error").ZipponError; const Allocator = std.mem.Allocator; const ConditionValue = @import("../dataStructure/filter.zig").ConditionValue; +const ValueOrArray = 
@import("../ziql/parts/newData.zig").ValueOrArray; const dtype = @import("dtype"); const UUID = dtype.UUID; const zid = @import("ZipponData"); @@ -115,7 +116,7 @@ pub fn orderedNewData( self: *Self, allocator: Allocator, struct_name: []const u8, - map: std.StringHashMap(ConditionValue), + map: std.StringHashMap(ValueOrArray), ) ZipponError![]zid.Data { const members = try self.schema_engine.structName2structMembers(struct_name); var datas = allocator.alloc(zid.Data, (members.len)) catch return ZipponError.MemoryError; @@ -125,7 +126,7 @@ pub fn orderedNewData( for (members, 0..) |member, i| { if (i == 0) continue; // Skip the id - datas[i] = try string2Data(allocator, map.get(member).?); + datas[i] = try string2Data(allocator, map.get(member).?.value); } return datas; diff --git a/src/file/write.zig b/src/file/write.zig index 7df0aa1..a5e0b12 100644 --- a/src/file/write.zig +++ b/src/file/write.zig @@ -2,6 +2,7 @@ const std = @import("std"); const config = @import("config"); const utils = @import("../utils.zig"); const zid = @import("ZipponData"); +const updateData = @import("array.zig").updateData; const Allocator = std.mem.Allocator; const Self = @import("core.zig").Self; const ZipponError = @import("error").ZipponError; @@ -14,6 +15,7 @@ const RelationMap = @import("../dataStructure/relationMap.zig"); const JsonString = RelationMap.JsonString; const EntityWriter = @import("entityWriter.zig"); const ThreadSyncContext = @import("../thread/context.zig"); +const ValueOrArray = @import("../ziql/parts/newData.zig").ValueOrArray; const dtype = @import("dtype"); const s2t = dtype.s2t; @@ -27,10 +29,10 @@ var path_buffer: [1024]u8 = undefined; pub fn addEntity( self: *Self, struct_name: []const u8, - maps: []std.StringHashMap(ConditionValue), + maps: []std.StringHashMap(ValueOrArray), writer: anytype, ) ZipponError!void { - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + var arena = std.heap.ArenaAllocator.init(self.allocator); defer arena.deinit(); 
const allocator = arena.allocator(); @@ -62,15 +64,17 @@ pub fn addEntity( data_writer.flush() catch return ZipponError.ZipponDataError; } +const UpdatePosibility = enum { fix, vari, stay }; + pub fn updateEntities( self: *Self, struct_name: []const u8, filter: ?Filter, - map: std.StringHashMap(ConditionValue), + map: std.StringHashMap(ValueOrArray), writer: anytype, additional_data: *AdditionalData, ) ZipponError!void { - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + var arena = std.heap.ArenaAllocator.init(self.allocator); defer arena.deinit(); const allocator = arena.allocator(); @@ -91,21 +95,32 @@ pub fn updateEntities( list.* = std.ArrayList(u8).init(allocator); } - var new_data_buff = allocator.alloc(zid.Data, sstruct.members.len) catch return ZipponError.MemoryError; + var index_switch = std.ArrayList(UpdatePosibility).init(allocator); + defer index_switch.deinit(); - // Convert the map to an array of ZipponData Data type, to be use with ZipponData writter - for (sstruct.members, 0..) |member, i| { - if (!map.contains(member)) continue; - new_data_buff[i] = try @import("utils.zig").string2Data(allocator, map.get(member).?); + // If the member name is not in the map, it stay + // Otherwise it need to be update. For that 2 scenarios: + // - Update all entities with a const .fix + // - Update entities base on themself .vari + // FIXME: I'm not sure that id is in the array, need to check, also need to check to prevent updating it + for (sstruct.members) |member| { + if (map.get(member)) |voa| { + switch (voa) { + .value => index_switch.append(.fix) catch return ZipponError.MemoryError, + .array => index_switch.append(.vari) catch return ZipponError.MemoryError, + } + } else { + index_switch.append(.stay) catch return ZipponError.MemoryError; + } } // Spawn threads for each file for (to_parse, 0..) 
|file_index, i| { self.thread_pool.spawn(updateEntitiesOneFile, .{ - new_data_buff, sstruct, filter, - &map, + map, + index_switch.items, thread_writer_list[i].writer(), file_index, dir, @@ -125,10 +140,10 @@ pub fn updateEntities( } fn updateEntitiesOneFile( - new_data_buff: []zid.Data, sstruct: SchemaStruct, filter: ?Filter, - map: *const std.StringHashMap(ConditionValue), + map: std.StringHashMap(ValueOrArray), + index_switch: []UpdatePosibility, writer: anytype, file_index: u64, dir: std.fs.Dir, @@ -140,6 +155,20 @@ fn updateEntitiesOneFile( defer fa.reset(); const allocator = fa.allocator(); + var new_data_buff = allocator.alloc(zid.Data, index_switch.len) catch |err| { + sync_context.logError("Cant init new data buff", err); + return; + }; + + // First I fill the one that are updated by a const + for (index_switch, 0..) |is, i| switch (is) { + .fix => new_data_buff[i] = @import("utils.zig").string2Data(allocator, map.get(sstruct.members[i]).?.value) catch |err| { + sync_context.logError("Writting data", err); + return; + }, + else => {}, + }; + const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| { sync_context.logError("Error creating file path", err); return; @@ -176,11 +205,18 @@ fn updateEntitiesOneFile( }) |row| { if (!finish_writing and (filter == null or filter.?.evaluate(row))) { // Add the unchanged Data in the new_data_buff - new_data_buff[0] = row[0]; - for (sstruct.members, 0..) |member, i| { - if (map.contains(member)) continue; - new_data_buff[i] = row[i]; - } + for (index_switch, 0..) 
|is, i| switch (is) { + .stay => new_data_buff[i] = row[i], + .vari => { + const x = map.get(sstruct.members[i]).?.array; + updateData(allocator, x.condition, &row[i], x.data) catch |err| { + sync_context.logError("Error updating data", err); + zid.deleteFile(new_path, dir) catch {}; + return; + }; + }, + else => {}, + }; log.debug("{d} {any}\n\n", .{ new_data_buff.len, new_data_buff }); @@ -233,7 +269,7 @@ pub fn deleteEntities( writer: anytype, additional_data: *AdditionalData, ) ZipponError!void { - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + var arena = std.heap.ArenaAllocator.init(self.allocator); defer arena.deinit(); const allocator = arena.allocator(); diff --git a/src/main.zig b/src/main.zig index 8241941..98ded3f 100644 --- a/src/main.zig +++ b/src/main.zig @@ -48,7 +48,10 @@ pub fn setLogPath(path: []const u8) void { } pub fn main() !void { - var cli = Cli.init(null, null); + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena.deinit(); + + var cli = Cli.init(arena.allocator(), null, null); defer cli.deinit(); try cli.start(); diff --git a/src/schema/core.zig b/src/schema/core.zig index 465e61c..b389294 100644 --- a/src/schema/core.zig +++ b/src/schema/core.zig @@ -13,18 +13,19 @@ const log = std.log.scoped(.schemaEngine); /// This include keeping in memory the schema and schema file, and some functions to get like all members of a specific struct. 
pub const Self = @This(); -var arena: std.heap.ArenaAllocator = undefined; -pub var allocator: Allocator = undefined; var schema_buffer: [config.BUFFER_SIZE]u8 = undefined; pub usingnamespace @import("utils.zig"); +arena: *std.heap.ArenaAllocator, +allocator: Allocator, struct_array: []SchemaStruct, null_terminated: [:0]u8, -pub fn init(path: []const u8, file_engine: *FileEngine) ZipponError!Self { - arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - allocator = arena.allocator(); +pub fn init(parent_allocator: Allocator, path: []const u8, file_engine: *FileEngine) ZipponError!Self { + const arena = parent_allocator.create(std.heap.ArenaAllocator) catch return ZipponError.MemoryError; + arena.* = std.heap.ArenaAllocator.init(parent_allocator); + const allocator = arena.allocator(); var buffer: [config.BUFFER_SIZE]u8 = undefined; @@ -48,15 +49,15 @@ pub fn init(path: []const u8, file_engine: *FileEngine) ZipponError!Self { } return Self{ + .arena = arena, + .allocator = allocator, .struct_array = struct_array.toOwnedSlice() catch return ZipponError.MemoryError, .null_terminated = null_terminated, }; } -pub fn deinit(_: Self) void { - arena.deinit(); -} - -pub fn getAllocator() Allocator { - return allocator; +pub fn deinit(self: *Self) void { + const parent_allocator = self.arena.child_allocator; + self.arena.deinit(); + parent_allocator.destroy(self.arena); } diff --git a/src/schema/utils.zig b/src/schema/utils.zig index 89b70b0..63c5aa6 100644 --- a/src/schema/utils.zig +++ b/src/schema/utils.zig @@ -8,6 +8,7 @@ const SchemaStruct = @import("struct.zig"); const ConditionValue = @import("../dataStructure/filter.zig").ConditionValue; const AdditionalData = @import("../dataStructure/additionalData.zig"); const RelationMap = @import("../dataStructure/relationMap.zig"); +const ValueOrArray = @import("../ziql/parts/newData.zig").ValueOrArray; const JsonString = RelationMap.JsonString; const ZipponError = @import("error").ZipponError; @@ -100,7 +101,7 @@ 
pub fn linkedStructName(self: Self, struct_name: []const u8, member_name: []cons pub fn checkIfAllMemberInMap( self: Self, struct_name: []const u8, - map: *std.StringHashMap(ConditionValue), + map: *std.StringHashMap(ValueOrArray), error_message_buffer: *std.ArrayList(u8), ) ZipponError!bool { const all_struct_member = try self.structName2structMembers(struct_name); diff --git a/src/thread/engine.zig b/src/thread/engine.zig index 33da7e4..5ed8cfe 100644 --- a/src/thread/engine.zig +++ b/src/thread/engine.zig @@ -6,32 +6,32 @@ const ZipponError = @import("error").ZipponError; const CPU_CORE = @import("config").CPU_CORE; const log = std.log.scoped(.thread); -const allocator = std.heap.page_allocator; - -var thread_arena: std.heap.ThreadSafeAllocator = undefined; -var thread_pool: Pool = undefined; - pub const ThreadEngine = @This(); thread_arena: *std.heap.ThreadSafeAllocator, thread_pool: *Pool, -pub fn init() ThreadEngine { - thread_arena = std.heap.ThreadSafeAllocator{ +pub fn init(allocator: std.mem.Allocator) !ThreadEngine { + const thread_arena = try allocator.create(std.heap.ThreadSafeAllocator); + thread_arena.* = std.heap.ThreadSafeAllocator{ .child_allocator = allocator, }; + const thread_pool = try allocator.create(std.Thread.Pool); thread_pool.init(std.Thread.Pool.Options{ .allocator = thread_arena.allocator(), .n_jobs = CPU_CORE, }) catch @panic("=("); return ThreadEngine{ - .thread_pool = &thread_pool, - .thread_arena = &thread_arena, + .thread_pool = thread_pool, + .thread_arena = thread_arena, }; } -pub fn deinit(_: ThreadEngine) void { - thread_pool.deinit(); +pub fn deinit(self: *ThreadEngine) void { + const parent_allocator = self.thread_arena.allocator(); + self.thread_pool.deinit(); + parent_allocator.destroy(self.thread_arena); + parent_allocator.destroy(self.thread_pool); } diff --git a/src/ziql/parser.zig b/src/ziql/parser.zig index 65d1481..2b78ab9 100644 --- a/src/ziql/parser.zig +++ b/src/ziql/parser.zig @@ -7,13 +7,8 @@ const Tokenizer = 
@import("tokenizer.zig").Tokenizer; const dtype = @import("dtype"); const UUID = dtype.UUID; -const Filter = @import("../dataStructure/filter.zig").Filter; -const Condition = @import("../dataStructure/filter.zig").Condition; -const ConditionValue = @import("../dataStructure/filter.zig").ConditionValue; -const ComparisonOperator = @import("../dataStructure/filter.zig").ComparisonOperator; - +const ValueOrArray = @import("parts/newData.zig").ValueOrArray; const AdditionalData = @import("../dataStructure/additionalData.zig").AdditionalData; -const AdditionalDataMember = @import("../dataStructure/additionalData.zig").AdditionalDataMember; const send = @import("../utils.zig").send; const printError = @import("../utils.zig").printError; @@ -74,8 +69,6 @@ pub usingnamespace @import("parts/filter.zig"); pub usingnamespace @import("parts/additionalData.zig"); pub usingnamespace @import("utils.zig"); -var toker: Tokenizer = undefined; - toker: *Tokenizer = undefined, file_engine: *FileEngine, schema_engine: *SchemaEngine, @@ -94,7 +87,7 @@ pub fn parse(self: *Self, buffer: [:0]const u8) ZipponError!void { try @import("parts/value.zig").initZeroMap(); - toker = Tokenizer.init(buffer); + var toker = Tokenizer.init(buffer); self.toker = &toker; var state: State = .start; @@ -241,9 +234,9 @@ pub fn parse(self: *Self, buffer: [:0]const u8) ZipponError!void { defer members.deinit(); members.appendSlice(sstruct.members[1..]) catch return ZipponError.MemoryError; - var data_map = std.StringHashMap(ConditionValue).init(allocator); + var data_map = std.StringHashMap(ValueOrArray).init(allocator); defer data_map.deinit(); - try self.parseNewData(allocator, &data_map, struct_name, &members); + try self.parseNewData(allocator, &data_map, struct_name, &members, true); var buff = std.ArrayList(u8).init(allocator); defer buff.deinit(); @@ -267,9 +260,9 @@ pub fn parse(self: *Self, buffer: [:0]const u8) ZipponError!void { defer members.deinit(); members.appendSlice(sstruct.members[1..]) 
catch return ZipponError.MemoryError; - var data_map = std.StringHashMap(ConditionValue).init(allocator); + var data_map = std.StringHashMap(ValueOrArray).init(allocator); defer data_map.deinit(); - try self.parseNewData(allocator, &data_map, struct_name, &members); + try self.parseNewData(allocator, &data_map, struct_name, &members, true); var buff = std.ArrayList(u8).init(allocator); defer buff.deinit(); @@ -340,19 +333,19 @@ pub fn parse(self: *Self, buffer: [:0]const u8) ZipponError!void { defer buff.deinit(); buff.writer().writeAll("[") catch return ZipponError.WriteError; - var maps = std.ArrayList(std.StringHashMap(ConditionValue)).init(allocator); + var maps = std.ArrayList(std.StringHashMap(ValueOrArray)).init(allocator); defer maps.deinit(); var local_arena = std.heap.ArenaAllocator.init(allocator); defer local_arena.deinit(); const local_allocator = arena.allocator(); - var data_map = std.StringHashMap(ConditionValue).init(allocator); + var data_map = std.StringHashMap(ValueOrArray).init(allocator); defer data_map.deinit(); while (true) { // I could multithread that as it do take a long time for big benchmark data_map.clearRetainingCapacity(); - try self.parseNewData(local_allocator, &data_map, struct_name, &order); + try self.parseNewData(local_allocator, &data_map, struct_name, &order, false); var error_message_buffer = std.ArrayList(u8).init(local_allocator); defer error_message_buffer.deinit(); diff --git a/src/ziql/parts/newData.zig b/src/ziql/parts/newData.zig index 25b98f3..2132e1f 100644 --- a/src/ziql/parts/newData.zig +++ b/src/ziql/parts/newData.zig @@ -8,15 +8,34 @@ const ZipponError = @import("error").ZipponError; const Self = @import("../parser.zig"); +// Ok so now for array how do I do. Because the map will not work anymore. +// I guess I change the map member_name -> ConditionValue. 
The ConditionValue can become an enum, either a ConditionValue or a new struct +// The new struct needs to have the operation (append, clear, etc) and a piece of data. The data can either be a single ConditionValue or an array of it. +// Or maybe just an array, it can be an array of 1 value. +// That way I just need to add some switch on the enum to make it work + +pub const ValueOrArray = union(enum) { + value: ConditionValue, + array: ArrayUpdate, +}; + +pub const ArrayCondition = enum { append, clear, pop, remove, removeat }; + +pub const ArrayUpdate = struct { + condition: ArrayCondition, + data: []ConditionValue, +}; + /// Take the tokenizer and return a map of the ADD action. /// Keys are the member name and value are the string of the value in the query. E.g. 'Adrien' or '10' /// Entry token need to be ( pub fn parseNewData( self: Self, allocator: Allocator, - map: *std.StringHashMap(ConditionValue), + map: *std.StringHashMap(ValueOrArray), struct_name: []const u8, order: *std.ArrayList([]const u8), + for_update: bool, ) !void { var token = self.toker.next(); var keep_next = false; @@ -80,8 +99,43 @@ pub fn parseNewData( .expect_equal => switch (token.tag) { // TODO: Implement stuff to manipulate array like APPEND or REMOVE .equal => state = .expect_new_value, + .keyword_pop => if (for_update) {} else return printError( + "Error: Can only manipulate array with UPDATE.", + ZipponError.SynthaxError, + self.toker.buffer, + token.loc.start, + token.loc.end, + ), + .keyword_clear => if (for_update) {} else return printError( + "Error: Can only manipulate array with UPDATE.", + ZipponError.SynthaxError, + self.toker.buffer, + token.loc.start, + token.loc.end, + ), + .keyword_append => if (for_update) {} else return printError( + "Error: Can only manipulate array with UPDATE.", + ZipponError.SynthaxError, + self.toker.buffer, + token.loc.start, + token.loc.end, + ), + .keyword_remove => if (for_update) {} else return printError( + "Error: Can only manipulate
array with UPDATE.", + ZipponError.SynthaxError, + self.toker.buffer, + token.loc.start, + token.loc.end, + ), + .keyword_remove_at => if (for_update) {} else return printError( + "Error: Can only manipulate array with UPDATE.", + ZipponError.SynthaxError, + self.toker.buffer, + token.loc.start, + token.loc.end, + ), else => return printError( - "Error: Expected =", + "Error: Expected = or array manipulation keyword (APPEND, CLEAR, POP, REMOVE, REMOVEAT)", ZipponError.SynthaxError, self.toker.buffer, token.loc.start, @@ -91,7 +145,10 @@ pub fn parseNewData( .expect_new_value => { const data_type = self.schema_engine.memberName2DataType(struct_name, member_name) catch return ZipponError.StructNotFound; - map.put(member_name, try self.parseConditionValue(allocator, struct_name, member_name, data_type, &token)) catch return ZipponError.MemoryError; + map.put( + member_name, + ValueOrArray{ .value = try self.parseConditionValue(allocator, struct_name, member_name, data_type, &token) }, + ) catch return ZipponError.MemoryError; if (data_type == .link or data_type == .link_array) { token = self.toker.last_token; keep_next = true; diff --git a/test_runner.zig b/test_runner.zig index ee5cfee..e87e324 100644 --- a/test_runner.zig +++ b/test_runner.zig @@ -5,10 +5,6 @@ const Allocator = std.mem.Allocator; const BORDER = "=" ** 80; -const log_allocator = std.heap.page_allocator; -var log_buff: [1024]u8 = undefined; -var log_path: []const u8 = undefined; - const log = std.log.scoped(.cli); pub const std_options = .{ .logFn = myLog,