From 3f5c929a11f49ff90d22f1a3e0df5a05882dcdeb Mon Sep 17 00:00:00 2001 From: MrBounty Date: Wed, 20 Nov 2024 23:52:36 +0100 Subject: [PATCH] Big memory improvement Started to use fixed buffers at the root of each file. I like that because that means I can estimate the amount of memory that the app uses. The only thing that still uses a non-fixed allocator is the UUIDFileIndex, but I don't like it. I think it should also use a fixed-length buffer. But then how can I be sure it is enough? :/ I mean I get it, I need an allocator here. But if this is the only place I use one, it is a shame --- src/config.zig | 5 +- src/fileEngine.zig | 158 +++++++++++++++++-------------------------- src/main.zig | 70 ++++++++----------- src/schemaEngine.zig | 94 +++++++++++++++++++++---- src/schemaParser.zig | 67 +----------------- src/stuffs/utils.zig | 37 +++++----- src/threadEngine.zig | 16 ++--- src/ziqlParser.zig | 29 +++++--- 8 files changed, 224 insertions(+), 252 deletions(-) diff --git a/src/config.zig b/src/config.zig index 084d4a9..9d5ce69 100644 --- a/src/config.zig +++ b/src/config.zig @@ -1,5 +1,6 @@ -pub const BUFFER_SIZE = 1024 * 64 * 64; // Line limit when parsing file and other buffers -pub const MAX_FILE_SIZE = 5e+8; // 500Mb +pub const BUFFER_SIZE = 1024 * 10; // Used a bit everywhere. The size for the schema for example. 10kB +pub const OUT_BUFFER_SIZE = 1024 * 1024 * 16; // Mostly use in the fileEngine for the parsing, limit of what can be write to be send basically. 
16MB +pub const MAX_FILE_SIZE = 1024 * 1024 * 64; // 64MB pub const CPU_CORE = 16; // Testing diff --git a/src/fileEngine.zig b/src/fileEngine.zig index 1760810..f5413ad 100644 --- a/src/fileEngine.zig +++ b/src/fileEngine.zig @@ -5,7 +5,7 @@ const U64 = std.atomic.Value(u64); const Pool = std.Thread.Pool; const Allocator = std.mem.Allocator; const SchemaEngine = @import("schemaEngine.zig").SchemaEngine; -const SchemaStruct = @import("schemaParser.zig").Parser.SchemaStruct; +const SchemaStruct = @import("schemaEngine.zig").SchemaStruct; const ThreadSyncContext = @import("threadEngine.zig").ThreadSyncContext; const dtype = @import("dtype"); @@ -22,41 +22,32 @@ const FileEngineError = @import("stuffs/errors.zig").FileEngineError; const config = @import("config.zig"); const BUFFER_SIZE = config.BUFFER_SIZE; +const OUT_BUFFER_SIZE = config.OUT_BUFFER_SIZE; const MAX_FILE_SIZE = config.MAX_FILE_SIZE; const RESET_LOG_AT_RESTART = config.RESET_LOG_AT_RESTART; const CPU_CORE = config.CPU_CORE; const log = std.log.scoped(.fileEngine); -// TODO: Start using State at the start and end of each function for debugging -const FileEngineState = enum { Parsing, Waiting }; +// I really like that, just some buffer in each file. Like that I can know EXACTLY how many memory I give the DB +var parsing_buffer: [OUT_BUFFER_SIZE]u8 = undefined; +var path_buffer: [1024]u8 = undefined; +var path_to_ZipponDB_dir_buffer: [1024]u8 = undefined; /// Manage everything that is relate to read or write in files /// Or even get stats, whatever. If it touch files, it's here pub const FileEngine = struct { - allocator: Allocator, - state: FileEngineState, path_to_ZipponDB_dir: []const u8, - schema_engine: SchemaEngine = undefined, // I dont really like that here - thread_pool: *Pool = undefined, + thread_pool: *Pool, // same pool as the ThreadEngine + schema_engine: SchemaEngine = undefined, // This is init after the FileEngine and I attach after. Do I need to init after tho ? 
- pub fn init(allocator: Allocator, path: []const u8, thread_pool: *Pool) ZipponError!FileEngine { + pub fn init(path: []const u8, thread_pool: *Pool) ZipponError!FileEngine { return FileEngine{ - .allocator = allocator, - .path_to_ZipponDB_dir = allocator.dupe(u8, path) catch return ZipponError.MemoryError, - .state = .Waiting, + .path_to_ZipponDB_dir = std.fmt.bufPrint(&path_to_ZipponDB_dir_buffer, "{s}", .{path}) catch return ZipponError.MemoryError, .thread_pool = thread_pool, }; } - pub fn deinit(self: *FileEngine) void { - self.allocator.free(self.path_to_ZipponDB_dir); - } - - pub fn usable(self: FileEngine) bool { - return self.state == .Waiting; - } - // --------------------Other-------------------- pub fn readSchemaFile(sub_path: []const u8, buffer: []u8) ZipponError!usize { @@ -105,8 +96,7 @@ pub const FileEngine = struct { /// Create the main folder. Including DATA, LOG and BACKUP /// TODO: Maybe start using a fixed lenght buffer instead of free everytime, but that not that important pub fn createMainDirectories(self: *FileEngine) ZipponError!void { - var path_buff = std.fmt.allocPrint(self.allocator, "{s}", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; - defer self.allocator.free(path_buff); + var path_buff = std.fmt.bufPrint(&path_buffer, "{s}", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; const cwd = std.fs.cwd(); @@ -115,32 +105,28 @@ pub const FileEngine = struct { else => return FileEngineError.CantMakeDir, }; - self.allocator.free(path_buff); - path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; + path_buff = std.fmt.bufPrint(&path_buffer, "{s}/DATA", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; cwd.makeDir(path_buff) catch |err| switch (err) { error.PathAlreadyExists => {}, else => return FileEngineError.CantMakeDir, }; - self.allocator.free(path_buff); - path_buff = 
std.fmt.allocPrint(self.allocator, "{s}/BACKUP", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; + path_buff = std.fmt.bufPrint(&path_buffer, "{s}/BACKUP", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; cwd.makeDir(path_buff) catch |err| switch (err) { error.PathAlreadyExists => {}, else => return FileEngineError.CantMakeDir, }; - self.allocator.free(path_buff); - path_buff = std.fmt.allocPrint(self.allocator, "{s}/LOG", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; + path_buff = std.fmt.bufPrint(&path_buffer, "{s}/LOG", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; cwd.makeDir(path_buff) catch |err| switch (err) { error.PathAlreadyExists => {}, else => return FileEngineError.CantMakeDir, }; - self.allocator.free(path_buff); - path_buff = std.fmt.allocPrint(self.allocator, "{s}/LOG/log", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; + path_buff = std.fmt.bufPrint(&path_buffer, "{s}/LOG/log", .{self.path_to_ZipponDB_dir}) catch return FileEngineError.MemoryError; if (RESET_LOG_AT_RESTART) { _ = cwd.createFile(path_buff, .{}) catch return FileEngineError.CantMakeFile; @@ -173,6 +159,10 @@ pub const FileEngine = struct { /// Use a struct name to populate a list with all UUID of this struct /// TODO: Multi thread that too pub fn getNumberOfEntity(self: *FileEngine, struct_name: []const u8) ZipponError!usize { + var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name); const max_file_index = try self.maxFileIndex(sstruct.name); var count: usize = 0; @@ -180,10 +170,9 @@ pub const FileEngine = struct { const dir = try utils.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{}); for (0..(max_file_index + 1)) |i| { - const path_buff = std.fmt.allocPrint(self.allocator, "{d}.zid", .{i}) 
catch return FileEngineError.MemoryError; - defer self.allocator.free(path_buff); + const path_buff = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{i}) catch return FileEngineError.MemoryError; - var iter = zid.DataIterator.init(self.allocator, path_buff, dir, sstruct.zid_schema) catch return FileEngineError.ZipponDataError; + var iter = zid.DataIterator.init(allocator, path_buff, dir, sstruct.zid_schema) catch return FileEngineError.ZipponDataError; defer iter.deinit(); while (iter.next() catch return FileEngineError.ZipponDataError) |_| count += 1; @@ -201,6 +190,10 @@ pub const FileEngine = struct { sstruct: SchemaStruct, map: *UUIDFileIndex, ) ZipponError!void { + var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + const max_file_index = try self.maxFileIndex(sstruct.name); const dir = try utils.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{}); @@ -212,14 +205,14 @@ pub const FileEngine = struct { ); // Create a thread-safe writer for each file - var thread_writer_list = self.allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return FileEngineError.MemoryError; + var thread_writer_list = allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return FileEngineError.MemoryError; defer { for (thread_writer_list) |list| list.deinit(); - self.allocator.free(thread_writer_list); + allocator.free(thread_writer_list); } for (thread_writer_list) |*list| { - list.* = std.ArrayList(UUID).init(self.allocator); + list.* = std.ArrayList(UUID).init(allocator); } // Spawn threads for each file @@ -256,7 +249,6 @@ pub const FileEngine = struct { defer fa.reset(); const allocator = fa.allocator(); - var path_buffer: [128]u8 = undefined; const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| { sync_context.logError("Error creating file path", err); return; @@ -287,6 +279,10 @@ pub const FileEngine = struct { map: *std.AutoHashMap(UUID, 
void), additional_data: *AdditionalData, ) ZipponError!void { + var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name); const max_file_index = try self.maxFileIndex(sstruct.name); @@ -299,14 +295,10 @@ pub const FileEngine = struct { ); // Create a thread-safe writer for each file - var thread_writer_list = self.allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return FileEngineError.MemoryError; - defer { - for (thread_writer_list) |list| list.deinit(); - self.allocator.free(thread_writer_list); - } + var thread_writer_list = allocator.alloc(std.ArrayList(UUID), max_file_index + 1) catch return FileEngineError.MemoryError; for (thread_writer_list) |*list| { - list.* = std.ArrayList(UUID).init(self.allocator); + list.* = std.ArrayList(UUID).init(allocator); } // Spawn threads for each file @@ -355,7 +347,6 @@ pub const FileEngine = struct { defer fa.reset(); const allocator = fa.allocator(); - var path_buffer: [128]u8 = undefined; const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| { sync_context.logError("Error creating file path", err); return; @@ -394,6 +385,10 @@ pub const FileEngine = struct { additional_data: *AdditionalData, writer: anytype, ) ZipponError!void { + var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name); const max_file_index = try self.maxFileIndex(sstruct.name); @@ -401,7 +396,7 @@ pub const FileEngine = struct { // If there is no member to find, that mean we need to return all members, so let's populate additional data with all of them if (additional_data.member_to_find.items.len == 0) { - additional_data.populateWithEverything(self.allocator, sstruct.members) catch return FileEngineError.MemoryError; + 
additional_data.populateWithEverything(allocator, sstruct.members) catch return FileEngineError.MemoryError; } // Open the dir that contain all files @@ -415,18 +410,12 @@ pub const FileEngine = struct { // Do one array and writer for each thread otherwise then create error by writing at the same time // Maybe use fixed lenght buffer for speed here - var thread_writer_list = self.allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return FileEngineError.MemoryError; + var thread_writer_list = allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return FileEngineError.MemoryError; defer { for (thread_writer_list) |list| list.deinit(); - self.allocator.free(thread_writer_list); + allocator.free(thread_writer_list); } - // Maybe do one buffer per files ? - var data_buffer: [BUFFER_SIZE]u8 = undefined; - var fa = std.heap.FixedBufferAllocator.init(&data_buffer); - defer fa.reset(); - const allocator = fa.allocator(); - // Start parsing all file in multiple thread for (0..(max_file_index + 1)) |file_index| { thread_writer_list[file_index] = std.ArrayList(u8).init(allocator); @@ -470,7 +459,6 @@ pub const FileEngine = struct { defer fa.reset(); const allocator = fa.allocator(); - var path_buffer: [16]u8 = undefined; const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| { sync_context.logError("Error creating file path", err); return; @@ -584,21 +572,14 @@ pub const FileEngine = struct { writer: anytype, n: usize, ) ZipponError!void { + var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + const file_index = try self.getFirstUsableIndexFile(struct_name); - const path = std.fmt.allocPrint( - self.allocator, - "{s}/DATA/{s}/{d}.zid", - .{ self.path_to_ZipponDB_dir, struct_name, file_index }, - ) catch return FileEngineError.MemoryError; - defer self.allocator.free(path); - - var data_buffer: [BUFFER_SIZE]u8 = undefined; - var fa = 
std.heap.FixedBufferAllocator.init(&data_buffer); - defer fa.reset(); - const data_allocator = fa.allocator(); - - const data = try self.orderedNewData(data_allocator, struct_name, map); + const path = std.fmt.bufPrint(&path_buffer, "{s}/DATA/{s}/{d}.zid", .{ self.path_to_ZipponDB_dir, struct_name, file_index }) catch return FileEngineError.MemoryError; + const data = try self.orderedNewData(allocator, struct_name, map); var data_writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError; defer data_writer.deinit(); @@ -617,6 +598,10 @@ pub const FileEngine = struct { writer: anytype, additional_data: *AdditionalData, ) ZipponError!void { + var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name); const max_file_index = try self.maxFileIndex(sstruct.name); @@ -629,29 +614,19 @@ pub const FileEngine = struct { ); // Create a thread-safe writer for each file - var thread_writer_list = self.allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return FileEngineError.MemoryError; - defer { - for (thread_writer_list) |list| list.deinit(); - self.allocator.free(thread_writer_list); - } - + var thread_writer_list = allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return FileEngineError.MemoryError; for (thread_writer_list) |*list| { - list.* = std.ArrayList(u8).init(self.allocator); + list.* = std.ArrayList(u8).init(allocator); } - var data_buffer: [BUFFER_SIZE]u8 = undefined; - var fa = std.heap.FixedBufferAllocator.init(&data_buffer); - defer fa.reset(); - const data_allocator = fa.allocator(); - - var new_data_buff = data_allocator.alloc(zid.Data, sstruct.members.len) catch return ZipponError.MemoryError; + var new_data_buff = allocator.alloc(zid.Data, sstruct.members.len) catch return ZipponError.MemoryError; // Convert the map to an array of ZipponData Data type, to be use with 
ZipponData writter for (sstruct.members, 0..) |member, i| { if (!map.contains(member)) continue; const dt = try self.schema_engine.memberName2DataType(struct_name, member); - new_data_buff[i] = try string2Data(data_allocator, dt, map.get(member).?); + new_data_buff[i] = try string2Data(allocator, dt, map.get(member).?); } // Spawn threads for each file @@ -697,7 +672,6 @@ pub const FileEngine = struct { defer fa.reset(); const allocator = fa.allocator(); - var path_buffer: [128]u8 = undefined; const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| { sync_context.logError("Error creating file path", err); return; @@ -791,6 +765,10 @@ pub const FileEngine = struct { writer: anytype, additional_data: *AdditionalData, ) ZipponError!void { + var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name); const max_file_index = try self.maxFileIndex(sstruct.name); @@ -803,14 +781,9 @@ pub const FileEngine = struct { ); // Create a thread-safe writer for each file - var thread_writer_list = self.allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return FileEngineError.MemoryError; - defer { - for (thread_writer_list) |list| list.deinit(); - self.allocator.free(thread_writer_list); - } - + var thread_writer_list = allocator.alloc(std.ArrayList(u8), max_file_index + 1) catch return FileEngineError.MemoryError; for (thread_writer_list) |*list| { - list.* = std.ArrayList(u8).init(self.allocator); + list.* = std.ArrayList(u8).init(allocator); } // Spawn threads for each file @@ -852,7 +825,6 @@ pub const FileEngine = struct { defer fa.reset(); const allocator = fa.allocator(); - var path_buffer: [128]u8 = undefined; const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| { sync_context.logError("Error creating file path", err); return; @@ -1031,13 +1003,7 @@ pub const FileEngine = 
struct { } } - const path = std.fmt.allocPrint( - self.allocator, - "{s}/DATA/{s}/{d}.zid", - .{ self.path_to_ZipponDB_dir, struct_name, i }, - ) catch return FileEngineError.MemoryError; - defer self.allocator.free(path); - + const path = std.fmt.bufPrint(&path_buffer, "{s}/DATA/{s}/{d}.zid", .{ self.path_to_ZipponDB_dir, struct_name, i }) catch return FileEngineError.MemoryError; zid.createFile(path, null) catch return FileEngineError.ZipponDataError; return i; diff --git a/src/main.zig b/src/main.zig index aa4a2dc..c8ff37b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -36,6 +36,10 @@ const State = enum { const log_allocator = std.heap.page_allocator; var log_buff: [1024]u8 = undefined; var log_path: []const u8 = undefined; +var path_buffer: [1024]u8 = undefined; +var line_buffer: [BUFFER_SIZE]u8 = undefined; +var in_buffer: [BUFFER_SIZE]u8 = undefined; +var out_buffer: [BUFFER_SIZE]u8 = undefined; const log = std.log.scoped(.cli); pub const std_options = .{ @@ -45,26 +49,21 @@ pub const std_options = .{ const DBEngineState = enum { MissingFileEngine, MissingSchemaEngine, Ok, Init }; pub const DBEngine = struct { - allocator: Allocator, state: DBEngineState = .Init, file_engine: FileEngine = undefined, schema_engine: SchemaEngine = undefined, thread_engine: ThreadEngine = undefined, - pub fn init(allocator: std.mem.Allocator, potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8) DBEngine { - var self = DBEngine{ .allocator = allocator }; + pub fn init(potential_main_path: ?[]const u8, potential_schema_path: ?[]const u8) DBEngine { + var self = DBEngine{}; - self.thread_engine = ThreadEngine.init(allocator); - - const potential_main_path_or_environment_variable = potential_main_path orelse utils.getEnvVariable(allocator, "ZIPPONDB_PATH"); - defer { - if (potential_main_path_or_environment_variable != null and potential_main_path == null) allocator.free(potential_main_path_or_environment_variable.?); - } + self.thread_engine = ThreadEngine.init(); 
+ const potential_main_path_or_environment_variable = potential_main_path orelse utils.getEnvVariable("ZIPPONDB_PATH"); if (potential_main_path_or_environment_variable) |main_path| { log_path = std.fmt.bufPrint(&log_buff, "{s}/LOG/log", .{main_path}) catch ""; log.info("Found ZIPPONDB_PATH: {s}.", .{main_path}); - self.file_engine = FileEngine.init(self.allocator, main_path, self.thread_engine.thread_pool) catch { + self.file_engine = FileEngine.init(main_path, self.thread_engine.thread_pool) catch { log.err("Error when init FileEngine", .{}); self.state = .MissingFileEngine; return self; @@ -83,14 +82,13 @@ pub const DBEngine = struct { } if (self.file_engine.isSchemaFileInDir() and potential_schema_path == null) { - const schema_path = std.fmt.allocPrint(allocator, "{s}/schema", .{self.file_engine.path_to_ZipponDB_dir}) catch { + const schema_path = std.fmt.bufPrint(&path_buffer, "{s}/schema", .{self.file_engine.path_to_ZipponDB_dir}) catch { self.state = .MissingSchemaEngine; return self; }; - defer allocator.free(schema_path); log.info("Schema founded in the database directory.", .{}); - self.schema_engine = SchemaEngine.init(self.allocator, schema_path, &self.file_engine) catch |err| { + self.schema_engine = SchemaEngine.init(schema_path, &self.file_engine) catch |err| { log.err("Error when init SchemaEngine: {any}", .{err}); self.state = .MissingSchemaEngine; return self; @@ -110,11 +108,10 @@ pub const DBEngine = struct { } log.info("Database don't have any schema yet, trying to add one.", .{}); - const potential_schema_path_or_environment_variable = potential_schema_path orelse utils.getEnvVariable(allocator, "ZIPPONDB_SCHEMA"); - if (potential_schema_path_or_environment_variable != null and potential_schema_path == null) allocator.free(potential_main_path_or_environment_variable.?); + const potential_schema_path_or_environment_variable = potential_schema_path orelse utils.getEnvVariable("ZIPPONDB_SCHEMA"); if (potential_schema_path_or_environment_variable) 
|schema_path| { log.info("Found schema path {s}.", .{schema_path}); - self.schema_engine = SchemaEngine.init(self.allocator, schema_path, &self.file_engine) catch |err| { + self.schema_engine = SchemaEngine.init(schema_path, &self.file_engine) catch |err| { log.err("Error when init SchemaEngine: {any}", .{err}); self.state = .MissingSchemaEngine; return self; @@ -135,9 +132,7 @@ pub const DBEngine = struct { } pub fn deinit(self: *DBEngine) void { - if (self.state == .Ok or self.state == .MissingSchemaEngine) self.file_engine.deinit(); // Pretty sure I can use like state > 2 because enum of just number if (self.state == .Ok) self.schema_engine.deinit(); - self.thread_engine.deinit(); } pub fn runQuery(self: *DBEngine, null_term_query_str: [:0]const u8) void { @@ -199,31 +194,26 @@ pub fn myLog( // TODO: If an argument is given when starting the binary, it is the db path pub fn main() !void { - errdefer log.warn("Main function ended with an error", .{}); - - var gpa = std.heap.GeneralPurposeAllocator(.{}){}; - const allocator = gpa.allocator(); - defer switch (gpa.deinit()) { - .ok => {}, - .leak => log.debug("We fucked it up bro...\n", .{}), - }; - - var db_engine = DBEngine.init(allocator, null, null); + var db_engine = DBEngine.init(null, null); defer db_engine.deinit(); - const line_buf = try allocator.alloc(u8, BUFFER_SIZE); - defer allocator.free(line_buf); + var fa = std.heap.FixedBufferAllocator.init(&out_buffer); + const allocator = fa.allocator(); while (true) { + fa.reset(); + db_engine.thread_engine.reset(); std.debug.print("> ", .{}); // TODO: Find something better than just std.debug.print - const line = try std.io.getStdIn().reader().readUntilDelimiterOrEof(line_buf, '\n'); + const line = std.io.getStdIn().reader().readUntilDelimiterOrEof(&in_buffer, '\n') catch { + log.debug("Command too long for buffer", .{}); + continue; + }; if (line) |line_str| { const start_time = std.time.milliTimestamp(); log.debug("Query received: {s}", .{line_str}); - const 
null_term_line_str = try allocator.dupeZ(u8, line_str[0..line_str.len]); - defer allocator.free(null_term_line_str); + const null_term_line_str = try std.fmt.bufPrintZ(&line_buffer, "{s}", .{line_str}); var toker = cliTokenizer.init(null_term_line_str); var token = toker.next(); @@ -299,11 +289,7 @@ pub fn main() !void { .expect_path_to_db => switch (token.tag) { .identifier => { db_engine.deinit(); - db_engine = DBEngine.init( - allocator, - try allocator.dupe(u8, toker.getTokenSlice(token)), - null, - ); + db_engine = DBEngine.init(toker.getTokenSlice(token), null); state = .end; }, else => { @@ -316,7 +302,7 @@ pub fn main() !void { .string_literal => { const null_term_query_str = try allocator.dupeZ(u8, toker.buffer[token.loc.start + 1 .. token.loc.end - 1]); defer allocator.free(null_term_query_str); - db_engine.runQuery(null_term_query_str); + db_engine.runQuery(null_term_query_str); // TODO: THis should return something and I should send from here, not from the parser state = .end; }, .keyword_help => { @@ -333,7 +319,7 @@ pub fn main() !void { .keyword_describe => { if (db_engine.state == .MissingFileEngine) send("Error: No database selected. Please use 'db new' or 'db use'.", .{}); if (db_engine.state == .MissingSchemaEngine) send("Error: No schema in database. 
Please use 'schema init'.", .{}); - send("Schema:\n {s}", .{db_engine.schema_engine.null_terminated_schema_buff}); + send("Schema:\n {s}", .{db_engine.schema_engine.null_terminated}); state = .end; }, .keyword_init => { @@ -354,8 +340,8 @@ pub fn main() !void { .identifier => { const main_path = try allocator.dupe(u8, db_engine.file_engine.path_to_ZipponDB_dir); db_engine.deinit(); - db_engine = DBEngine.init(allocator, main_path, toker.getTokenSlice(token)); - try db_engine.file_engine.writeSchemaFile(db_engine.schema_engine.null_terminated_schema_buff); + db_engine = DBEngine.init(main_path, toker.getTokenSlice(token)); + try db_engine.file_engine.writeSchemaFile(db_engine.schema_engine.null_terminated); state = .end; }, else => { diff --git a/src/schemaEngine.zig b/src/schemaEngine.zig index 033cf23..00eb198 100644 --- a/src/schemaEngine.zig +++ b/src/schemaEngine.zig @@ -1,37 +1,108 @@ const std = @import("std"); +const zid = @import("ZipponData"); const Allocator = std.mem.Allocator; -const SchemaStruct = @import("schemaParser.zig").Parser.SchemaStruct; const Parser = @import("schemaParser.zig").Parser; const Tokenizer = @import("tokenizers/schema.zig").Tokenizer; const ZipponError = @import("stuffs/errors.zig").ZipponError; const dtype = @import("dtype"); const DataType = dtype.DataType; +const UUIDFileIndex = @import("stuffs/UUIDFileIndex.zig").UUIDIndexMap; const FileEngine = @import("fileEngine.zig").FileEngine; const config = @import("config.zig"); const BUFFER_SIZE = config.BUFFER_SIZE; +var schema_buffer: [BUFFER_SIZE]u8 = undefined; + const log = std.log.scoped(.schemaEngine); +// TODO: Make better memory management + +pub const SchemaStruct = struct { + allocator: Allocator, + name: []const u8, + members: [][]const u8, + types: []DataType, + zid_schema: []zid.DType, + links: std.StringHashMap([]const u8), // Map key as member_name and value as struct_name of the link + uuid_file_index: *UUIDFileIndex, // Map UUID to the index of the file store in + + 
pub fn init( + allocator: Allocator, + name: []const u8, + members: [][]const u8, + types: []DataType, + links: std.StringHashMap([]const u8), + ) ZipponError!SchemaStruct { + const uuid_file_index = allocator.create(UUIDFileIndex) catch return ZipponError.MemoryError; + uuid_file_index.* = UUIDFileIndex.init(allocator) catch return ZipponError.MemoryError; + return SchemaStruct{ + .allocator = allocator, + .name = name, + .members = members, + .types = types, + .zid_schema = SchemaStruct.fileDataSchema(allocator, types) catch return ZipponError.MemoryError, + .links = links, + .uuid_file_index = uuid_file_index, + }; + } + + pub fn deinit(self: *SchemaStruct) void { + self.allocator.free(self.members); + self.allocator.free(self.types); + self.allocator.free(self.zid_schema); + self.links.deinit(); + self.uuid_file_index.deinit(); + self.allocator.destroy(self.uuid_file_index); + } + + fn fileDataSchema(allocator: Allocator, dtypes: []DataType) ZipponError![]zid.DType { + var schema = std.ArrayList(zid.DType).init(allocator); + + for (dtypes) |dt| { + schema.append(switch (dt) { + .int => .Int, + .float => .Float, + .str => .Str, + .bool => .Bool, + .link => .UUID, + .self => .UUID, + .date => .Unix, + .time => .Unix, + .datetime => .Unix, + .int_array => .IntArray, + .float_array => .FloatArray, + .str_array => .StrArray, + .bool_array => .BoolArray, + .date_array => .UnixArray, + .time_array => .UnixArray, + .datetime_array => .UnixArray, + .link_array => .UUIDArray, + }) catch return ZipponError.MemoryError; + } + return schema.toOwnedSlice() catch return ZipponError.MemoryError; + } +}; + /// Manage everything that is relate to the schema /// This include keeping in memory the schema and schema file, and some functions to get like all members of a specific struct. /// For now it is a bit empty. 
But this is where I will manage migration pub const SchemaEngine = struct { allocator: Allocator, - null_terminated_schema_buff: [:0]u8, struct_array: []SchemaStruct, + null_terminated: [:0]u8, // The path is the path to the schema file - pub fn init(allocator: Allocator, path: []const u8, file_engine: *FileEngine) ZipponError!SchemaEngine { + pub fn init(path: []const u8, file_engine: *FileEngine) ZipponError!SchemaEngine { + const allocator = std.heap.page_allocator; + + var buffer: [BUFFER_SIZE]u8 = undefined; + log.debug("Trying to init a SchemaEngine with path {s}", .{path}); - var schema_buf = allocator.alloc(u8, BUFFER_SIZE) catch return ZipponError.MemoryError; - defer allocator.free(schema_buf); + const len: usize = try FileEngine.readSchemaFile(path, &buffer); + const null_terminated = std.fmt.bufPrintZ(&schema_buffer, "{s}", .{buffer[0..len]}) catch return ZipponError.MemoryError; - const len: usize = try FileEngine.readSchemaFile(path, schema_buf); - const null_terminated_schema_buff = allocator.dupeZ(u8, schema_buf[0..len]) catch return ZipponError.MemoryError; - errdefer allocator.free(null_terminated_schema_buff); - - var toker = Tokenizer.init(null_terminated_schema_buff); + var toker = Tokenizer.init(null_terminated); var parser = Parser.init(&toker, allocator); var struct_array = std.ArrayList(SchemaStruct).init(allocator); @@ -48,15 +119,14 @@ pub const SchemaEngine = struct { return SchemaEngine{ .allocator = allocator, - .null_terminated_schema_buff = null_terminated_schema_buff, .struct_array = struct_array.toOwnedSlice() catch return ZipponError.MemoryError, + .null_terminated = null_terminated, }; } pub fn deinit(self: *SchemaEngine) void { for (self.struct_array) |*elem| elem.deinit(); self.allocator.free(self.struct_array); - self.allocator.free(self.null_terminated_schema_buff); } /// Get the type of the member diff --git a/src/schemaParser.zig b/src/schemaParser.zig index 2314cac..3f00d56 100644 --- a/src/schemaParser.zig +++ 
b/src/schemaParser.zig @@ -1,12 +1,12 @@ const std = @import("std"); const zid = @import("ZipponData"); +const SchemaStruct = @import("schemaEngine.zig").SchemaStruct; const Allocator = std.mem.Allocator; const DataType = @import("dtype").DataType; const UUID = @import("dtype").UUID; const Toker = @import("tokenizers/schema.zig").Tokenizer; const Token = @import("tokenizers/schema.zig").Token; const Loc = @import("tokenizers/shared/loc.zig").Loc; -const UUIDFileIndex = @import("stuffs/UUIDFileIndex.zig").UUIDIndexMap; const send = @import("stuffs/utils.zig").send; const printError = @import("stuffs/utils.zig").printError; @@ -38,71 +38,6 @@ pub const Parser = struct { } // Rename something better and move it somewhere else - pub const SchemaStruct = struct { - allocator: Allocator, - name: []const u8, - members: [][]const u8, - types: []DataType, - zid_schema: []zid.DType, - links: std.StringHashMap([]const u8), // Map key as member_name and value as struct_name of the link - uuid_file_index: *UUIDFileIndex, // Map UUID to the index of the file store in - - pub fn init( - allocator: Allocator, - name: []const u8, - members: [][]const u8, - types: []DataType, - links: std.StringHashMap([]const u8), - ) SchemaParserError!SchemaStruct { - const uuid_file_index = allocator.create(UUIDFileIndex) catch return SchemaParserError.MemoryError; - uuid_file_index.* = UUIDFileIndex.init(allocator) catch return SchemaParserError.MemoryError; - return SchemaStruct{ - .allocator = allocator, - .name = name, - .members = members, - .types = types, - .zid_schema = SchemaStruct.fileDataSchema(allocator, types) catch return SchemaParserError.MemoryError, - .links = links, - .uuid_file_index = uuid_file_index, - }; - } - - pub fn deinit(self: *SchemaStruct) void { - self.allocator.free(self.members); - self.allocator.free(self.types); - self.allocator.free(self.zid_schema); - self.links.deinit(); - self.uuid_file_index.deinit(); - self.allocator.destroy(self.uuid_file_index); - } - - 
fn fileDataSchema(allocator: Allocator, dtypes: []DataType) SchemaParserError![]zid.DType { - var schema = std.ArrayList(zid.DType).init(allocator); - - for (dtypes) |dt| { - schema.append(switch (dt) { - .int => .Int, - .float => .Float, - .str => .Str, - .bool => .Bool, - .link => .UUID, - .self => .UUID, - .date => .Unix, - .time => .Unix, - .datetime => .Unix, - .int_array => .IntArray, - .float_array => .FloatArray, - .str_array => .StrArray, - .bool_array => .BoolArray, - .date_array => .UnixArray, - .time_array => .UnixArray, - .datetime_array => .UnixArray, - .link_array => .UUIDArray, - }) catch return SchemaParserError.MemoryError; - } - return schema.toOwnedSlice() catch return SchemaParserError.MemoryError; - } - }; pub fn parse(self: *Parser, struct_array: *std.ArrayList(SchemaStruct)) !void { var state: State = .expect_struct_name_OR_end; diff --git a/src/stuffs/utils.zig b/src/stuffs/utils.zig index d94883e..04239bf 100644 --- a/src/stuffs/utils.zig +++ b/src/stuffs/utils.zig @@ -3,14 +3,21 @@ const std = @import("std"); const ZipponError = @import("errors.zig").ZipponError; const log = std.log.scoped(.utils); -pub fn getEnvVariable(allocator: std.mem.Allocator, variable: []const u8) ?[]const u8 { +// These fixed buffers use about 2MB (2048KB) of memory in total +var map_error_buffer: [1024 * 1024]u8 = undefined; // Shared scratch buffer: backs both getEnvMap and error-message formatting (it is not a map of errors) +var value_buffer: [1024]u8 = undefined; +var path_buffer: [1024 * 1024]u8 = undefined; +pub fn getEnvVariable(variable: []const u8) ?[]const u8 { + var fa = std.heap.FixedBufferAllocator.init(&map_error_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + var env_map = std.process.getEnvMap(allocator) catch return null; - defer env_map.deinit(); var iter = env_map.iterator(); - while (iter.next()) |entry| { - if (std.mem.eql(u8, entry.key_ptr.*, variable)) return allocator.dupe(u8, entry.value_ptr.*) catch return null; + if (std.mem.eql(u8, entry.key_ptr.*, variable)) return std.fmt.bufPrint(&value_buffer, "{s}",
.{entry.value_ptr.*}) catch return null; } return null; @@ -49,10 +56,12 @@ pub fn send(comptime format: []const u8, args: anytype) void { /// Print an error and send it to the user pointing to the token pub fn printError(message: []const u8, err: ZipponError, query: ?[]const u8, start: ?usize, end: ?usize) ZipponError { - const allocator = std.heap.page_allocator; + var fa = std.heap.FixedBufferAllocator.init(&map_error_buffer); + defer fa.reset(); + const allocator = fa.allocator(); + var buffer = std.ArrayList(u8).init(allocator); defer buffer.deinit(); - var writer = buffer.writer(); writer.print("{{\"error\": \"", .{}) catch {}; @@ -60,11 +69,9 @@ pub fn printError(message: []const u8, err: ZipponError, query: ?[]const u8, sta writer.print("{s}\n", .{message}) catch {}; if ((start != null) and (end != null) and (query != null)) { - const buffer_query = allocator.dupe(u8, query.?) catch return ZipponError.MemoryError; - defer allocator.free(buffer_query); - - std.mem.replaceScalar(u8, buffer_query, '\n', ' '); - writer.print("{s}\n", .{buffer_query}) catch {}; + const query_buffer = std.fmt.bufPrint(&map_error_buffer, "{s}", .{query.?}) catch return ZipponError.MemoryError; + std.mem.replaceScalar(u8, query_buffer, '\n', ' '); + writer.print("{s}\n", .{query.?}) catch {}; // Calculate the number of spaces needed to reach the start position. 
var spaces: usize = 0; @@ -88,15 +95,11 @@ pub fn printError(message: []const u8, err: ZipponError, query: ?[]const u8, sta } pub fn printOpenDir(comptime format: []const u8, args: anytype, options: std.fs.Dir.OpenDirOptions) ZipponError!std.fs.Dir { - var buff: [1024 * 16]u8 = undefined; // INFO: Hard coded buffer size - - const path = std.fmt.bufPrint(&buff, format, args) catch return ZipponError.CantOpenDir; + const path = std.fmt.bufPrint(&path_buffer, format, args) catch return ZipponError.CantOpenDir; return std.fs.cwd().openDir(path, options) catch ZipponError.CantOpenDir; } pub fn printOpenFile(comptime format: []const u8, args: anytype, options: std.fs.File.OpenFlags) ZipponError!std.fs.File { - var buff: [1024 * 16]u8 = undefined; // INFO: Hard coded buffer size - - const path = std.fmt.bufPrint(&buff, format, args) catch return ZipponError.CantOpenDir; + const path = std.fmt.bufPrint(&path_buffer, format, args) catch return ZipponError.CantOpenDir; return std.fs.cwd().openFile(path, options) catch ZipponError.CantOpenFile; } diff --git a/src/threadEngine.zig b/src/threadEngine.zig index 0a90a51..6525b04 100644 --- a/src/threadEngine.zig +++ b/src/threadEngine.zig @@ -7,8 +7,13 @@ const Allocator = std.mem.Allocator; const ZipponError = @import("stuffs/errors.zig").ZipponError; const CPU_CORE = @import("config.zig").CPU_CORE; +const BUFFER_SIZE = @import("config.zig").BUFFER_SIZE; const log = std.log.scoped(.thread); +var alloc_buff: [BUFFER_SIZE]u8 = undefined; +var fa = std.heap.FixedBufferAllocator.init(&alloc_buff); +const allocator = fa.allocator(); + pub const ThreadSyncContext = struct { processed_struct: std.atomic.Value(u64) = std.atomic.Value(u64).init(0), error_file: std.atomic.Value(u64) = std.atomic.Value(u64).init(0), @@ -50,12 +55,10 @@ pub const ThreadSyncContext = struct { }; pub const ThreadEngine = struct { - allocator: Allocator, thread_arena: *std.heap.ThreadSafeAllocator = undefined, thread_pool: *Pool = undefined, - // TODO: Make 
better error handeling - pub fn init(allocator: Allocator) ThreadEngine { + pub fn init() ThreadEngine { const thread_arena = allocator.create(std.heap.ThreadSafeAllocator) catch @panic("=("); thread_arena.* = std.heap.ThreadSafeAllocator{ .child_allocator = allocator, @@ -68,15 +71,12 @@ pub const ThreadEngine = struct { }) catch @panic("=("); return ThreadEngine{ - .allocator = allocator, .thread_pool = thread_pool, .thread_arena = thread_arena, }; } - pub fn deinit(self: *ThreadEngine) void { - self.thread_pool.deinit(); - self.allocator.destroy(self.thread_pool); - self.allocator.destroy(self.thread_arena); + pub fn reset(_: ThreadEngine) void { + fa.reset(); } }; diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig index f7e35fb..084272a 100644 --- a/src/ziqlParser.zig +++ b/src/ziqlParser.zig @@ -71,6 +71,7 @@ pub const Parser = struct { file_engine: *FileEngine, schema_engine: *SchemaEngine, + // TODO: Improve memory management, stop using an alloc in init maybe pub fn init(allocator: Allocator, toker: *Tokenizer, file_engine: *FileEngine, schema_engine: *SchemaEngine) Parser { // Do I need to init a FileEngine at each Parser, can't I put it in the CLI parser instead ? 
return Parser{ @@ -1127,13 +1128,23 @@ test "Specific query" { try testParsing("GRAB User [1]"); } -test "Relationship" { - try testParsing("GRAB User {best_friend IN {name = 'Bob'}}"); -} +// TODO: next step is to make this work -test "DELETE" { - try testParsing("DELETE User {name='Bob'}"); -} +//test "ADD relationship" { +// try testParsing("ADD User (name = 'Boba', email='boba@email.com', age=25, scores=[ ], best_friend={name='Bob'}, bday=2000/01/01, a_time=12:04, last_order=2000/01/01-12:45)"); +//} + +//test "UPDATE relationship" { +// try testParsing("UPDATE User [1] {} TO (best_friend={name='Boba'})"); +//} + +//test "GRAB Relationship" { +// try testParsing("GRAB User {best_friend IN {name = 'Bob'}}"); +//} + +//test "DELETE" { +// try testParsing("DELETE User {name='Bob'}"); +//} test "Synthax error" { try expectParsingError("GRAB {}", ZiQlParserError.StructNotFound); @@ -1151,7 +1162,7 @@ fn testParsing(source: [:0]const u8) !void { const TEST_DATA_DIR = @import("config.zig").TEST_DATA_DIR; const allocator = std.testing.allocator; - var db_engine = DBEngine.init(allocator, TEST_DATA_DIR, null); + var db_engine = DBEngine.init(TEST_DATA_DIR, null); defer db_engine.deinit(); var toker = Tokenizer.init(source); @@ -1169,7 +1180,7 @@ fn expectParsingError(source: [:0]const u8, err: ZiQlParserError) !void { const TEST_DATA_DIR = @import("config.zig").TEST_DATA_DIR; const allocator = std.testing.allocator; - var db_engine = DBEngine.init(allocator, TEST_DATA_DIR, null); + var db_engine = DBEngine.init(TEST_DATA_DIR, null); defer db_engine.deinit(); var toker = Tokenizer.init(source); @@ -1196,7 +1207,7 @@ fn testParseFilter(source: [:0]const u8) !void { const TEST_DATA_DIR = @import("config.zig").TEST_DATA_DIR; const allocator = std.testing.allocator; - var db_engine = DBEngine.init(allocator, TEST_DATA_DIR, null); + var db_engine = DBEngine.init(TEST_DATA_DIR, null); defer db_engine.deinit(); var toker = Tokenizer.init(source);