From dba73ce1130a3eab3c692eebf06e44316957a2d2 Mon Sep 17 00:00:00 2001 From: MrBounty Date: Sat, 2 Nov 2024 22:12:47 +0100 Subject: [PATCH] Started to use fixed length alloc for performance For very important stuff like the writer that writes data when parsing, started to use fixed length because it takes the majority of the time to write, not to parse =/ Gonna need to improve that --- src/config.zig | 2 +- src/fileEngine.zig | 50 +++++++++++++++++++++++++------------------- src/stuffs/utils.zig | 2 ++ src/ziqlParser.zig | 47 +++++++++++++++++++++++++---------------- 4 files changed, 61 insertions(+), 40 deletions(-) diff --git a/src/config.zig b/src/config.zig index 83e89a3..2fc862c 100644 --- a/src/config.zig +++ b/src/config.zig @@ -1,4 +1,4 @@ -pub const BUFFER_SIZE = 1024 * 50; // Line limit when parsing file +pub const BUFFER_SIZE = 1024 * 64 * 64; // Line limit when parsing file and other buffers pub const MAX_FILE_SIZE = 5e+6; // 5Mb pub const CSV_DELIMITER = ';'; // TODO: Delete diff --git a/src/fileEngine.zig b/src/fileEngine.zig index 45d20ac..5f0aac1 100644 --- a/src/fileEngine.zig +++ b/src/fileEngine.zig @@ -3,6 +3,7 @@ const utils = @import("stuffs/utils.zig"); const dtype = @import("dtype"); const s2t = dtype.s2t; const zid = @import("ZipponData"); +const time = std.time; const Allocator = std.mem.Allocator; const UUID = dtype.UUID; @@ -297,7 +298,7 @@ pub const FileEngine = struct { self: *FileEngine, struct_name: []const u8, filter: ?Filter, - buffer: *std.ArrayList(u8), + writer: anytype, additional_data: *AdditionalData, ) FileEngineError!void { const sstruct = try self.structName2SchemaStruct(struct_name); @@ -317,13 +318,18 @@ pub const FileEngine = struct { additional_data.populateWithEverything(self.allocator, sstruct.members) catch return FileEngineError.MemoryError; } - var writer = buffer.writer(); + var data_buffer: [BUFFER_SIZE]u8 = undefined; + var fa = std.heap.FixedBufferAllocator.init(&data_buffer); + defer fa.reset(); + const 
data_allocator = fa.allocator(); + writer.writeAll("[") catch return FileEngineError.WriteError; for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that self.allocator.free(path_buff); path_buff = std.fmt.allocPrint(self.allocator, "{d}.zid", .{file_index}) catch return FileEngineError.MemoryError; - var iter = zid.DataIterator.init(self.allocator, path_buff, dir, sstruct.zid_schema) catch return FileEngineError.ZipponDataError; + fa.reset(); + var iter = zid.DataIterator.init(data_allocator, path_buff, dir, sstruct.zid_schema) catch return FileEngineError.ZipponDataError; defer iter.deinit(); blk: while (iter.next() catch return FileEngineError.ZipponDataError) |row| { @@ -361,7 +367,6 @@ pub const FileEngine = struct { if (additional_data.entity_count_to_find != 0 and total_currently_found >= additional_data.entity_count_to_find) break :blk; } } - writer.writeAll("]") catch return FileEngineError.WriteError; } @@ -399,7 +404,7 @@ pub const FileEngine = struct { self: *FileEngine, struct_name: []const u8, map: std.StringHashMap([]const u8), - buffer: *std.ArrayList(u8), + writer: anytype, ) FileEngineError!void { const uuid = UUID.init(); @@ -412,18 +417,20 @@ pub const FileEngine = struct { ) catch return FileEngineError.MemoryError; defer self.allocator.free(path); - var arena = std.heap.ArenaAllocator.init(self.allocator); - defer arena.deinit(); - const data = try self.orderedNewData(arena.allocator(), struct_name, map); + var data_buffer: [BUFFER_SIZE]u8 = undefined; + var fa = std.heap.FixedBufferAllocator.init(&data_buffer); + defer fa.reset(); + const data_allocator = fa.allocator(); + + const data = try self.orderedNewData(data_allocator, struct_name, map); var data_writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError; data_writer.write(data) catch return FileEngineError.ZipponDataError; data_writer.flush() catch return FileEngineError.ZipponDataError; - var writer = buffer.writer(); - writer.writeByte('{') 
catch return FileEngineError.WriteError; + writer.writeByte('[') catch return FileEngineError.WriteError; writer.print("\"{s}\"", .{uuid.format_uuid()}) catch return FileEngineError.WriteError; - writer.writeAll("}, ") catch return FileEngineError.WriteError; + writer.writeAll("], ") catch return FileEngineError.WriteError; } pub fn updateEntities( @@ -431,7 +438,7 @@ pub const FileEngine = struct { struct_name: []const u8, filter: ?Filter, map: std.StringHashMap([]const u8), - buffer: *std.ArrayList(u8), + writer: anytype, additional_data: *AdditionalData, ) FileEngineError!void { const sstruct = try self.structName2SchemaStruct(struct_name); @@ -456,7 +463,6 @@ pub const FileEngine = struct { new_data_buff[i] = try string2Data(self.allocator, dt, map.get(member).?); } - var writer = buffer.writer(); writer.writeAll("[") catch return FileEngineError.WriteError; for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that if (additional_data.entity_count_to_find != 0 and total_currently_found >= additional_data.entity_count_to_find) break; @@ -483,8 +489,6 @@ pub const FileEngine = struct { new_data_buff[i] = row[i]; } - std.debug.print("{any}\n\n", .{new_data_buff}); - new_writer.write(new_data_buff) catch return FileEngineError.WriteError; writer.writeByte('{') catch return FileEngineError.WriteError; writer.print("\"{s}\"", .{UUID.format_bytes(row[0].UUID)}) catch return FileEngineError.WriteError; @@ -502,8 +506,6 @@ pub const FileEngine = struct { dir.rename(new_path_buff, path_buff) catch return FileEngineError.RenameFileError; } - writer.writeAll("]") catch return FileEngineError.WriteError; - for (try self.structName2structMembers(struct_name), 1..) 
|member, i| { if (!map.contains(member)) continue; @@ -524,7 +526,7 @@ pub const FileEngine = struct { self: *FileEngine, struct_name: []const u8, filter: ?Filter, - buffer: *std.ArrayList(u8), + writer: anytype, additional_data: *AdditionalData, ) FileEngineError!void { const sstruct = try self.structName2SchemaStruct(struct_name); @@ -539,7 +541,6 @@ pub const FileEngine = struct { defer self.allocator.free(path_buff); const dir = std.fs.cwd().openDir(path_buff, .{}) catch return FileEngineError.CantOpenDir; - var writer = buffer.writer(); writer.writeAll("[") catch return FileEngineError.WriteError; for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that self.allocator.free(path_buff); @@ -642,7 +643,13 @@ pub const FileEngine = struct { } /// Take a map from the parseNewData and return an ordered array of Data to be use in a DataWriter - fn orderedNewData(self: *FileEngine, allocator: Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) FileEngineError![]const zid.Data { + /// TODO: Optimize + fn orderedNewData( + self: *FileEngine, + allocator: Allocator, + struct_name: []const u8, + map: std.StringHashMap([]const u8), + ) FileEngineError![]zid.Data { const members = try self.structName2structMembers(struct_name); const types = try self.structName2DataType(struct_name); @@ -660,6 +667,7 @@ pub const FileEngine = struct { // --------------------Schema utils-------------------- /// Get the index of the first file that is bellow the size limit. If not found, create a new file + /// TODO: Need some serious speed up. 
I should keep in memory a file->size as a hashmap and use that instead fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) FileEngineError!usize { var path = std.fmt.allocPrint( self.allocator, @@ -682,7 +690,7 @@ pub const FileEngine = struct { } } - i += 1; + self.allocator.free(path); path = std.fmt.allocPrint( self.allocator, "{s}/DATA/{s}/{d}.zid", diff --git a/src/stuffs/utils.zig b/src/stuffs/utils.zig index 1272714..c30ebf7 100644 --- a/src/stuffs/utils.zig +++ b/src/stuffs/utils.zig @@ -37,6 +37,8 @@ const stdout = std.io.getStdOut().writer(); // Maybe create a struct for that pub fn send(comptime format: []const u8, args: anytype) void { + if (false) return; + stdout.print(format, args) catch |err| { log.err("Can't send: {any}", .{err}); stdout.print("\x03\n", .{}) catch {}; diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig index 5975e3c..7ea4292 100644 --- a/src/ziqlParser.zig +++ b/src/ziqlParser.zig @@ -23,6 +23,10 @@ const printError = @import("stuffs/utils.zig").printError; const ZiQlParserError = @import("stuffs/errors.zig").ZiQlParserError; const ZipponError = @import("stuffs/errors.zig").ZipponError; +const BUFFER_SIZE = @import("config.zig").BUFFER_SIZE; + +const log = std.log.scoped(.ziqlParser); + const State = enum { start, invalid, @@ -85,6 +89,11 @@ pub const Parser = struct { var struct_name: []const u8 = undefined; var action: enum { GRAB, ADD, UPDATE, DELETE } = undefined; + var out_buff: [BUFFER_SIZE]u8 = undefined; + var fa = std.heap.FixedBufferAllocator.init(&out_buff); + defer fa.reset(); + const out_allocator = self.allocator; + var token = self.toker.next(); var keep_next = false; // Use in the loop to prevent to get the next token when continue. 
Just need to make it true and it is reset at every loop @@ -177,18 +186,18 @@ pub const Parser = struct { var filter = try self.parseFilter(struct_name, false); defer filter.deinit(); - var buff = std.ArrayList(u8).init(self.allocator); + var buff = std.ArrayList(u8).init(out_allocator); defer buff.deinit(); - try self.file_engine.parseEntities(struct_name, filter, &buff, &additional_data); + try self.file_engine.parseEntities(struct_name, filter, &buff.writer(), &additional_data); send("{s}", .{buff.items}); state = .end; }, .eof => { - var buff = std.ArrayList(u8).init(self.allocator); + var buff = std.ArrayList(u8).init(out_allocator); defer buff.deinit(); - try self.file_engine.parseEntities(struct_name, null, &buff, &additional_data); + try self.file_engine.parseEntities(struct_name, null, &buff.writer(), &additional_data); send("{s}", .{buff.items}); state = .end; }, @@ -207,9 +216,6 @@ pub const Parser = struct { var filter = try self.parseFilter(struct_name, false); defer filter.deinit(); - var uuids = std.ArrayList(UUID).init(self.allocator); - defer uuids.deinit(); - token = self.toker.last(); if (token.tag != .keyword_to) return printError( @@ -233,10 +239,10 @@ pub const Parser = struct { defer data_map.deinit(); try self.parseNewData(&data_map, struct_name); - var buff = std.ArrayList(u8).init(self.allocator); + var buff = std.ArrayList(u8).init(out_allocator); defer buff.deinit(); - try self.file_engine.updateEntities(struct_name, filter, data_map, &buff, &additional_data); + try self.file_engine.updateEntities(struct_name, filter, data_map, &buff.writer(), &additional_data); send("{s}", .{buff.items}); state = .end; }, @@ -258,10 +264,10 @@ pub const Parser = struct { defer data_map.deinit(); try self.parseNewData(&data_map, struct_name); - var buff = std.ArrayList(u8).init(self.allocator); + var buff = std.ArrayList(u8).init(out_allocator); defer buff.deinit(); - try self.file_engine.updateEntities(struct_name, null, data_map, &buff, 
&additional_data); + try self.file_engine.updateEntities(struct_name, null, data_map, &buff.writer(), &additional_data); send("{s}", .{buff.items}); state = .end; }, @@ -279,18 +285,18 @@ pub const Parser = struct { var filter = try self.parseFilter(struct_name, false); defer filter.deinit(); - var buff = std.ArrayList(u8).init(self.allocator); + var buff = std.ArrayList(u8).init(out_allocator); defer buff.deinit(); - try self.file_engine.deleteEntities(struct_name, filter, &buff, &additional_data); + try self.file_engine.deleteEntities(struct_name, filter, &buff.writer(), &additional_data); send("{s}", .{buff.items}); state = .end; }, .eof => { - var buff = std.ArrayList(u8).init(self.allocator); + var buff = std.ArrayList(u8).init(out_allocator); defer buff.deinit(); - try self.file_engine.deleteEntities(struct_name, null, &buff, &additional_data); + try self.file_engine.deleteEntities(struct_name, null, &buff.writer(), &additional_data); send("{s}", .{buff.items}); state = .end; }, @@ -341,10 +347,16 @@ pub const Parser = struct { token.loc.end, ); } - var buff = std.ArrayList(u8).init(self.allocator); + var buff = std.ArrayList(u8).init(out_allocator); defer buff.deinit(); - self.file_engine.writeEntity(struct_name, data_map, &buff) catch return ZipponError.CantWriteEntity; + token = self.toker.last_token; + log.info("Token end of add: {s} {any}\n", .{ self.toker.getTokenSlice(token), token.tag }); + if (token.tag == .identifier and std.mem.eql(u8, self.toker.getTokenSlice(token), "MULTIPLE")) { + for (0..1_000_000) |_| self.file_engine.writeEntity(struct_name, data_map, &buff.writer()) catch return ZipponError.CantWriteEntity; + } else { + self.file_engine.writeEntity(struct_name, data_map, &buff.writer()) catch return ZipponError.CantWriteEntity; + } send("{s}", .{buff.items}); state = .end; }, @@ -1105,5 +1117,4 @@ fn testParseFilter(source: [:0]const u8) !void { defer filter.deinit(); std.debug.print("{s}\n", .{source}); filter.debugPrint(); - 
std.debug.print("\n", .{}); }