Started to use fixed-length alloc for performance

For very important stuff like the writer that writes data when parsing,
started to use fixed-length allocation, because writing takes the majority
of the time, not parsing =/

Going to need to improve that
This commit is contained in:
Adrien Bouvais 2024-11-02 22:12:47 +01:00
parent aaa1cb2589
commit dba73ce113
4 changed files with 61 additions and 40 deletions

View File

@ -1,4 +1,4 @@
pub const BUFFER_SIZE = 1024 * 50; // Line limit when parsing file pub const BUFFER_SIZE = 1024 * 64 * 64; // Line limit when parsing file and other buffers
pub const MAX_FILE_SIZE = 5e+6; // 5Mb pub const MAX_FILE_SIZE = 5e+6; // 5Mb
pub const CSV_DELIMITER = ';'; // TODO: Delete pub const CSV_DELIMITER = ';'; // TODO: Delete

View File

@ -3,6 +3,7 @@ const utils = @import("stuffs/utils.zig");
const dtype = @import("dtype"); const dtype = @import("dtype");
const s2t = dtype.s2t; const s2t = dtype.s2t;
const zid = @import("ZipponData"); const zid = @import("ZipponData");
const time = std.time;
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const UUID = dtype.UUID; const UUID = dtype.UUID;
@ -297,7 +298,7 @@ pub const FileEngine = struct {
self: *FileEngine, self: *FileEngine,
struct_name: []const u8, struct_name: []const u8,
filter: ?Filter, filter: ?Filter,
buffer: *std.ArrayList(u8), writer: anytype,
additional_data: *AdditionalData, additional_data: *AdditionalData,
) FileEngineError!void { ) FileEngineError!void {
const sstruct = try self.structName2SchemaStruct(struct_name); const sstruct = try self.structName2SchemaStruct(struct_name);
@ -317,13 +318,18 @@ pub const FileEngine = struct {
additional_data.populateWithEverything(self.allocator, sstruct.members) catch return FileEngineError.MemoryError; additional_data.populateWithEverything(self.allocator, sstruct.members) catch return FileEngineError.MemoryError;
} }
var writer = buffer.writer(); var data_buffer: [BUFFER_SIZE]u8 = undefined;
var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
defer fa.reset();
const data_allocator = fa.allocator();
writer.writeAll("[") catch return FileEngineError.WriteError; writer.writeAll("[") catch return FileEngineError.WriteError;
for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that
self.allocator.free(path_buff); self.allocator.free(path_buff);
path_buff = std.fmt.allocPrint(self.allocator, "{d}.zid", .{file_index}) catch return FileEngineError.MemoryError; path_buff = std.fmt.allocPrint(self.allocator, "{d}.zid", .{file_index}) catch return FileEngineError.MemoryError;
var iter = zid.DataIterator.init(self.allocator, path_buff, dir, sstruct.zid_schema) catch return FileEngineError.ZipponDataError; fa.reset();
var iter = zid.DataIterator.init(data_allocator, path_buff, dir, sstruct.zid_schema) catch return FileEngineError.ZipponDataError;
defer iter.deinit(); defer iter.deinit();
blk: while (iter.next() catch return FileEngineError.ZipponDataError) |row| { blk: while (iter.next() catch return FileEngineError.ZipponDataError) |row| {
@ -361,7 +367,6 @@ pub const FileEngine = struct {
if (additional_data.entity_count_to_find != 0 and total_currently_found >= additional_data.entity_count_to_find) break :blk; if (additional_data.entity_count_to_find != 0 and total_currently_found >= additional_data.entity_count_to_find) break :blk;
} }
} }
writer.writeAll("]") catch return FileEngineError.WriteError; writer.writeAll("]") catch return FileEngineError.WriteError;
} }
@ -399,7 +404,7 @@ pub const FileEngine = struct {
self: *FileEngine, self: *FileEngine,
struct_name: []const u8, struct_name: []const u8,
map: std.StringHashMap([]const u8), map: std.StringHashMap([]const u8),
buffer: *std.ArrayList(u8), writer: anytype,
) FileEngineError!void { ) FileEngineError!void {
const uuid = UUID.init(); const uuid = UUID.init();
@ -412,18 +417,20 @@ pub const FileEngine = struct {
) catch return FileEngineError.MemoryError; ) catch return FileEngineError.MemoryError;
defer self.allocator.free(path); defer self.allocator.free(path);
var arena = std.heap.ArenaAllocator.init(self.allocator); var data_buffer: [BUFFER_SIZE]u8 = undefined;
defer arena.deinit(); var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
const data = try self.orderedNewData(arena.allocator(), struct_name, map); defer fa.reset();
const data_allocator = fa.allocator();
const data = try self.orderedNewData(data_allocator, struct_name, map);
var data_writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError; var data_writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError;
data_writer.write(data) catch return FileEngineError.ZipponDataError; data_writer.write(data) catch return FileEngineError.ZipponDataError;
data_writer.flush() catch return FileEngineError.ZipponDataError; data_writer.flush() catch return FileEngineError.ZipponDataError;
var writer = buffer.writer(); writer.writeByte('[') catch return FileEngineError.WriteError;
writer.writeByte('{') catch return FileEngineError.WriteError;
writer.print("\"{s}\"", .{uuid.format_uuid()}) catch return FileEngineError.WriteError; writer.print("\"{s}\"", .{uuid.format_uuid()}) catch return FileEngineError.WriteError;
writer.writeAll("}, ") catch return FileEngineError.WriteError; writer.writeAll("], ") catch return FileEngineError.WriteError;
} }
pub fn updateEntities( pub fn updateEntities(
@ -431,7 +438,7 @@ pub const FileEngine = struct {
struct_name: []const u8, struct_name: []const u8,
filter: ?Filter, filter: ?Filter,
map: std.StringHashMap([]const u8), map: std.StringHashMap([]const u8),
buffer: *std.ArrayList(u8), writer: anytype,
additional_data: *AdditionalData, additional_data: *AdditionalData,
) FileEngineError!void { ) FileEngineError!void {
const sstruct = try self.structName2SchemaStruct(struct_name); const sstruct = try self.structName2SchemaStruct(struct_name);
@ -456,7 +463,6 @@ pub const FileEngine = struct {
new_data_buff[i] = try string2Data(self.allocator, dt, map.get(member).?); new_data_buff[i] = try string2Data(self.allocator, dt, map.get(member).?);
} }
var writer = buffer.writer();
writer.writeAll("[") catch return FileEngineError.WriteError; writer.writeAll("[") catch return FileEngineError.WriteError;
for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that
if (additional_data.entity_count_to_find != 0 and total_currently_found >= additional_data.entity_count_to_find) break; if (additional_data.entity_count_to_find != 0 and total_currently_found >= additional_data.entity_count_to_find) break;
@ -483,8 +489,6 @@ pub const FileEngine = struct {
new_data_buff[i] = row[i]; new_data_buff[i] = row[i];
} }
std.debug.print("{any}\n\n", .{new_data_buff});
new_writer.write(new_data_buff) catch return FileEngineError.WriteError; new_writer.write(new_data_buff) catch return FileEngineError.WriteError;
writer.writeByte('{') catch return FileEngineError.WriteError; writer.writeByte('{') catch return FileEngineError.WriteError;
writer.print("\"{s}\"", .{UUID.format_bytes(row[0].UUID)}) catch return FileEngineError.WriteError; writer.print("\"{s}\"", .{UUID.format_bytes(row[0].UUID)}) catch return FileEngineError.WriteError;
@ -502,8 +506,6 @@ pub const FileEngine = struct {
dir.rename(new_path_buff, path_buff) catch return FileEngineError.RenameFileError; dir.rename(new_path_buff, path_buff) catch return FileEngineError.RenameFileError;
} }
writer.writeAll("]") catch return FileEngineError.WriteError;
for (try self.structName2structMembers(struct_name), 1..) |member, i| { for (try self.structName2structMembers(struct_name), 1..) |member, i| {
if (!map.contains(member)) continue; if (!map.contains(member)) continue;
@ -524,7 +526,7 @@ pub const FileEngine = struct {
self: *FileEngine, self: *FileEngine,
struct_name: []const u8, struct_name: []const u8,
filter: ?Filter, filter: ?Filter,
buffer: *std.ArrayList(u8), writer: anytype,
additional_data: *AdditionalData, additional_data: *AdditionalData,
) FileEngineError!void { ) FileEngineError!void {
const sstruct = try self.structName2SchemaStruct(struct_name); const sstruct = try self.structName2SchemaStruct(struct_name);
@ -539,7 +541,6 @@ pub const FileEngine = struct {
defer self.allocator.free(path_buff); defer self.allocator.free(path_buff);
const dir = std.fs.cwd().openDir(path_buff, .{}) catch return FileEngineError.CantOpenDir; const dir = std.fs.cwd().openDir(path_buff, .{}) catch return FileEngineError.CantOpenDir;
var writer = buffer.writer();
writer.writeAll("[") catch return FileEngineError.WriteError; writer.writeAll("[") catch return FileEngineError.WriteError;
for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that for (0..(max_file_index + 1)) |file_index| { // TODO: Multi thread that
self.allocator.free(path_buff); self.allocator.free(path_buff);
@ -642,7 +643,13 @@ pub const FileEngine = struct {
} }
/// Take a map from the parseNewData and return an ordered array of Data to be use in a DataWriter /// Take a map from the parseNewData and return an ordered array of Data to be use in a DataWriter
fn orderedNewData(self: *FileEngine, allocator: Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) FileEngineError![]const zid.Data { /// TODO: Optimize
fn orderedNewData(
self: *FileEngine,
allocator: Allocator,
struct_name: []const u8,
map: std.StringHashMap([]const u8),
) FileEngineError![]zid.Data {
const members = try self.structName2structMembers(struct_name); const members = try self.structName2structMembers(struct_name);
const types = try self.structName2DataType(struct_name); const types = try self.structName2DataType(struct_name);
@ -660,6 +667,7 @@ pub const FileEngine = struct {
// --------------------Schema utils-------------------- // --------------------Schema utils--------------------
/// Get the index of the first file that is bellow the size limit. If not found, create a new file /// Get the index of the first file that is bellow the size limit. If not found, create a new file
/// TODO: Need some serious speed up. I should keep in memory a file->size as a hashmap and use that instead
fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) FileEngineError!usize { fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) FileEngineError!usize {
var path = std.fmt.allocPrint( var path = std.fmt.allocPrint(
self.allocator, self.allocator,
@ -682,7 +690,7 @@ pub const FileEngine = struct {
} }
} }
i += 1; self.allocator.free(path);
path = std.fmt.allocPrint( path = std.fmt.allocPrint(
self.allocator, self.allocator,
"{s}/DATA/{s}/{d}.zid", "{s}/DATA/{s}/{d}.zid",

View File

@ -37,6 +37,8 @@ const stdout = std.io.getStdOut().writer();
// Maybe create a struct for that // Maybe create a struct for that
pub fn send(comptime format: []const u8, args: anytype) void { pub fn send(comptime format: []const u8, args: anytype) void {
if (false) return;
stdout.print(format, args) catch |err| { stdout.print(format, args) catch |err| {
log.err("Can't send: {any}", .{err}); log.err("Can't send: {any}", .{err});
stdout.print("\x03\n", .{}) catch {}; stdout.print("\x03\n", .{}) catch {};

View File

@ -23,6 +23,10 @@ const printError = @import("stuffs/utils.zig").printError;
const ZiQlParserError = @import("stuffs/errors.zig").ZiQlParserError; const ZiQlParserError = @import("stuffs/errors.zig").ZiQlParserError;
const ZipponError = @import("stuffs/errors.zig").ZipponError; const ZipponError = @import("stuffs/errors.zig").ZipponError;
const BUFFER_SIZE = @import("config.zig").BUFFER_SIZE;
const log = std.log.scoped(.ziqlParser);
const State = enum { const State = enum {
start, start,
invalid, invalid,
@ -85,6 +89,11 @@ pub const Parser = struct {
var struct_name: []const u8 = undefined; var struct_name: []const u8 = undefined;
var action: enum { GRAB, ADD, UPDATE, DELETE } = undefined; var action: enum { GRAB, ADD, UPDATE, DELETE } = undefined;
var out_buff: [BUFFER_SIZE]u8 = undefined;
var fa = std.heap.FixedBufferAllocator.init(&out_buff);
defer fa.reset();
const out_allocator = self.allocator;
var token = self.toker.next(); var token = self.toker.next();
var keep_next = false; // Use in the loop to prevent to get the next token when continue. Just need to make it true and it is reset at every loop var keep_next = false; // Use in the loop to prevent to get the next token when continue. Just need to make it true and it is reset at every loop
@ -177,18 +186,18 @@ pub const Parser = struct {
var filter = try self.parseFilter(struct_name, false); var filter = try self.parseFilter(struct_name, false);
defer filter.deinit(); defer filter.deinit();
var buff = std.ArrayList(u8).init(self.allocator); var buff = std.ArrayList(u8).init(out_allocator);
defer buff.deinit(); defer buff.deinit();
try self.file_engine.parseEntities(struct_name, filter, &buff, &additional_data); try self.file_engine.parseEntities(struct_name, filter, &buff.writer(), &additional_data);
send("{s}", .{buff.items}); send("{s}", .{buff.items});
state = .end; state = .end;
}, },
.eof => { .eof => {
var buff = std.ArrayList(u8).init(self.allocator); var buff = std.ArrayList(u8).init(out_allocator);
defer buff.deinit(); defer buff.deinit();
try self.file_engine.parseEntities(struct_name, null, &buff, &additional_data); try self.file_engine.parseEntities(struct_name, null, &buff.writer(), &additional_data);
send("{s}", .{buff.items}); send("{s}", .{buff.items});
state = .end; state = .end;
}, },
@ -207,9 +216,6 @@ pub const Parser = struct {
var filter = try self.parseFilter(struct_name, false); var filter = try self.parseFilter(struct_name, false);
defer filter.deinit(); defer filter.deinit();
var uuids = std.ArrayList(UUID).init(self.allocator);
defer uuids.deinit();
token = self.toker.last(); token = self.toker.last();
if (token.tag != .keyword_to) return printError( if (token.tag != .keyword_to) return printError(
@ -233,10 +239,10 @@ pub const Parser = struct {
defer data_map.deinit(); defer data_map.deinit();
try self.parseNewData(&data_map, struct_name); try self.parseNewData(&data_map, struct_name);
var buff = std.ArrayList(u8).init(self.allocator); var buff = std.ArrayList(u8).init(out_allocator);
defer buff.deinit(); defer buff.deinit();
try self.file_engine.updateEntities(struct_name, filter, data_map, &buff, &additional_data); try self.file_engine.updateEntities(struct_name, filter, data_map, &buff.writer(), &additional_data);
send("{s}", .{buff.items}); send("{s}", .{buff.items});
state = .end; state = .end;
}, },
@ -258,10 +264,10 @@ pub const Parser = struct {
defer data_map.deinit(); defer data_map.deinit();
try self.parseNewData(&data_map, struct_name); try self.parseNewData(&data_map, struct_name);
var buff = std.ArrayList(u8).init(self.allocator); var buff = std.ArrayList(u8).init(out_allocator);
defer buff.deinit(); defer buff.deinit();
try self.file_engine.updateEntities(struct_name, null, data_map, &buff, &additional_data); try self.file_engine.updateEntities(struct_name, null, data_map, &buff.writer(), &additional_data);
send("{s}", .{buff.items}); send("{s}", .{buff.items});
state = .end; state = .end;
}, },
@ -279,18 +285,18 @@ pub const Parser = struct {
var filter = try self.parseFilter(struct_name, false); var filter = try self.parseFilter(struct_name, false);
defer filter.deinit(); defer filter.deinit();
var buff = std.ArrayList(u8).init(self.allocator); var buff = std.ArrayList(u8).init(out_allocator);
defer buff.deinit(); defer buff.deinit();
try self.file_engine.deleteEntities(struct_name, filter, &buff, &additional_data); try self.file_engine.deleteEntities(struct_name, filter, &buff.writer(), &additional_data);
send("{s}", .{buff.items}); send("{s}", .{buff.items});
state = .end; state = .end;
}, },
.eof => { .eof => {
var buff = std.ArrayList(u8).init(self.allocator); var buff = std.ArrayList(u8).init(out_allocator);
defer buff.deinit(); defer buff.deinit();
try self.file_engine.deleteEntities(struct_name, null, &buff, &additional_data); try self.file_engine.deleteEntities(struct_name, null, &buff.writer(), &additional_data);
send("{s}", .{buff.items}); send("{s}", .{buff.items});
state = .end; state = .end;
}, },
@ -341,10 +347,16 @@ pub const Parser = struct {
token.loc.end, token.loc.end,
); );
} }
var buff = std.ArrayList(u8).init(self.allocator); var buff = std.ArrayList(u8).init(out_allocator);
defer buff.deinit(); defer buff.deinit();
self.file_engine.writeEntity(struct_name, data_map, &buff) catch return ZipponError.CantWriteEntity; token = self.toker.last_token;
log.info("Token end of add: {s} {any}\n", .{ self.toker.getTokenSlice(token), token.tag });
if (token.tag == .identifier and std.mem.eql(u8, self.toker.getTokenSlice(token), "MULTIPLE")) {
for (0..1_000_000) |_| self.file_engine.writeEntity(struct_name, data_map, &buff.writer()) catch return ZipponError.CantWriteEntity;
} else {
self.file_engine.writeEntity(struct_name, data_map, &buff.writer()) catch return ZipponError.CantWriteEntity;
}
send("{s}", .{buff.items}); send("{s}", .{buff.items});
state = .end; state = .end;
}, },
@ -1105,5 +1117,4 @@ fn testParseFilter(source: [:0]const u8) !void {
defer filter.deinit(); defer filter.deinit();
std.debug.print("{s}\n", .{source}); std.debug.print("{s}\n", .{source});
filter.debugPrint(); filter.debugPrint();
std.debug.print("\n", .{});
} }