ZipponDB/src/fileEngine.zig
MrBounty ac4186529d Implement send all and send JSON
Now send the JSON using the additional data.

Also implemented the empty filter and the no filter like GRAB User {}
and GRAB User
2024-10-12 19:02:23 +02:00

1067 lines
50 KiB
Zig

const std = @import("std");
const Allocator = std.mem.Allocator;
const UUID = @import("types/uuid.zig").UUID;
const DataType = @import("types/dataType.zig").DataType;
const FileTokenizer = @import("tokenizers/file.zig").Tokenizer;
const FileToken = @import("tokenizers/file.zig").Token;
const SchemaStruct = @import("schemaParser.zig").Parser.SchemaStruct;
const SchemaParser = @import("schemaParser.zig").Parser;
const SchemaTokenizer = @import("tokenizers/schema.zig").Tokenizer;
const SchemaToken = @import("tokenizers/schema.zig").Token;
const AdditionalData = @import("ziqlParser.zig").Parser.AdditionalData;
//TODO: Create a union class and choose between file and memory
/// Manages everything related to reading or writing files,
/// or even getting stats. If it touches files, it belongs here.
pub const FileEngine = struct {
allocator: Allocator, // Allocator used for the paths, schema buffer and per-row copies made by the engine
path_to_ZipponDB_dir: []const u8, // Root folder of the database; data files are addressed as "<root>/DATA/<struct>/<index>.zippondata"
max_file_size: usize = 5e+4, // 50kb TODO: Change. getFirstUsableIndexFile compares each data file's size against this limit
null_terminated_schema_buff: [:0]u8, // Owned, null-terminated copy of the schema text; released in deinit
struct_array: std.ArrayList(SchemaStruct), // Structs produced by the schema parser; each element is deinit-ed in deinit
/// Build a FileEngine rooted at `path` (defaults to "ZipponDB").
/// The schema file under that root is read and parsed; if it cannot be read the
/// engine starts with an empty schema text. Panics if a buffer cannot be allocated.
pub fn init(allocator: Allocator, path: ?[]const u8) FileEngine {
    const root_dir = path orelse "ZipponDB";

    // Scratch space receiving the raw schema file; only the bytes actually read are kept.
    const scratch = allocator.alloc(u8, 1024 * 50) catch @panic("Cant allocate the schema buffer");
    defer allocator.free(scratch);

    // Any read failure is mapped to a length of 0, i.e. an empty schema.
    const schema_len: usize = FileEngine.readSchemaFile(allocator, root_dir, scratch) catch 0;
    const schema_text = allocator.dupeZ(u8, scratch[0..schema_len]) catch @panic("Cant allocate null term buffer for the schema");

    var structs = std.ArrayList(SchemaStruct).init(allocator);
    var tokenizer = SchemaTokenizer.init(schema_text);
    var schema_parser = SchemaParser.init(&tokenizer, allocator);
    // Parse errors are discarded; the engine is returned with whatever `parse` left in the list.
    schema_parser.parse(&structs) catch {};

    return .{
        .allocator = allocator,
        .path_to_ZipponDB_dir = root_dir,
        .null_terminated_schema_buff = schema_text,
        .struct_array = structs,
    };
}
/// Release everything the engine owns: each parsed struct, the list holding
/// them, and the schema text buffer.
pub fn deinit(self: *FileEngine) void {
    for (self.struct_array.items) |*schema_struct| {
        schema_struct.deinit();
    }
    self.struct_array.deinit();
    self.allocator.free(self.null_terminated_schema_buff);
}
/// Parsed right-hand side of a Condition, one variant per DataType.
/// The union is untagged: the code using it picks the field from the
/// condition's `data_type`, so the two must always agree.
/// The `*_array` variants hold an ArrayList that getUUIDListUsingCondition deinit-s when done.
const ComparisonValue = union {
int: i64,
float: f64,
str: []const u8,
bool_: bool,
id: UUID,
int_array: std.ArrayList(i64),
str_array: std.ArrayList([]const u8),
float_array: std.ArrayList(f64),
bool_array: std.ArrayList(bool),
id_array: std.ArrayList(UUID),
};
/// One filter applied while scanning the data files of a struct.
/// `struct_name` selects the data folder and `member_name` the column that is read.
/// `operation` is one of equal, different, superior, superior_or_equal, inferior, inferior_or_equal or in.
/// `value` is the right-hand side as text; getUUIDListUsingCondition parses it according to `data_type`.
pub const Condition = struct {
struct_name: []const u8,
member_name: []const u8 = undefined, // not set by init; must be assigned before the condition is used
value: []const u8 = undefined, // not set by init; must be assigned before the condition is used
operation: enum { equal, different, superior, superior_or_equal, inferior, inferior_or_equal, in } = undefined, // Add more stuff like IN
data_type: DataType = undefined, // not set by init; decides how `value` and the row value are parsed
/// Create a condition for `struct_name`; every other field is left undefined.
pub fn init(struct_name: []const u8) Condition {
return Condition{ .struct_name = struct_name };
}
};
/// Write the rows of `struct_name` whose UUID is listed in `uuids` into `buffer`.
///
/// Data files `0.zippondata` up to the index returned by `maxFileIndex` are read
/// line by line. A line starts with a 36 character UUID followed by the member values.
/// When `additional_data.member_to_find` is empty every member is written,
/// otherwise only the members it names.
///
/// Output shape: `[{member: value, member: value, }, {...}, ]`. String values
/// (including the items of string arrays) are re-quoted with double quotes.
/// Lines too short to hold a UUID are skipped.
///
/// NOTE(review): keys are unquoted, separators trail and array items are space
/// separated, so the output is not strict JSON yet.
/// TODO: Maybe change '' to "" everywhere in the data files.
pub fn parseAndWriteToSend(self: *FileEngine, struct_name: []const u8, uuids: []UUID, buffer: *std.ArrayList(u8), additional_data: AdditionalData) !void {
    const max_file_index = try self.maxFileIndex(struct_name);
    var current_index: usize = 0;

    var path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");
    // Runs at scope exit, so it frees whichever path is current at that point.
    defer self.allocator.free(path_buff);

    var file = std.fs.cwd().openFile(path_buff, .{}) catch {
        std.debug.print("Path: {s}", .{path_buff});
        @panic("Can't open first file to init a data iterator");
    };
    defer file.close();

    var output: [1024 * 50]u8 = undefined; // Maybe need to increase that as it limit the size of a line in a file
    var output_fbs = std.io.fixedBufferStream(&output);
    const writer = output_fbs.writer();

    var buffered = std.io.bufferedReader(file.reader());
    var reader = buffered.reader();

    const out_writer = buffer.writer();
    try out_writer.writeAll("[");

    while (true) {
        output_fbs.reset();
        reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) {
            error.EndOfStream => {
                // End of this file: move on to the next one, or stop after the last.
                // TODO: Be able to give an array of file index from the B+Tree to only parse them
                output_fbs.reset(); // clear buffer before exit
                if (current_index == max_file_index) break;

                current_index += 1;

                self.allocator.free(path_buff);
                path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");

                file.close();
                file = std.fs.cwd().openFile(path_buff, .{}) catch {
                    std.debug.print("Error trying to open {s}\n", .{path_buff});
                    @panic("Can't open file to update a data iterator");
                };

                buffered = std.io.bufferedReader(file.reader());
                reader = buffered.reader();
                continue;
            },
            else => {
                std.debug.print("Error while reading file: {any}\n", .{err});
                break;
            },
        };

        const row = output_fbs.getWritten();
        // A usable row holds at least the 36 character UUID.
        if (row.len < 36) continue;

        const uuid = try UUID.parse(row[0..36]);

        // Optimize this
        var founded = false;
        for (uuids) |elem| {
            if (elem.compare(uuid)) {
                founded = true;
                break;
            }
        }
        // Only rows that will be written need a tokenizer (and the copy it requires).
        if (!founded) continue;

        // Skip the separator after the UUID when there is one.
        const null_terminated_string = try self.allocator.dupeZ(u8, row[@min(37, row.len)..]);
        defer self.allocator.free(null_terminated_string);
        var data_toker = FileTokenizer.init(null_terminated_string);

        try out_writer.writeAll("{");
        for (self.structName2structMembers(struct_name), self.structName2DataType(struct_name)) |member_name, member_type| {
            var token: FileToken = data_toker.next();
            const name = self.locToSlice(member_name);

            const is_array = switch (member_type) {
                .int_array, .float_array, .bool_array, .id_array, .str_array => true,
                else => false,
            };

            // FIXME: When relationship will be implemented, need to check if the len of NON link is 0
            const selected = (additional_data.member_to_find.items.len == 0) or self.isMemberNameInAdditionalData(name, additional_data);
            if (!selected) {
                // An array spans several tokens; consume it through its closing
                // bracket so the next member starts on its own value.
                if (is_array) {
                    while (token.tag != .r_bracket) token = data_toker.next();
                }
                continue;
            }

            // write the member name and its separator
            try out_writer.print("{s}: ", .{name});

            switch (member_type) {
                .str => {
                    const str_slice = data_toker.getTokenSlice(token);
                    if (str_slice.len >= 2) {
                        // Drop the surrounding single quotes and emit double quotes.
                        try out_writer.print("\"{s}\"", .{str_slice[1 .. str_slice.len - 1]});
                    } else {
                        try out_writer.writeAll(str_slice);
                    }
                },
                .str_array => {
                    // Opening bracket as stored, then every item re-quoted like a plain string.
                    try out_writer.writeAll(data_toker.getTokenSlice(token));
                    token = data_toker.next();
                    while (token.tag != .r_bracket) : (token = data_toker.next()) {
                        const element = data_toker.getTokenSlice(token);
                        try out_writer.writeAll(" ");
                        if (element.len >= 2) {
                            try out_writer.print("\"{s}\"", .{element[1 .. element.len - 1]});
                        } else {
                            try out_writer.writeAll(element);
                        }
                    }
                    try out_writer.writeAll(" ");
                    try out_writer.writeAll(data_toker.getTokenSlice(token));
                },
                .int_array, .float_array, .bool_array, .id_array => {
                    while (token.tag != .r_bracket) : (token = data_toker.next()) {
                        try out_writer.writeAll(data_toker.getTokenSlice(token));
                        try out_writer.writeAll(" ");
                    }
                    try out_writer.writeAll(data_toker.getTokenSlice(token));
                },
                else => try out_writer.writeAll(data_toker.getTokenSlice(token)), // write the value as is
            }
            try out_writer.writeAll(", ");
        }
        try out_writer.writeAll("}");
        try out_writer.writeAll(", ");
    }

    try out_writer.writeAll("]");
}
/// Tell whether `member_name` is one of the members listed in
/// `additional_data.member_to_find` (exact byte comparison).
fn isMemberNameInAdditionalData(_: *FileEngine, member_name: []const u8, additional_data: AdditionalData) bool {
    return for (additional_data.member_to_find.items) |requested| {
        if (std.mem.eql(u8, member_name, requested.name)) break true;
    } else false;
}
/// Append the UUID of every row of `struct_name` to `uuid_array`.
///
/// Data files `0.zippondata` up to the index returned by `maxFileIndex` are read
/// line by line; the first 36 characters of each line are parsed as the UUID.
/// Lines shorter than that are skipped instead of triggering an out-of-bounds slice.
/// A read error other than end-of-file is printed and stops the scan, leaving
/// the UUIDs collected so far in `uuid_array`.
pub fn getAllUUIDList(self: *FileEngine, struct_name: []const u8, uuid_array: *std.ArrayList(UUID)) !void {
    const max_file_index = try self.maxFileIndex(struct_name);
    var current_index: usize = 0;

    var path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");
    // Runs at scope exit, so it frees whichever path is current at that point.
    defer self.allocator.free(path_buff);

    var file = std.fs.cwd().openFile(path_buff, .{}) catch {
        std.debug.print("Path: {s}", .{path_buff});
        @panic("Can't open first file to init a data iterator");
    };
    defer file.close();

    var output: [1024 * 50]u8 = undefined; // Maybe need to increase that as it limit the size of a line in a file
    var output_fbs = std.io.fixedBufferStream(&output);
    const writer = output_fbs.writer();

    var buffered = std.io.bufferedReader(file.reader());
    var reader = buffered.reader();

    while (true) {
        output_fbs.reset();
        reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) {
            error.EndOfStream => {
                // End of this file: move on to the next one, or stop after the last.
                // TODO: Be able to give an array of file index from the B+Tree to only parse them
                output_fbs.reset(); // clear buffer before exit
                if (current_index == max_file_index) break;

                current_index += 1;

                self.allocator.free(path_buff);
                path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");

                file.close();
                file = std.fs.cwd().openFile(path_buff, .{}) catch {
                    std.debug.print("Error trying to open {s}\n", .{path_buff});
                    @panic("Can't open file to update a data iterator");
                };

                buffered = std.io.bufferedReader(file.reader());
                reader = buffered.reader();
                continue;
            },
            else => {
                std.debug.print("Error while reading file: {any}\n", .{err});
                break;
            },
        };

        const row = output_fbs.getWritten();
        // A usable row holds at least the 36 character UUID.
        if (row.len < 36) continue;

        const uuid = try UUID.parse(row[0..36]);
        try uuid_array.append(uuid);
    }
}
/// Append to `uuid_array` the UUID of every row that satisfies `condition`.
///
/// Data files `0.zippondata` up to the index returned by `maxFileIndex` are read
/// line by line. For each row the value in the column of `condition.member_name`
/// is compared with `condition.value`, both parsed according to `condition.data_type`.
/// Comparisons are written as `condition value OP row value`, so `superior` keeps
/// rows whose value is greater than the condition value.
/// For `.id` conditions and the `in` operation the row's own UUID is compared.
/// Lines too short to hold a UUID are skipped.
///
/// Combinations of operation and data type without an arm below are `unreachable`:
/// the caller must reject them beforehand.
///
/// TODO: Change the UUID function to be a B+Tree
/// TODO: Optimize this, it is way too slow right now. Some ideas:
///     - Arrays can take a very long time to parse, maybe put them in a separate file. But strings can be long too...
///     - Use the stream directly in the tokenizer
///     - Use a fixed size and split into other files. Like one file for one member (because very long, like an array of 1000 values) and another one for everything else.
///         The threshold can be an average length > 400 characters, so the UUID would take less than 10% of the storage
///     - Save data in a more compact way
///     - Multithreading, each thread takes a list of files and the results are merged at the end
pub fn getUUIDListUsingCondition(self: *FileEngine, condition: Condition, uuid_array: *std.ArrayList(UUID)) !void {
    const max_file_index = try self.maxFileIndex(condition.struct_name);
    var current_index: usize = 0;

    var path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");
    // Runs at scope exit, so it frees whichever path is current at that point.
    defer self.allocator.free(path_buff);

    var file = std.fs.cwd().openFile(path_buff, .{}) catch {
        std.debug.print("Path: {s}", .{path_buff});
        @panic("Can't open first file to init a data iterator");
    };
    defer file.close();

    var output: [1024 * 50]u8 = undefined; // Maybe need to increase that as it limit the size of a line in a file
    var output_fbs = std.io.fixedBufferStream(&output);
    const writer = output_fbs.writer();

    var buffered = std.io.bufferedReader(file.reader());
    var reader = buffered.reader();

    // Parse the condition value once, into the union field matching its data type.
    const compare_value: ComparisonValue = switch (condition.data_type) {
        .int => .{ .int = parseInt(condition.value) },
        .str => .{ .str = condition.value },
        .float => .{ .float = parseFloat(condition.value) },
        .bool => .{ .bool_ = parseBool(condition.value) },
        .id => .{ .id = try UUID.parse(condition.value) },
        .int_array => .{ .int_array = parseArrayInt(self.allocator, condition.value) },
        .str_array => .{ .str_array = parseArrayStr(self.allocator, condition.value) },
        .float_array => .{ .float_array = parseArrayFloat(self.allocator, condition.value) },
        .bool_array => .{ .bool_array = parseArrayBool(self.allocator, condition.value) },
        .id_array => .{ .id_array = parseArrayUUID(self.allocator, condition.value) },
    };
    defer {
        // Only the array variants own memory.
        switch (condition.data_type) {
            .int_array => compare_value.int_array.deinit(),
            .str_array => compare_value.str_array.deinit(),
            .float_array => compare_value.float_array.deinit(),
            .bool_array => compare_value.bool_array.deinit(),
            .id_array => compare_value.id_array.deinit(),
            else => {},
        }
    }

    const column_index = self.columnIndexOfMember(condition.struct_name, condition.member_name);

    while (true) {
        output_fbs.reset();
        reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) {
            error.EndOfStream => {
                // End of this file: move on to the next one, or stop after the last.
                // TODO: Be able to give an array of file index from the B+Tree to only parse them
                output_fbs.reset(); // clear buffer before exit
                if (current_index == max_file_index) break;

                current_index += 1;

                self.allocator.free(path_buff);
                path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");

                file.close();
                file = std.fs.cwd().openFile(path_buff, .{}) catch {
                    std.debug.print("Error trying to open {s}\n", .{path_buff});
                    @panic("Can't open file to update a data iterator");
                };

                buffered = std.io.bufferedReader(file.reader());
                reader = buffered.reader();
                continue;
            },
            else => {
                std.debug.print("Error while reading file: {any}\n", .{err});
                break;
            },
        };

        const row = output_fbs.getWritten();
        // A usable row holds at least the 36 character UUID.
        if (row.len < 36) continue;

        // Maybe use the stream directly to prevent duplicate the data
        // But I would need to change the Tokenizer a lot...
        const null_terminated_string = try self.allocator.dupeZ(u8, row[@min(37, row.len)..]);
        defer self.allocator.free(null_terminated_string);
        var data_toker = FileTokenizer.init(null_terminated_string);

        const uuid = try UUID.parse(row[0..36]);

        // Advance to the requested column. Array members occupy several tokens
        // (everything up to their closing bracket), so each is consumed as a whole.
        const target_column = column_index.?;
        for (self.structName2DataType(condition.struct_name), 0..) |skipped_type, column| {
            if (column >= target_column) break;
            var skipped = data_toker.next();
            switch (skipped_type) {
                .int_array, .float_array, .bool_array, .id_array, .str_array => {
                    while (skipped.tag != .r_bracket) skipped = data_toker.next();
                },
                else => {},
            }
        }

        const token: FileToken = data_toker.next();
        const row_value = data_toker.getTokenSlice(token);

        // TODO: Make sure upstream that the rest is unreachable by sending an error for wrong condition like superior between 2 string or array
        switch (condition.operation) {
            .equal => switch (condition.data_type) {
                .int => if (compare_value.int == parseInt(row_value)) try uuid_array.append(uuid),
                .float => if (compare_value.float == parseFloat(row_value)) try uuid_array.append(uuid),
                .str => if (std.mem.eql(u8, compare_value.str, row_value)) try uuid_array.append(uuid),
                .bool => if (compare_value.bool_ == parseBool(row_value)) try uuid_array.append(uuid),
                .id => if (compare_value.id.compare(uuid)) try uuid_array.append(uuid),
                // TODO: Implement for array too
                else => unreachable,
            },

            .different => switch (condition.data_type) {
                .int => if (compare_value.int != parseInt(row_value)) try uuid_array.append(uuid),
                .float => if (compare_value.float != parseFloat(row_value)) try uuid_array.append(uuid),
                .str => if (!std.mem.eql(u8, compare_value.str, row_value)) try uuid_array.append(uuid),
                .bool => if (compare_value.bool_ != parseBool(row_value)) try uuid_array.append(uuid),
                // Mirror of the `.equal` arm, so `id != ...` no longer reaches `unreachable`.
                .id => if (!compare_value.id.compare(uuid)) try uuid_array.append(uuid),
                // TODO: Implement for array too
                else => unreachable,
            },

            .superior_or_equal => switch (condition.data_type) {
                .int => if (compare_value.int <= parseInt(row_value)) try uuid_array.append(uuid),
                .float => if (compare_value.float <= parseFloat(row_value)) try uuid_array.append(uuid),
                // TODO: Implement for array too
                else => unreachable,
            },

            .superior => switch (condition.data_type) {
                .int => if (compare_value.int < parseInt(row_value)) try uuid_array.append(uuid),
                .float => if (compare_value.float < parseFloat(row_value)) try uuid_array.append(uuid),
                // TODO: Implement for array too
                else => unreachable,
            },

            .inferior_or_equal => switch (condition.data_type) {
                .int => if (compare_value.int >= parseInt(row_value)) try uuid_array.append(uuid),
                .float => if (compare_value.float >= parseFloat(row_value)) try uuid_array.append(uuid),
                // TODO: Implement for array too
                else => unreachable,
            },

            .inferior => switch (condition.data_type) {
                .int => if (compare_value.int > parseInt(row_value)) try uuid_array.append(uuid),
                .float => if (compare_value.float > parseFloat(row_value)) try uuid_array.append(uuid),
                // TODO: Implement for array too
                else => unreachable,
            },

            // TODO: Do it for other array
            .in => switch (condition.data_type) {
                .id_array => {
                    for (compare_value.id_array.items) |elem| {
                        if (elem.compare(uuid)) {
                            try uuid_array.append(uuid);
                            // One match is enough; a repeated UUID in the list must not add the row twice.
                            break;
                        }
                    }
                },
                else => unreachable,
            },
        }
    }
}
// TODO: Clean a bit the code
// Do I need multiple files too ? I mean it duplicate UUID a lot, if it's just to save a name like 'Bob', storing a long UUID is overkill
// I could just use a tabular data format with separator using space - Or maybe I encode the uuid to take a minimum space as I always know it size
/// Append a new row for `struct_name` and return its freshly generated UUID.
///
/// The row is `<uuid> <value> <value> ...\n`, with the values taken from `data_map`
/// in the order given by `structName2structMembers`. It goes to the data file
/// returned by `getFirstUsableIndexFile`, or to a new file numbered
/// `maxFileIndex + 1` when no existing file can take it.
///
/// `data_map` must hold an entry for every member of the struct; a missing entry
/// is an unwrap of null.
/// NOTE(review): a write error in the middle of the row leaves a partial line in the file.
pub fn writeEntity(self: *FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) !UUID {
    const uuid = UUID.init();

    const usable_index = try self.getFirstUsableIndexFile(struct_name);
    const file_index: usize = usable_index orelse blk: {
        const max_index = try self.maxFileIndex(struct_name);
        break :blk max_index + 1;
    };

    // Each resource is acquired first and its cleanup registered right after, so
    // an early error never closes or frees an uninitialized value.
    const path = try std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, file_index });
    defer self.allocator.free(path);

    const file = if (usable_index != null)
        std.fs.cwd().openFile(path, .{ .mode = .read_write }) catch @panic("=(")
    else
        std.fs.cwd().createFile(path, .{}) catch @panic("Error creating new data file");
    defer file.close();

    try file.seekFromEnd(0);

    const file_writer = file.writer();
    try file_writer.print("{s}", .{uuid.format_uuid()});
    for (self.structName2structMembers(struct_name)) |member_name| {
        try file_writer.print(" {s}", .{data_map.get(self.locToSlice(member_name)).?});
    }
    try file_writer.print("\n", .{});

    return uuid;
}
/// Rewrite the data files of `struct_name`, replacing member values of the rows
/// whose UUID is in `uuids`. `new_data_map` maps a member name to its new value;
/// members absent from the map keep the value read from the old file.
///
/// For each data file a sibling `<index>.zippondata.new` is written row by row,
/// then renamed over the original once the old file has been read to its end.
/// Both handles are closed before the rename and on every exit path.
/// A read error other than end-of-file is printed and stops the update, leaving
/// the current `.new` file on disk.
///
/// NOTE(review): the per-member reads below (two `'` for strings, `]` for arrays,
/// two spaces for everything else) assume a specific layout of separators around
/// each value — TODO confirm for the first and last column of a row.
/// TODO: Optimize a lot, this was done quickly to work. Ideas:
///     - Once all uuid are found, stream until the end of the file without delimiter or uuid compare
///     - Change map to array
pub fn updateEntities(self: *FileEngine, struct_name: []const u8, uuids: []UUID, new_data_map: std.StringHashMap([]const u8)) !void {
    const max_file_index = self.maxFileIndex(struct_name) catch @panic("Cant get max index file when updating");
    var current_file_index: usize = 0;

    // path_buff is the live data file, path_buff2 the ".new" file that will replace it.
    var path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");
    defer self.allocator.free(path_buff);
    var path_buff2 = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata.new", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");
    defer self.allocator.free(path_buff2);

    var old_file = std.fs.cwd().openFile(path_buff, .{}) catch {
        std.debug.print("Path: {s}", .{path_buff});
        @panic("Can't open first file to init a data iterator");
    };
    var new_file = std.fs.cwd().createFile(path_buff2, .{}) catch {
        std.debug.print("Path: {s}", .{path_buff2});
        @panic("Can't create new file to init a data iterator");
    };

    // Tracks whether the current pair of handles still has to be closed at scope exit.
    var files_open = true;
    defer {
        if (files_open) {
            old_file.close();
            new_file.close();
        }
    }

    var output: [1024 * 50]u8 = undefined; // Maybe need to increase that as it limit the size of a line in a file
    var output_fbs = std.io.fixedBufferStream(&output);
    const writer = output_fbs.writer();

    var buffered = std.io.bufferedReader(old_file.reader());
    var reader = buffered.reader();

    while (true) {
        output_fbs.reset();
        reader.streamUntilDelimiter(writer, ' ', null) catch |err| switch (err) {
            error.EndOfStream => {
                // TODO: Be able to give an array of file index from the B+Tree to only parse them
                output_fbs.reset(); // clear buffer before exit

                // The old file is fully copied: close both handles, then let the
                // rename replace the old file with the new one in a single step.
                old_file.close();
                new_file.close();
                files_open = false;
                try std.fs.cwd().rename(path_buff2, path_buff);

                if (current_file_index == max_file_index) break;

                current_file_index += 1;

                self.allocator.free(path_buff);
                path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");

                self.allocator.free(path_buff2);
                path_buff2 = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata.new", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");

                old_file = std.fs.cwd().openFile(path_buff, .{}) catch {
                    std.debug.print("Error trying to open {s}\n", .{path_buff});
                    @panic("Can't open file to update entities");
                };

                new_file = std.fs.cwd().createFile(path_buff2, .{}) catch {
                    std.debug.print("Error trying to create {s}\n", .{path_buff2});
                    @panic("Can't create file to update entities");
                };
                files_open = true;

                buffered = std.io.bufferedReader(old_file.reader());
                reader = buffered.reader();
                continue;
            },
            else => {
                std.debug.print("Error while reading file: {any}\n", .{err});
                break;
            },
        };

        // The text read so far is the UUID of the current row; it is copied unchanged.
        try new_file.writeAll(output_fbs.getWritten());

        const uuid = try UUID.parse(output_fbs.getWritten()[0..36]);

        // Optimize this
        var founded = false;
        for (uuids) |elem| {
            if (elem.compare(uuid)) {
                founded = true;
                break;
            }
        }

        if (founded) {
            for (self.structName2structMembers(struct_name), self.structName2DataType(struct_name)) |member_name, member_type| {
                // For every column in order: read the old value, then write either
                // the replacement from the map or the old value back.
                output_fbs.reset();
                switch (member_type) {
                    .str => {
                        try reader.streamUntilDelimiter(writer, '\'', null);
                        try reader.streamUntilDelimiter(writer, '\'', null);
                    },
                    .int_array, .float_array, .bool_array, .id_array => try reader.streamUntilDelimiter(writer, ']', null),
                    .str_array => try reader.streamUntilDelimiter(writer, ']', null), // FIXME: If the string itself contain ], this will be a problem
                    else => {
                        try reader.streamUntilDelimiter(writer, ' ', null);
                        try reader.streamUntilDelimiter(writer, ' ', null);
                    },
                }

                if (new_data_map.get(self.locToSlice(member_name))) |new_value| {
                    // Write the new data
                    try new_file.writer().print(" {s}", .{new_value});
                } else {
                    // Write the old data, restoring the delimiter the read consumed.
                    switch (member_type) {
                        .str => try new_file.writeAll(" \'"),
                        else => try new_file.writeAll(" "),
                    }
                    try new_file.writeAll(output_fbs.getWritten());
                    switch (member_type) {
                        .str => try new_file.writeAll("\'"),
                        // str_array is included: its `]` was consumed by the read like the other arrays.
                        .int_array, .float_array, .bool_array, .id_array, .str_array => try new_file.writeAll("]"),
                        else => {},
                    }
                }
            }

            // Discard whatever is left of the old row.
            try reader.streamUntilDelimiter(writer, '\n', null);
            try new_file.writeAll("\n");
        } else {
            // Row not targeted: copy the rest of the line as is.
            output_fbs.reset();
            try new_file.writeAll(" ");
            try reader.streamUntilDelimiter(writer, '\n', null);
            try new_file.writeAll(output_fbs.getWritten());
            try new_file.writeAll("\n");
        }
    }
}
/// Delete the rows of `struct_name` whose UUID is in `uuids` and return how many
/// rows were removed.
///
/// For each data file a sibling `<index>.zippondata.new` receives every row that
/// is kept, then it is renamed over the original once the old file has been read
/// to its end. Both handles are closed before the rename and on every exit path.
/// A read error other than end-of-file is printed and stops the operation,
/// leaving the current `.new` file on disk.
/// TODO: Use B+Tree
pub fn deleteEntities(self: *FileEngine, struct_name: []const u8, uuids: []UUID) !usize {
    const max_file_index = self.maxFileIndex(struct_name) catch @panic("Cant get max index file when updating");
    var current_file_index: usize = 0;

    // path_buff is the live data file, path_buff2 the ".new" file that will replace it.
    var path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");
    defer self.allocator.free(path_buff);
    var path_buff2 = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata.new", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");
    defer self.allocator.free(path_buff2);

    var old_file = std.fs.cwd().openFile(path_buff, .{}) catch {
        std.debug.print("Path: {s}", .{path_buff});
        @panic("Can't open first file to init a data iterator");
    };
    var new_file = std.fs.cwd().createFile(path_buff2, .{}) catch {
        std.debug.print("Path: {s}", .{path_buff2});
        @panic("Can't create new file to init a data iterator");
    };

    // Tracks whether the current pair of handles still has to be closed at scope exit.
    var files_open = true;
    defer {
        if (files_open) {
            old_file.close();
            new_file.close();
        }
    }

    var output: [1024 * 50]u8 = undefined; // Maybe need to increase that as it limit the size of a line in a file
    var output_fbs = std.io.fixedBufferStream(&output);
    const writer = output_fbs.writer();

    var buffered = std.io.bufferedReader(old_file.reader());
    var reader = buffered.reader();

    var deleted_count: usize = 0;

    while (true) {
        output_fbs.reset();
        reader.streamUntilDelimiter(writer, ' ', null) catch |err| switch (err) {
            error.EndOfStream => {
                // TODO: Be able to give an array of file index from the B+Tree to only parse them
                output_fbs.reset(); // clear buffer before exit

                // The old file is fully processed: close both handles, then let the
                // rename replace the old file with the new one in a single step.
                old_file.close();
                new_file.close();
                files_open = false;
                try std.fs.cwd().rename(path_buff2, path_buff);

                if (current_file_index == max_file_index) break;

                current_file_index += 1;

                self.allocator.free(path_buff);
                path_buff = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");

                self.allocator.free(path_buff2);
                path_buff2 = std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}/{d}.zippondata.new", .{ self.path_to_ZipponDB_dir, struct_name, current_file_index }) catch @panic("Can't create sub_path for init a DataIterator");

                old_file = std.fs.cwd().openFile(path_buff, .{}) catch {
                    std.debug.print("Error trying to open {s}\n", .{path_buff});
                    @panic("Can't open file to update entities");
                };

                new_file = std.fs.cwd().createFile(path_buff2, .{}) catch {
                    std.debug.print("Error trying to create {s}\n", .{path_buff2});
                    @panic("Can't create file to update entities");
                };
                files_open = true;

                buffered = std.io.bufferedReader(old_file.reader());
                reader = buffered.reader();
                continue;
            },
            else => {
                std.debug.print("Error while reading file: {any}\n", .{err});
                break;
            },
        };

        // The text read so far is the UUID of the current row.
        const uuid = try UUID.parse(output_fbs.getWritten()[0..36]);

        // Optimize this
        var founded = false;
        for (uuids) |elem| {
            if (elem.compare(uuid)) {
                founded = true;
                deleted_count += 1;
                break;
            }
        }

        if (founded) {
            // Row deleted: consume the rest of the line without copying it.
            try reader.streamUntilDelimiter(writer, '\n', null);
        } else {
            // Row kept: copy the UUID, then the rest of the line.
            try new_file.writeAll(output_fbs.getWritten());

            output_fbs.reset();
            try new_file.writeAll(" ");
            try reader.streamUntilDelimiter(writer, '\n', null);
            try new_file.writeAll(output_fbs.getWritten());
            try new_file.writeAll("\n");
        }
    }

    return deleted_count;
}
/// Turn a data file name such as `1.zippondata` into its index (`1`).
/// Panics when the name does not end with the data file extension or when the
/// part before the extension is not a base-10 unsigned integer.
/// Note: if the extension of the data files changes, `extension` must follow.
fn fileName2Index(_: FileEngine, file_name: []const u8) usize {
    const extension = ".zippondata";
    // Checking the suffix first keeps names shorter than the extension from
    // underflowing the length computation below.
    if (!std.mem.endsWith(u8, file_name, extension)) @panic("Couln't parse the int of a zippondata file.");
    return std.fmt.parseInt(usize, file_name[0 .. file_name.len - extension.len], 10) catch @panic("Couln't parse the int of a zippondata file.");
}
/// Return the index of a data file of `struct_name` whose size is below
/// `max_file_size`, or null when every data file is at or over the limit.
/// The folder is visited in directory iteration order, so the file returned is
/// the first one encountered, not necessarily the lowest index.
/// Entries that are not regular files, or whose name does not end with
/// `.zippondata` (for example a leftover `.zippondata.new`), are ignored.
fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) !?usize {
    const path = try std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, struct_name });
    defer self.allocator.free(path);

    var member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
    defer member_dir.close();

    var iter = member_dir.iterate();
    while (try iter.next()) |entry| {
        if (entry.kind != .file) continue;
        // Only real data files can be converted to an index by fileName2Index.
        if (!std.mem.endsWith(u8, entry.name, ".zippondata")) continue;

        const file_stat = try member_dir.statFile(entry.name);
        if (file_stat.size < self.max_file_size) return self.fileName2Index(entry.name);
    }
    return null;
}
/// Return the highest data file index of `struct_name`.
/// The value is computed as the number of `.zippondata` files in the struct's
/// folder minus one, which equals the highest index as long as the files are
/// numbered 0, 1, 2, ... without gaps. So with 0.zippondata, 1.zippondata and
/// 2.zippondata it returns 2.
/// An empty folder yields 0 instead of underflowing.
fn maxFileIndex(self: FileEngine, struct_name: []const u8) !usize {
    const path = try std.fmt.allocPrint(self.allocator, "{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, struct_name });
    defer self.allocator.free(path);

    var member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
    // The handle was previously never released; this function runs on every query.
    defer member_dir.close();

    var count: usize = 0;
    var iter = member_dir.iterate();
    while (try iter.next()) |entry| {
        if (entry.kind != .file) continue;
        // Leftovers such as "<index>.zippondata.new" are not data files.
        if (!std.mem.endsWith(u8, entry.name, ".zippondata")) continue;
        count += 1;
    }
    // Saturating subtraction: 0 files gives 0 rather than an integer overflow.
    return count -| 1;
}
/// Errors returned by initDataFolder.
const FileError = error{
SchemaFileNotFound, // the schema file given by the caller could not be opened
SchemaNotConform, // the schema parser rejected the schema text
DATAFolderNotFound, // "<root>/DATA" could not be opened
StructFolderNotFound, // a struct folder could not be opened right after being created
CantMakeDir, // creating a struct folder failed for a reason other than "already exists"
CantMakeFile, // creating the first data file of a struct failed
};
/// Load the schema at `path_to_schema_file`, make it the engine's current schema
/// and prepare the data folders.
///
/// For every struct of the schema a folder `<root>/DATA/<struct name>` is created
/// if missing, together with a `0.zippondata` file. Finally the schema is saved
/// through `writeSchemaFile`.
///
/// Returns SchemaFileNotFound, SchemaNotConform, DATAFolderNotFound,
/// StructFolderNotFound, CantMakeDir or CantMakeFile. Panics on allocation
/// failure or if the schema file cannot be read.
///
/// NOTE(review): `createFile` is called with default flags, which truncate an
/// existing `0.zippondata`; the PathAlreadyExists arm can only trigger with
/// `.exclusive = true` — confirm which behavior is wanted.
/// TODO: Delete current folder before new one are created
pub fn initDataFolder(self: *FileEngine, path_to_schema_file: []const u8) FileError!void {
    const schema_buf = self.allocator.alloc(u8, 1024 * 50) catch @panic("Cant allocate the schema buffer");
    defer self.allocator.free(schema_buf);

    const file = std.fs.cwd().openFile(path_to_schema_file, .{}) catch return FileError.SchemaFileNotFound;
    defer file.close();

    const len = file.readAll(schema_buf) catch @panic("Can't read schema file");

    // Replace the stored schema text by the one just read.
    self.allocator.free(self.null_terminated_schema_buff);
    self.null_terminated_schema_buff = self.allocator.dupeZ(u8, schema_buf[0..len]) catch @panic("Cant allocate null term buffer for the schema");

    var toker = SchemaTokenizer.init(self.null_terminated_schema_buff);
    var parser = SchemaParser.init(&toker, self.allocator);

    // Release the structs of the previous schema and empty the list before parsing the new one.
    for (self.struct_array.items) |*elem| elem.deinit();
    self.struct_array.clearRetainingCapacity();

    parser.parse(&self.struct_array) catch return error.SchemaNotConform;

    const path = std.fmt.allocPrint(self.allocator, "{s}/DATA", .{self.path_to_ZipponDB_dir}) catch @panic("Cant allocate path");
    defer self.allocator.free(path);

    var data_dir = std.fs.cwd().openDir(path, .{}) catch return FileError.DATAFolderNotFound;
    defer data_dir.close();

    for (self.struct_array.items) |struct_item| {
        const struct_name = self.locToSlice(struct_item.name);

        data_dir.makeDir(struct_name) catch |err| switch (err) {
            error.PathAlreadyExists => {},
            else => return FileError.CantMakeDir,
        };

        // Closed at the end of each iteration; the handle used to leak once per struct.
        var struct_dir = data_dir.openDir(struct_name, .{}) catch return FileError.StructFolderNotFound;
        defer struct_dir.close();

        const data_file = struct_dir.createFile("0.zippondata", .{}) catch |err| switch (err) {
            error.PathAlreadyExists => continue,
            else => return FileError.CantMakeFile,
        };
        // Only the file's existence matters here, so the handle is released right away.
        data_file.close();
    }

    self.writeSchemaFile();
}
// Stuff for schema
/// Read `<sub_path>/schema.zipponschema` into `buffer` and return the number of
/// bytes read. The path is resolved from the current working directory.
/// `allocator` is only used for the temporary path string.
/// Path allocation, open and read errors are propagated.
pub fn readSchemaFile(allocator: Allocator, sub_path: []const u8, buffer: []u8) !usize {
    const schema_path = try std.fmt.allocPrint(allocator, "{s}/schema.zipponschema", .{sub_path});
    defer allocator.free(schema_path);

    const schema_file = try std.fs.cwd().openFile(schema_path, .{});
    defer schema_file.close();

    return schema_file.readAll(buffer);
}
/// Replace `schema.zipponschema` in the ZipponDB folder with the schema text
/// currently held in `null_terminated_schema_buff`.
/// Panics on any failure, except when the old schema file does not exist yet.
pub fn writeSchemaFile(self: *FileEngine) void {
    var root_dir = std.fs.cwd().openDir(self.path_to_ZipponDB_dir, .{}) catch @panic("Cant open main folder!");
    defer root_dir.close();

    // Step 1: remove the previous schema file; not having one is fine.
    root_dir.deleteFile("schema.zipponschema") catch |err| {
        if (err != error.FileNotFound) @panic("Error other than file not found when writing the schema.");
    };

    // Step 2: create a fresh file and dump the in-memory schema into it.
    const schema_file = root_dir.createFile("schema.zipponschema", .{}) catch @panic("Can't create new schema file");
    defer schema_file.close();

    schema_file.writeAll(self.null_terminated_schema_buff) catch @panic("Can't write new schema");
}
/// Return the portion of the schema buffer delimited by `loc.start` and `loc.end`.
/// The returned slice points into `null_terminated_schema_buff`.
pub fn locToSlice(self: *FileEngine, loc: SchemaToken.Loc) []const u8 {
    const schema_text = self.null_terminated_schema_buff;
    return schema_text[loc.start..loc.end];
}
/// Return the position of `member_name` in the member list of `struct_name`,
/// or `null` when the struct has no member with that name.
/// Panics (through `structName2structMembers`) when the struct is unknown.
pub fn columnIndexOfMember(self: *FileEngine, struct_name: []const u8, member_name: []const u8) ?usize {
    const members = self.structName2structMembers(struct_name);
    for (members, 0..) |member_loc, index| {
        const candidate = self.locToSlice(member_loc);
        if (std.mem.eql(u8, candidate, member_name)) return index;
    }
    return null;
}
/// Return the data type declared for `member_name` in `struct_name`, or `null`
/// when the struct has no such member.
/// The type is read at the same position as the member in the struct's type list.
pub fn memberName2DataType(self: *FileEngine, struct_name: []const u8, member_name: []const u8) ?DataType {
    const members = self.structName2structMembers(struct_name);
    for (members, 0..) |member_loc, index| {
        if (!std.mem.eql(u8, self.locToSlice(member_loc), member_name)) continue;
        return self.structName2DataType(struct_name)[index];
    }
    return null;
}
/// Return the member name locations of the struct called `struct_name`.
/// Each entry can be turned into text with `locToSlice`.
/// Panics when no struct of the current schema has that name.
pub fn structName2structMembers(self: *FileEngine, struct_name: []const u8) []SchemaToken.Loc {
    for (self.struct_array.items) |schema_struct| {
        const candidate = self.locToSlice(schema_struct.name);
        if (std.mem.eql(u8, candidate, struct_name)) return schema_struct.members.items;
    }
    @panic("Struct name not found!");
}
/// Return the list of member data types of the struct called `struct_name`.
/// Panics when no struct of the current schema has that name, exactly like
/// `structName2structMembers`. Without that check an unknown name indexed one
/// past the end of `struct_array`.
pub fn structName2DataType(self: *FileEngine, struct_name: []const u8) []const DataType {
    for (self.struct_array.items) |schema_struct| {
        if (std.mem.eql(u8, self.locToSlice(schema_struct.name), struct_name)) return schema_struct.types.items;
    }
    @panic("Struct name not found!");
}
/// Check whether a struct called `struct_name` exists in the current schema.
pub fn isStructNameExists(self: *FileEngine, struct_name: []const u8) bool {
    for (self.struct_array.items) |schema_struct| {
        const candidate = self.locToSlice(schema_struct.name);
        if (std.mem.eql(u8, candidate, struct_name)) return true;
    }
    return false;
}
/// Check whether the struct `struct_name` declares a member called `member_name`.
/// Panics (through `structName2structMembers`) when the struct is unknown.
pub fn isMemberNameInStruct(self: *FileEngine, struct_name: []const u8, member_name: []const u8) bool {
    const members = self.structName2structMembers(struct_name);
    var idx: usize = 0;
    while (idx < members.len) : (idx += 1) {
        const candidate = self.locToSlice(members[idx]);
        if (std.mem.eql(u8, candidate, member_name)) return true;
    }
    return false;
}
/// Check whether `struct_name_to_check` is the name of a struct in the schema
/// currently used by the engine. This is the same check as `isStructNameExists`.
pub fn isStructInSchema(self: *FileEngine, struct_name_to_check: []const u8) bool {
    for (self.struct_array.items) |struct_schema| {
        // `name` is a location inside the schema buffer, not text: resolve it
        // before comparing it with the candidate string.
        if (std.mem.eql(u8, struct_name_to_check, self.locToSlice(struct_schema.name))) {
            return true;
        }
    }
    return false;
}
/// Return true when the keys of `map` are exactly the member names of
/// `struct_name`: every member is present and the map holds nothing else.
/// Each missing member is reported with `std.debug.print`.
pub fn checkIfAllMemberInMap(self: *FileEngine, struct_name: []const u8, map: *std.StringHashMap([]const u8)) bool {
    const members = self.structName2structMembers(struct_name);

    var found: u16 = 0;
    for (members) |member_loc| {
        const member_name = self.locToSlice(member_loc);
        if (!map.contains(member_name)) {
            std.debug.print("Missing: {s}\n", .{member_name});
            continue;
        }
        found += 1;
    }

    // All members found, and no extra key in the map.
    if (found != members.len) return false;
    return found == map.count();
}
};
test "Get list of UUID using condition" {
    const allocator = std.testing.allocator;

    // Engine on the default ZipponDB folder (no explicit path).
    var engine = FileEngine.init(allocator, null);
    defer engine.deinit();

    // Receives the UUID of every entity matching the condition.
    var matches = std.ArrayList(UUID).init(allocator);
    defer matches.deinit();

    const email_filter = FileEngine.Condition{
        .struct_name = "User",
        .member_name = "email",
        .value = "adrien@mail.com",
        .operation = .equal,
        .data_type = .str,
    };

    try engine.getUUIDListUsingCondition(email_filter, &matches);
}
// Series of functions to use just before creating an entity.
// They transform the string form of the data into values of the right type.
/// Parse `value_str` as a base-10 `i64`.
/// Any input that cannot be parsed yields 0.
pub fn parseInt(value_str: []const u8) i64 {
    if (std.fmt.parseInt(i64, value_str, 10)) |parsed| {
        return parsed;
    } else |_| {
        return 0;
    }
}
/// Parse a bracketed, space separated list such as `[1 14 44]` into a list of `i64`.
///
/// - The first and last byte (the brackets) are dropped without being checked.
/// - Runs of spaces count as one separator, so `[]` gives an empty list.
/// - An element that is not a valid base-10 integer becomes 0 (same rule as `parseInt`).
/// - Input shorter than two bytes gives an empty list.
///
/// The caller owns the returned list and must `deinit` it.
/// Panics when the list cannot grow, instead of silently dropping elements.
pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) {
    var array = std.ArrayList(i64).init(allocator);
    // Without both brackets there is nothing to slice.
    if (array_str.len < 2) return array;

    var it = std.mem.tokenizeScalar(u8, array_str[1 .. array_str.len - 1], ' ');
    while (it.next()) |token| {
        const value = std.fmt.parseInt(i64, token, 10) catch 0;
        array.append(value) catch @panic("Cant allocate memory for the int array");
    }
    return array;
}
/// Parse `value_str` as an `f64`.
/// Any input that cannot be parsed yields 0.
pub fn parseFloat(value_str: []const u8) f64 {
    if (std.fmt.parseFloat(f64, value_str)) |parsed| {
        return parsed;
    } else |_| {
        return 0;
    }
}
/// Parse a bracketed, space separated list such as `[1.5 14.3 42]` into a list of `f64`.
///
/// - The first and last byte (the brackets) are dropped without being checked.
/// - Runs of spaces count as one separator, so `[]` gives an empty list.
/// - An element that is not a valid float becomes 0 (same rule as `parseFloat`).
/// - Input shorter than two bytes gives an empty list.
///
/// The caller owns the returned list and must `deinit` it.
/// Panics when the list cannot grow, instead of silently dropping elements.
pub fn parseArrayFloat(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(f64) {
    var array = std.ArrayList(f64).init(allocator);
    // Without both brackets there is nothing to slice.
    if (array_str.len < 2) return array;

    var it = std.mem.tokenizeScalar(u8, array_str[1 .. array_str.len - 1], ' ');
    while (it.next()) |token| {
        const value = std.fmt.parseFloat(f64, token) catch 0;
        array.append(value) catch @panic("Cant allocate memory for the float array");
    }
    return array;
}
/// Interpret `value_str` as a boolean: false when it is empty or starts with
/// the character '0', true for anything else.
/// Only the first byte is looked at.
pub fn parseBool(value_str: []const u8) bool {
    // An empty string used to index out of bounds; treat it as false, in line
    // with parseInt / parseFloat falling back to 0 on unusable input.
    if (value_str.len == 0) return false;
    return value_str[0] != '0';
}
/// Parse a bracketed, space separated list such as `[1 0 0 1]` into a list of `bool`.
///
/// - The first and last byte (the brackets) are dropped without being checked.
/// - Runs of spaces count as one separator, so `[]` gives an empty list.
/// - An element is false when it starts with '0' and true otherwise (same rule as `parseBool`).
/// - Input shorter than two bytes gives an empty list.
///
/// The caller owns the returned list and must `deinit` it.
/// Panics when the list cannot grow, instead of silently dropping elements.
pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) {
    var array = std.ArrayList(bool).init(allocator);
    // Without both brackets there is nothing to slice.
    if (array_str.len < 2) return array;

    var it = std.mem.tokenizeScalar(u8, array_str[1 .. array_str.len - 1], ' ');
    while (it.next()) |token| {
        // Tokens are never empty, so reading the first byte is always in bounds.
        array.append(token[0] != '0') catch @panic("Cant allocate memory for the bool array");
    }
    return array;
}
/// Parse a bracketed, space separated list of UUID strings into a list of `UUID`.
///
/// - The first and last byte (the brackets) are dropped without being checked.
/// - Runs of spaces count as one separator.
/// - An element rejected by `UUID.parse` is skipped.
/// - Input shorter than two bytes gives an empty list.
///
/// The caller owns the returned list and must `deinit` it.
/// Panics when the list cannot grow, instead of silently dropping a valid UUID.
pub fn parseArrayUUID(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(UUID) {
    var array = std.ArrayList(UUID).init(allocator);
    // Without both brackets there is nothing to slice.
    if (array_str.len < 2) return array;

    var it = std.mem.tokenizeScalar(u8, array_str[1 .. array_str.len - 1], ' ');
    while (it.next()) |token| {
        const uuid = UUID.parse(token) catch continue;
        array.append(uuid) catch @panic("Cant allocate memory for the UUID array");
    }
    return array;
}
/// Parse a bracketed, space separated list of strings such as
/// `['Hello world' 'foo']` into a list of freshly allocated strings.
///
/// - The first and last byte (the brackets) are dropped without being checked.
/// - A space only separates elements outside single quotes, so
///   `['Hello world']` is one element.
/// - Elements keep their surrounding quotes.
/// - Runs of spaces count as one separator, so `[]` gives an empty list.
/// - Input shorter than two bytes gives an empty list.
///
/// NOTE(review): every `'` toggles the quoted state; there is no escape
/// sequence, so an apostrophe inside a string is not supported. Confirm this
/// matches what the query tokenizer accepts.
///
/// The caller owns the returned list and every string in it: free each element
/// with `allocator`, then `deinit` the list.
/// Panics on allocation failure.
pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) {
    var array = std.ArrayList([]const u8).init(allocator);
    // Without both brackets there is nothing to slice.
    if (array_str.len < 2) return array;

    const inner = array_str[1 .. array_str.len - 1];
    var in_quote = false;
    var token_start: ?usize = null;

    // Walk one position past the end so the last element is flushed by the
    // same code path as the others.
    var i: usize = 0;
    while (i <= inner.len) : (i += 1) {
        const at_end = i == inner.len;
        if (!at_end and inner[i] == '\'') in_quote = !in_quote;

        const is_boundary = at_end or (inner[i] == ' ' and !in_quote);
        if (!is_boundary) {
            if (token_start == null) token_start = i;
            continue;
        }

        if (token_start) |start| {
            const copy = allocator.dupe(u8, inner[start..i]) catch @panic("=(");
            // Dropping the element here would also leak `copy`.
            array.append(copy) catch @panic("Cant allocate memory for the string array");
            token_start = null;
        }
    }
    return array;
}
test "Data parsing" {
    const allocator = std.testing.allocator;

    // Int
    const in1: [3][]const u8 = .{ "1", "42", "Hello" };
    const expected_out1: [3]i64 = .{ 1, 42, 0 };
    for (in1, expected_out1) |value, expected| {
        try std.testing.expectEqual(expected, parseInt(value));
    }

    // Int array
    const in2 = "[1 14 44 42 hello]";
    const out2 = parseArrayInt(allocator, in2);
    defer out2.deinit();
    const expected_out2: [5]i64 = .{ 1, 14, 44, 42, 0 };
    try std.testing.expectEqualSlices(i64, &expected_out2, out2.items);

    // Float
    const in3: [3][]const u8 = .{ "1.3", "65.991", "Hello" };
    const expected_out3: [3]f64 = .{ 1.3, 65.991, 0 };
    for (in3, expected_out3) |value, expected| {
        try std.testing.expectEqual(expected, parseFloat(value));
    }

    // Float array
    const in4 = "[1.5 14.3 44.9999 42 hello]";
    const out4 = parseArrayFloat(allocator, in4);
    defer out4.deinit();
    const expected_out4: [5]f64 = .{ 1.5, 14.3, 44.9999, 42, 0 };
    try std.testing.expectEqualSlices(f64, &expected_out4, out4.items);

    // Bool
    const in5: [3][]const u8 = .{ "1", "Hello", "0" };
    const expected_out5: [3]bool = .{ true, true, false };
    for (in5, expected_out5) |value, expected| {
        try std.testing.expectEqual(expected, parseBool(value));
    }

    // Bool array
    const in6 = "[1 0 0 1 1]";
    const out6 = parseArrayBool(allocator, in6);
    defer out6.deinit();
    const expected_out6: [5]bool = .{ true, false, false, true, true };
    try std.testing.expectEqualSlices(bool, &expected_out6, out6.items);

    // String array: every element is a copy owned by the caller and keeps its quotes.
    const in7 = "['Hello' 'World']";
    const out7 = parseArrayStr(allocator, in7);
    defer {
        for (out7.items) |str| allocator.free(str);
        out7.deinit();
    }
    const expected_out7: [2][]const u8 = .{ "'Hello'", "'World'" };
    try std.testing.expectEqual(expected_out7.len, out7.items.len);
    for (expected_out7, out7.items) |expected, actual| {
        try std.testing.expectEqualStrings(expected, actual);
    }
}