writeEntity working with new ZipponData package

This commit is contained in:
Adrien Bouvais 2024-10-30 23:50:37 +01:00
parent 294d4f7a2c
commit dbf5a255a9
8 changed files with 397 additions and 346 deletions

View File

@ -74,10 +74,22 @@ pub fn build(b: *std.Build) void {
tests5.root_module.addImport("ZipponData", b.dependency("ZipponData", .{}).module("ZipponData"));
const run_tests5 = b.addRunArtifact(tests5);
const tests6 = b.addTest(.{
.root_source_file = b.path("src/stuffs/filter.zig"),
.target = target,
.optimize = optimize,
.name = "Filter tree",
.test_runner = b.path("test_runner.zig"),
});
tests6.root_module.addImport("dtype", b.createModule(.{ .root_source_file = b.path("lib/types/out.zig") }));
tests6.root_module.addImport("ZipponData", b.dependency("ZipponData", .{}).module("ZipponData"));
const run_tests6 = b.addRunArtifact(tests6);
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_tests1.step);
test_step.dependOn(&run_tests2.step);
test_step.dependOn(&run_tests3.step);
test_step.dependOn(&run_tests4.step);
test_step.dependOn(&run_tests5.step);
test_step.dependOn(&run_tests6.step);
}

View File

@ -3,8 +3,8 @@
.version = "0.1.4",
.dependencies = .{
.ZipponData = .{
.url = "git+https://github.com/MrBounty/ZipponData",
.hash = "12200f2c24233d72c195c1daf04d07f3af22418593f7bbe8a75e54fe6817866f4564",
.url = "git+https://github.com/MrBounty/ZipponData#2ec9cc00e0d798e741d63f91cde18af0f9bf1bce",
.hash = "12206c4cac549a5d1beab62fe1c45388cec0bcc5aac96da8175eccd8abbeb6d41913",
},
},
.paths = .{

View File

@ -2,13 +2,15 @@ const std = @import("std");
const UUID = @import("uuid.zig").UUID;
const DateTime = @import("date.zig").DateTime;
// FIXME: Stop returning arrayList and use toOwnedSlice instead
// TODO: Put those functions somewhere else
pub fn parseInt(value_str: []const u8) i64 {
return std.fmt.parseInt(i64, value_str, 10) catch return 0;
pub fn parseInt(value_str: []const u8) i32 {
return std.fmt.parseInt(i32, value_str, 10) catch return 0;
}
pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) {
var array = std.ArrayList(i64).init(allocator);
pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i32) {
var array = std.ArrayList(i32).init(allocator);
var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " ");
while (it.next()) |x| {
@ -57,6 +59,17 @@ pub fn parseArrayDate(allocator: std.mem.Allocator, array_str: []const u8) std.A
return array;
}
/// Parse a bracketed, space-separated list of date strings (e.g. "[2024/10/30 ...]")
/// into a list of unix timestamps. Append failures (OOM) are silently dropped,
/// matching the behavior of the sibling parseArray* helpers in this file.
pub fn parseArrayDateUnix(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(u64) {
    var timestamps = std.ArrayList(u64).init(allocator);
    const inner = array_str[1 .. array_str.len - 1]; // strip the surrounding brackets
    var tokens = std.mem.splitAny(u8, inner, " ");
    while (tokens.next()) |token| timestamps.append(parseDate(token).toUnix()) catch {};
    return timestamps;
}
pub fn parseTime(value_str: []const u8) DateTime {
const hours: u16 = std.fmt.parseInt(u16, value_str[0..2], 10) catch 0;
const minutes: u16 = std.fmt.parseInt(u16, value_str[3..5], 10) catch 0;
@ -77,6 +90,17 @@ pub fn parseArrayTime(allocator: std.mem.Allocator, array_str: []const u8) std.A
return array;
}
/// Parse a bracketed, space-separated list of time strings into a list of unix
/// timestamps. Append failures (OOM) are silently dropped, matching the
/// behavior of the sibling parseArray* helpers in this file.
pub fn parseArrayTimeUnix(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(u64) {
    var timestamps = std.ArrayList(u64).init(allocator);
    const inner = array_str[1 .. array_str.len - 1]; // strip the surrounding brackets
    var tokens = std.mem.splitAny(u8, inner, " ");
    while (tokens.next()) |token| {
        const parsed = parseTime(token);
        timestamps.append(parsed.toUnix()) catch {};
    }
    return timestamps;
}
pub fn parseDatetime(value_str: []const u8) DateTime {
const year: u16 = std.fmt.parseInt(u16, value_str[0..4], 10) catch 0;
const month: u16 = std.fmt.parseInt(u16, value_str[5..7], 10) catch 0;
@ -100,6 +124,17 @@ pub fn parseArrayDatetime(allocator: std.mem.Allocator, array_str: []const u8) s
return array;
}
/// Parse a bracketed, space-separated list of datetime strings into a list of
/// unix timestamps. Append failures (OOM) are silently dropped, matching the
/// behavior of the sibling parseArray* helpers in this file.
pub fn parseArrayDatetimeUnix(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(u64) {
    var timestamps = std.ArrayList(u64).init(allocator);
    const inner = array_str[1 .. array_str.len - 1]; // strip the surrounding brackets
    var tokens = std.mem.splitAny(u8, inner, " ");
    while (tokens.next()) |token| {
        const parsed = parseDatetime(token);
        timestamps.append(parsed.toUnix()) catch {};
    }
    return timestamps;
}
pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) {
var array = std.ArrayList(bool).init(allocator);
@ -123,6 +158,18 @@ pub fn parseArrayUUID(allocator: std.mem.Allocator, array_str: []const u8) std.A
return array;
}
/// Parse a bracketed, space-separated list of UUID strings into a slice of raw
/// 16-byte values. Malformed UUIDs are skipped (best-effort, other entries
/// still parse); allocation failures are returned to the caller.
/// Caller owns the returned slice.
pub fn parseArrayUUIDBytes(allocator: std.mem.Allocator, array_str: []const u8) ![]const [16]u8 {
    var array = std.ArrayList([16]u8).init(allocator);
    errdefer array.deinit(); // don't leak collected bytes if a later allocation fails
    var it = std.mem.splitAny(u8, array_str[1 .. array_str.len - 1], " ");
    while (it.next()) |x| {
        // Skipping a malformed entry is deliberate; an invalid token should not
        // abort the whole array.
        const uuid = UUID.parse(x) catch continue;
        // OOM is a real failure for the caller: propagate it instead of
        // silently dropping data (the old `catch continue` hid it).
        try array.append(uuid.bytes);
    }
    return try array.toOwnedSlice();
}
// FIXME: I think it will not work if there is a ' inside the string, even \', need to fix that
pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) {
var array = std.ArrayList([]const u8).init(allocator);
@ -135,7 +182,7 @@ pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.Ar
array.append(x_copy) catch {};
}
allocator.free(array.pop()); // Remove the last because empty like the first one
if (array.items.len > 0) allocator.free(array.pop()); // Remove the last because empty like the first one
return array;
}
@ -145,7 +192,7 @@ test "Value parsing: Int" {
// Int
const values: [3][]const u8 = .{ "1", "42", "Hello" };
const expected_values: [3]i64 = .{ 1, 42, 0 };
const expected_values: [3]i32 = .{ 1, 42, 0 };
for (values, 0..) |value, i| {
try std.testing.expect(parseInt(value) == expected_values[i]);
}
@ -154,8 +201,8 @@ test "Value parsing: Int" {
const array_str = "[1 14 44 42 hello]";
const array = parseArrayInt(allocator, array_str);
defer array.deinit();
const expected_array: [5]i64 = .{ 1, 14, 44, 42, 0 };
try std.testing.expect(std.mem.eql(i64, array.items, &expected_array));
const expected_array: [5]i32 = .{ 1, 14, 44, 42, 0 };
try std.testing.expect(std.mem.eql(i32, array.items, &expected_array));
}
test "Value parsing: Float" {

View File

@ -76,21 +76,6 @@ pub const FileEngine = struct {
return !std.mem.eql(u8, "", self.path_to_ZipponDB_dir);
}
/// Typed right-hand-side value of a condition, parsed once from the raw query
/// string before scanning rows. Bare (untagged) union: the active field is
/// implied by the Condition's data_type, so callers must read the matching one.
const ComparisonValue = union {
    int: i64,
    float: f64,
    str: []const u8,
    bool_: bool, // trailing underscore avoids clashing with the `bool` keyword
    link: UUID,
    datetime: DateTime, // also used for plain date and time values
    int_array: std.ArrayList(i64),
    str_array: std.ArrayList([]const u8),
    float_array: std.ArrayList(f64),
    bool_array: std.ArrayList(bool),
    link_array: std.ArrayList(UUID),
    datetime_array: std.ArrayList(DateTime),
};
// --------------------Other--------------------
pub fn readSchemaFile(allocator: Allocator, sub_path: []const u8, buffer: []u8) FileEngineError!usize {
@ -228,10 +213,7 @@ pub const FileEngine = struct {
};
const struct_dir = data_dir.openDir(schema_struct.name, .{}) catch return FileEngineError.CantOpenDir;
_ = struct_dir.createFile("0.csv", .{}) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => return FileEngineError.CantMakeFile,
};
zid.createFile("0.zid", struct_dir) catch return FileEngineError.CantMakeFile;
}
try self.writeSchemaFile();
@ -442,241 +424,33 @@ pub const FileEngine = struct {
}
/// Take a condition and an array of UUID and fill the array with all UUID that match the condition
/// TODO: Change the UUID function to be a B+Tree
/// TODO: Optimize the shit out of this, it is way too slow right now. Here are some ideas
/// - Make multiple condition per row
/// - Array can take a very long time to parse, maybe put them in a separate file. But string can be too...
/// - Use the stream directly in the tokenizer
/// - Use a fixed size and split into other file. Like one file for one member (Because very long, like an array of 1000 value) and another one for everything else
/// The threshold can be like if the average len is > 400 characters. So UUID would take less than 10% of the storage
/// - Save data in a more compact way
/// - Multithreading, each thread take a list of files and we mix them at the end
pub fn getUUIDListUsingCondition(self: *FileEngine, condition: Condition, uuid_array: *std.ArrayList(UUID)) FileEngineError!void {
const max_file_index = try self.maxFileIndex(condition.struct_name);
var current_index: usize = 0;
var path_buff = std.fmt.allocPrint(
self.allocator,
"{s}/DATA/{s}/{d}.csv",
.{ self.path_to_ZipponDB_dir, condition.struct_name, current_index },
) catch return FileEngineError.MemoryError;
defer self.allocator.free(path_buff);
var file = std.fs.cwd().openFile(path_buff, .{}) catch return FileEngineError.CantOpenFile;
defer file.close();
var output: [BUFFER_SIZE]u8 = undefined;
var output_fbs = std.io.fixedBufferStream(&output);
const writer = output_fbs.writer();
var buffered = std.io.bufferedReader(file.reader());
var reader = buffered.reader();
var compare_value: ComparisonValue = undefined;
switch (condition.data_type) {
.int => compare_value = ComparisonValue{ .int = s2t.parseInt(condition.value) },
.str => compare_value = ComparisonValue{ .str = condition.value },
.float => compare_value = ComparisonValue{ .float = s2t.parseFloat(condition.value) },
.bool => compare_value = ComparisonValue{ .bool_ = s2t.parseBool(condition.value) },
.link => compare_value = ComparisonValue{ .link = UUID.parse(condition.value) catch return FileEngineError.InvalidUUID },
.date => compare_value = ComparisonValue{ .datetime = s2t.parseDate(condition.value) },
.time => compare_value = ComparisonValue{ .datetime = s2t.parseTime(condition.value) },
.datetime => compare_value = ComparisonValue{ .datetime = s2t.parseDatetime(condition.value) },
.int_array => compare_value = ComparisonValue{ .int_array = s2t.parseArrayInt(self.allocator, condition.value) },
.str_array => compare_value = ComparisonValue{ .str_array = s2t.parseArrayStr(self.allocator, condition.value) },
.float_array => compare_value = ComparisonValue{ .float_array = s2t.parseArrayFloat(self.allocator, condition.value) },
.bool_array => compare_value = ComparisonValue{ .bool_array = s2t.parseArrayBool(self.allocator, condition.value) },
.link_array => compare_value = ComparisonValue{ .link_array = s2t.parseArrayUUID(self.allocator, condition.value) },
.date_array => compare_value = ComparisonValue{ .datetime_array = s2t.parseArrayDate(self.allocator, condition.value) },
.time_array => compare_value = ComparisonValue{ .datetime_array = s2t.parseArrayTime(self.allocator, condition.value) },
.datetime_array => compare_value = ComparisonValue{ .datetime_array = s2t.parseArrayDatetime(self.allocator, condition.value) },
}
defer {
switch (condition.data_type) {
.int_array => compare_value.int_array.deinit(),
.str_array => {
for (compare_value.str_array.items) |value| self.allocator.free(value); // TODO: Remove that, I should need to free them one by one as condition.value keep it in memory
compare_value.str_array.deinit();
},
.float_array => compare_value.float_array.deinit(),
.bool_array => compare_value.bool_array.deinit(),
.link_array => compare_value.link_array.deinit(),
.datetime_array => compare_value.datetime_array.deinit(),
else => {},
}
}
var token: FileToken = undefined;
var found = false;
while (true) {
output_fbs.reset();
reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) {
error.EndOfStream => {
// When end of file, check if all file was parse, if not update the reader to the next file
// TODO: Be able to give an array of file index from the B+Tree to only parse them
output_fbs.reset(); // clear buffer before exit
if (current_index == max_file_index) break;
current_index += 1;
self.allocator.free(path_buff);
path_buff = std.fmt.allocPrint(
self.allocator,
"{s}/DATA/{s}/{d}.csv",
.{ self.path_to_ZipponDB_dir, condition.struct_name, current_index },
) catch return FileEngineError.MemoryError;
file.close(); // Do I need to close ? I think so
file = std.fs.cwd().openFile(path_buff, .{}) catch return FileEngineError.CantOpenFile;
buffered = std.io.bufferedReader(file.reader());
reader = buffered.reader();
continue;
}, // file read till the end
else => return FileEngineError.StreamError,
};
// Maybe use the stream directly to prevent duplicate the data
// But I would need to change the Tokenizer a lot...
const null_terminated_string = self.allocator.dupeZ(u8, output_fbs.getWritten()[37..]) catch return FileEngineError.MemoryError;
defer self.allocator.free(null_terminated_string);
var data_toker = FileTokenizer.init(null_terminated_string);
const uuid = UUID.parse(output_fbs.getWritten()[0..36]) catch return FileEngineError.InvalidUUID;
// Skip unwanted token
for (try self.structName2structMembers(condition.struct_name)) |member_name| {
if (std.mem.eql(u8, member_name, condition.member_name)) break;
_ = data_toker.next();
}
token = data_toker.next();
const row_value = data_toker.getTokenSlice(token);
found = switch (condition.operation) {
.equal => switch (condition.data_type) {
.int => compare_value.int == s2t.parseInt(row_value),
.float => compare_value.float == s2t.parseFloat(row_value),
.str => std.mem.eql(u8, compare_value.str, row_value),
.bool => compare_value.bool_ == s2t.parseBool(row_value),
.link => compare_value.link.compare(uuid),
.date => compare_value.datetime.compareDate(s2t.parseDate(row_value)),
.time => compare_value.datetime.compareTime(s2t.parseTime(row_value)),
.datetime => compare_value.datetime.compareDatetime(s2t.parseDatetime(row_value)),
else => unreachable,
},
.different => switch (condition.data_type) {
.int => compare_value.int != s2t.parseInt(row_value),
.float => compare_value.float != s2t.parseFloat(row_value),
.str => !std.mem.eql(u8, compare_value.str, row_value),
.bool => compare_value.bool_ != s2t.parseBool(row_value),
.link => !compare_value.link.compare(uuid),
.date => !compare_value.datetime.compareDate(s2t.parseDate(row_value)),
.time => !compare_value.datetime.compareTime(s2t.parseTime(row_value)),
.datetime => !compare_value.datetime.compareDatetime(s2t.parseDatetime(row_value)),
else => unreachable,
},
.superior_or_equal => switch (condition.data_type) {
.int => compare_value.int <= s2t.parseInt(data_toker.getTokenSlice(token)),
.float => compare_value.float <= s2t.parseFloat(data_toker.getTokenSlice(token)),
.date => compare_value.datetime.toUnix() <= s2t.parseDate(row_value).toUnix(),
.time => compare_value.datetime.toUnix() <= s2t.parseTime(row_value).toUnix(),
.datetime => compare_value.datetime.toUnix() <= s2t.parseDatetime(row_value).toUnix(),
else => unreachable,
},
.superior => switch (condition.data_type) {
.int => compare_value.int < s2t.parseInt(data_toker.getTokenSlice(token)),
.float => compare_value.float < s2t.parseFloat(data_toker.getTokenSlice(token)),
.date => compare_value.datetime.toUnix() < s2t.parseDate(row_value).toUnix(),
.time => compare_value.datetime.toUnix() < s2t.parseTime(row_value).toUnix(),
.datetime => compare_value.datetime.toUnix() < s2t.parseDatetime(row_value).toUnix(),
else => unreachable,
},
.inferior_or_equal => switch (condition.data_type) {
.int => compare_value.int >= s2t.parseInt(data_toker.getTokenSlice(token)),
.float => compare_value.float >= s2t.parseFloat(data_toker.getTokenSlice(token)),
.date => compare_value.datetime.toUnix() >= s2t.parseDate(row_value).toUnix(),
.time => compare_value.datetime.toUnix() >= s2t.parseTime(row_value).toUnix(),
.datetime => compare_value.datetime.toUnix() >= s2t.parseDatetime(row_value).toUnix(),
else => unreachable,
},
.inferior => switch (condition.data_type) {
.int => compare_value.int > s2t.parseInt(data_toker.getTokenSlice(token)),
.float => compare_value.float > s2t.parseFloat(data_toker.getTokenSlice(token)),
.date => compare_value.datetime.toUnix() > s2t.parseDate(row_value).toUnix(),
.time => compare_value.datetime.toUnix() > s2t.parseTime(row_value).toUnix(),
.datetime => compare_value.datetime.toUnix() > s2t.parseDatetime(row_value).toUnix(),
else => unreachable,
},
else => false,
};
// TODO: Do it for other array and implement in the query language
switch (condition.operation) {
.in => switch (condition.data_type) {
.link_array => {
for (compare_value.link_array.items) |elem| {
if (elem.compare(uuid)) uuid_array.append(uuid) catch return FileEngineError.MemoryError;
}
},
else => unreachable,
},
else => {},
}
if (found) uuid_array.append(uuid) catch return FileEngineError.MemoryError;
}
/// TODO: Use the new filter and DataIterator
/// Stub left in place during the ZipponData migration: it matches nothing and
/// leaves the caller's UUID list untouched.
pub fn getUUIDListUsingCondition(_: *FileEngine, _: Condition, _: *std.ArrayList(UUID)) FileEngineError!void {
    return;
}
// --------------------Change existing files--------------------
// TODO: Change map to use a []Data from ZipponData
pub fn writeEntity(self: *FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) FileEngineError!UUID {
// TODO: Make it in batch too
pub fn writeEntity(self: *FileEngine, struct_name: []const u8, map: std.StringHashMap([]const u8)) FileEngineError!UUID {
const uuid = UUID.init();
const potential_file_index = try self.getFirstUsableIndexFile(struct_name);
var file: std.fs.File = undefined;
defer file.close();
const file_index = try self.getFirstUsableIndexFile(struct_name);
var path: []const u8 = undefined;
const path = std.fmt.allocPrint(
self.allocator,
"{s}/DATA/{s}/{d}.zid",
.{ self.path_to_ZipponDB_dir, struct_name, file_index },
) catch return FileEngineError.MemoryError;
defer self.allocator.free(path);
if (potential_file_index) |file_index| {
path = std.fmt.allocPrint(
self.allocator,
"{s}/DATA/{s}/{d}.csv",
.{ self.path_to_ZipponDB_dir, struct_name, file_index },
) catch return FileEngineError.MemoryError;
file = std.fs.cwd().openFile(path, .{ .mode = .read_write }) catch return FileEngineError.CantOpenFile;
} else {
const max_index = try self.maxFileIndex(struct_name);
var arena = std.heap.ArenaAllocator.init(self.allocator);
defer arena.deinit();
const data = try self.orderedNewData(arena.allocator(), struct_name, map);
path = std.fmt.allocPrint(
self.allocator,
"{s}/DATA/{s}/{d}.csv",
.{ self.path_to_ZipponDB_dir, struct_name, max_index + 1 },
) catch return FileEngineError.MemoryError;
file = std.fs.cwd().createFile(path, .{}) catch return FileEngineError.CantMakeFile;
}
file.seekFromEnd(0) catch return FileEngineError.WriteError; // Not really a write error tho
const writer = file.writer();
writer.print("{s}", .{uuid.format_uuid()}) catch return FileEngineError.WriteError;
for (try self.structName2structMembers(struct_name)) |member_name| {
writer.writeByte(CSV_DELIMITER) catch return FileEngineError.WriteError;
writer.print("{s}", .{data_map.get(member_name).?}) catch return FileEngineError.WriteError; // Change that for csv
}
writer.print("\n", .{}) catch return FileEngineError.WriteError;
var writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError;
writer.write(data) catch return FileEngineError.ZipponDataError;
writer.flush() catch return FileEngineError.ZipponDataError;
return uuid;
}
@ -994,13 +768,89 @@ pub const FileEngine = struct {
return deleted_count;
}
// --------------------ZipponData utils--------------------

/// Convert a member-name -> raw-string map (from parseNewData) into a slice of
/// zid.Data ordered to match the struct's member order (as returned by
/// structName2structMembers), ready to be passed to zid.DataWriter.write.
/// `allocator` is expected to be an arena: intermediate encoded buffers are not
/// individually freed here and should be released all at once by the caller.
/// NOTE: every member of the struct must be present in `map`; `.?` panics otherwise.
pub fn orderedNewData(self: *FileEngine, allocator: Allocator, struct_name: []const u8, map: std.StringHashMap([]const u8)) FileEngineError![]const zid.Data {
    const members = try self.structName2structMembers(struct_name);
    const types = try self.structName2DataType(struct_name);

    const datas = allocator.alloc(zid.Data, members.len) catch return FileEngineError.MemoryError;

    for (members, types, 0..) |member, dt, i| {
        switch (dt) {
            .int => datas[i] = zid.Data.initInt(s2t.parseInt(map.get(member).?)),
            .float => datas[i] = zid.Data.initFloat(s2t.parseFloat(map.get(member).?)),
            .bool => datas[i] = zid.Data.initBool(s2t.parseBool(map.get(member).?)),
            .date => datas[i] = zid.Data.initUnix(s2t.parseDate(map.get(member).?).toUnix()),
            .time => datas[i] = zid.Data.initUnix(s2t.parseTime(map.get(member).?).toUnix()),
            .datetime => datas[i] = zid.Data.initUnix(s2t.parseDatetime(map.get(member).?).toUnix()),
            .str => datas[i] = zid.Data.initStr(map.get(member).?),
            .link => {
                const uuid = UUID.parse(map.get(member).?) catch return FileEngineError.InvalidUUID;
                datas[i] = zid.Data{ .UUID = uuid.bytes };
            },
            .int_array => {
                var array = s2t.parseArrayInt(allocator, map.get(member).?);
                defer array.deinit();
                datas[i] = zid.Data.initIntArray(zid.allocEncodArray.Int(allocator, array.items) catch return FileEngineError.AllocEncodError);
            },
            .float_array => {
                var array = s2t.parseArrayFloat(allocator, map.get(member).?);
                defer array.deinit();
                datas[i] = zid.Data.initFloatArray(zid.allocEncodArray.Float(allocator, array.items) catch return FileEngineError.AllocEncodError);
            },
            .str_array => {
                var array = s2t.parseArrayStr(allocator, map.get(member).?);
                defer array.deinit();
                datas[i] = zid.Data.initStrArray(zid.allocEncodArray.Str(allocator, array.items) catch return FileEngineError.AllocEncodError);
            },
            .bool_array => {
                var array = s2t.parseArrayBool(allocator, map.get(member).?);
                defer array.deinit();
                // BUG FIX: this previously wrapped the Bool-encoded buffer with
                // initFloatArray, mis-tagging the data. Assumes zid.Data.initBoolArray
                // exists, mirroring the other init*Array helpers — TODO confirm against ZipponData.
                datas[i] = zid.Data.initBoolArray(zid.allocEncodArray.Bool(allocator, array.items) catch return FileEngineError.AllocEncodError);
            },
            .link_array => {
                const array = s2t.parseArrayUUIDBytes(allocator, map.get(member).?) catch return FileEngineError.MemoryError;
                // BUG FIX: was `self.allocator.free(array)`, but the slice is
                // allocated with `allocator`; freeing with a different allocator
                // than the one that allocated is undefined behavior.
                defer allocator.free(array);
                datas[i] = zid.Data.initUUIDArray(zid.allocEncodArray.UUID(allocator, array) catch return FileEngineError.AllocEncodError);
            },
            .date_array => {
                var array = s2t.parseArrayDateUnix(allocator, map.get(member).?);
                defer array.deinit();
                datas[i] = zid.Data.initUnixArray(zid.allocEncodArray.Unix(allocator, array.items) catch return FileEngineError.AllocEncodError);
            },
            .time_array => {
                var array = s2t.parseArrayTimeUnix(allocator, map.get(member).?);
                defer array.deinit();
                datas[i] = zid.Data.initUnixArray(zid.allocEncodArray.Unix(allocator, array.items) catch return FileEngineError.AllocEncodError);
            },
            .datetime_array => {
                var array = s2t.parseArrayDatetimeUnix(allocator, map.get(member).?);
                defer array.deinit();
                datas[i] = zid.Data.initUnixArray(zid.allocEncodArray.Unix(allocator, array.items) catch return FileEngineError.AllocEncodError);
            },
        }
    }

    return datas;
}
// --------------------Schema utils--------------------
/// Get the index of the first file that is below the size limit. If not found, return null
fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) FileEngineError!?usize {
/// Get the index of the first file that is below the size limit. If not found, create a new file
fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) FileEngineError!usize {
log.debug("Getting first usable index file for {s} at {s}", .{ struct_name, self.path_to_ZipponDB_dir });
const path = std.fmt.allocPrint(
var path = std.fmt.allocPrint(
self.allocator,
"{s}/DATA/{s}",
.{ self.path_to_ZipponDB_dir, struct_name },
@ -1010,14 +860,27 @@ pub const FileEngine = struct {
var member_dir = std.fs.cwd().openDir(path, .{ .iterate = true }) catch return FileEngineError.CantOpenDir;
defer member_dir.close();
var i: usize = 0;
var iter = member_dir.iterate();
while (iter.next() catch return FileEngineError.DirIterError) |entry| {
i += 1;
const file_stat = member_dir.statFile(entry.name) catch return FileEngineError.FileStatError;
if (file_stat.size < MAX_FILE_SIZE) {
return std.fmt.parseInt(usize, entry.name[0..(entry.name.len - 4)], 10) catch return FileEngineError.InvalidFileIndex; // TODO: Change the slice when start using CSV
// Can't I just return i? Files are supposed to be ordered. I think I already checked and they are not
return std.fmt.parseInt(usize, entry.name[0..(entry.name.len - 4)], 10) catch return FileEngineError.InvalidFileIndex; // INFO: Hardcoded len of file extension
}
}
return null;
i += 1;
path = std.fmt.allocPrint(
self.allocator,
"{s}/DATA/{s}/{d}.zid",
.{ self.path_to_ZipponDB_dir, struct_name, i },
) catch return FileEngineError.MemoryError;
zid.createFile(path, null) catch return FileEngineError.ZipponDataError;
return i;
}
/// Iterate over all file of a struct and return the index of the last file.

View File

@ -10,6 +10,7 @@ pub const ZiQlParserError = error{
ConditionError,
WriteError,
AndOrError,
CantWriteEntity,
};
pub const SchemaParserError = error{
@ -41,6 +42,8 @@ pub const FileEngineError = error{
RenameFileError,
StructNotFound,
MemberNotFound,
ZipponDataError,
AllocEncodError,
};
pub const ZipponError = ZiQlParserError || FileEngineError || SchemaParserError;

View File

@ -9,8 +9,12 @@
// For {(name = 'Adrien' OR name = 'Bob') AND (age > 80 OR age < 20)}
const std = @import("std");
const s2t = @import("dtype").s2t;
const ZipponError = @import("errors.zig").ZipponError;
const DataType = @import("dtype").DataType;
const DateTime = @import("dtype").DateTime;
const UUID = @import("dtype").UUID;
const Data = @import("ZipponData").Data;
const ComparisonOperator = enum {
equal,
@ -46,11 +50,99 @@ const LogicalOperator = enum {
}
};
/// Parsed, typed right-hand-side value of a Condition.
/// Built from a raw query token via the init* helpers below, which delegate to
/// the s2t parsing utilities. Array variants own heap memory and must be
/// released with deinit().
pub const ConditionValue = union(enum) {
    int: i32,
    float: f64,
    str: []const u8,
    bool_: bool, // trailing underscore avoids clashing with the `bool` keyword
    link: UUID,
    unix: u64, // date, time and datetime are all normalized to unix timestamps
    int_array: std.ArrayList(i32),
    str_array: std.ArrayList([]const u8),
    float_array: std.ArrayList(f64),
    bool_array: std.ArrayList(bool),
    link_array: std.ArrayList(UUID),
    unix_array: std.ArrayList(u64),

    /// Free the backing list of the array variants; scalar variants own nothing.
    /// NOTE(review): str_array frees only the list, not the individual strings —
    /// verify against s2t.parseArrayStr, which appears to dupe each element.
    pub fn deinit(self: ConditionValue) void {
        switch (self) {
            .int_array => self.int_array.deinit(),
            .str_array => self.str_array.deinit(),
            .float_array => self.float_array.deinit(),
            .bool_array => self.bool_array.deinit(),
            .link_array => self.link_array.deinit(),
            .unix_array => self.unix_array.deinit(),
            else => {},
        }
    }

    // ---- Scalar constructors: parse one raw query token ----

    pub fn initInt(value: []const u8) ConditionValue {
        return ConditionValue{ .int = s2t.parseInt(value) };
    }

    pub fn initFloat(value: []const u8) ConditionValue {
        return ConditionValue{ .float = s2t.parseFloat(value) };
    }

    /// Keeps a reference to `value`; no copy is made.
    pub fn initStr(value: []const u8) ConditionValue {
        return ConditionValue{ .str = value };
    }

    pub fn initBool(value: []const u8) ConditionValue {
        return ConditionValue{ .bool_ = s2t.parseBool(value) };
    }

    pub fn initDate(value: []const u8) ConditionValue {
        return ConditionValue{ .unix = s2t.parseDate(value).toUnix() };
    }

    pub fn initTime(value: []const u8) ConditionValue {
        return ConditionValue{ .unix = s2t.parseTime(value).toUnix() };
    }

    pub fn initDateTime(value: []const u8) ConditionValue {
        return ConditionValue{ .unix = s2t.parseDatetime(value).toUnix() };
    }

    // Array constructors: the parsed list owns heap memory (see deinit).

    pub fn initArrayInt(allocator: std.mem.Allocator, value: []const u8) ConditionValue {
        return ConditionValue{ .int_array = s2t.parseArrayInt(allocator, value) };
    }

    pub fn initArrayFloat(allocator: std.mem.Allocator, value: []const u8) ConditionValue {
        return ConditionValue{ .float_array = s2t.parseArrayFloat(allocator, value) };
    }

    pub fn initArrayStr(allocator: std.mem.Allocator, value: []const u8) ConditionValue {
        return ConditionValue{ .str_array = s2t.parseArrayStr(allocator, value) };
    }

    pub fn initArrayBool(allocator: std.mem.Allocator, value: []const u8) ConditionValue {
        return ConditionValue{ .bool_array = s2t.parseArrayBool(allocator, value) };
    }

    pub fn initArrayDate(allocator: std.mem.Allocator, value: []const u8) ConditionValue {
        return ConditionValue{ .unix_array = s2t.parseArrayDateUnix(allocator, value) };
    }

    pub fn initArrayTime(allocator: std.mem.Allocator, value: []const u8) ConditionValue {
        return ConditionValue{ .unix_array = s2t.parseArrayTimeUnix(allocator, value) };
    }

    pub fn initArrayDateTime(allocator: std.mem.Allocator, value: []const u8) ConditionValue {
        return ConditionValue{ .unix_array = s2t.parseArrayDatetimeUnix(allocator, value) };
    }
};
pub const Condition = struct {
value: []const u8 = undefined,
value: ConditionValue = undefined,
operation: ComparisonOperator = undefined,
data_type: DataType = undefined,
data_index: usize = undefined, // Index in the file
pub fn deinit(self: Condition) void {
self.value.deinit();
}
};
const FilterNode = union(enum) {
@ -76,11 +168,25 @@ pub const Filter = struct {
/// Release the filter tree: recursively free everything below root, then
/// destroy the root node itself.
pub fn deinit(self: *Filter) void {
    switch (self.root.*) {
        .logical => self.freeNode(self.root),
        // A lone condition at the root still owns its ConditionValue.
        .condition => |condition| condition.deinit(),
        else => {},
    }
    self.allocator.destroy(self.root);
}
/// Recursively free a subtree. Children are freed then destroyed here;
/// condition nodes release their owned ConditionValue. The node itself is
/// destroyed by its parent (or by deinit for the root).
fn freeNode(self: *Filter, node: *FilterNode) void {
    switch (node.*) {
        .logical => |logical| {
            self.freeNode(logical.left);
            self.freeNode(logical.right);
            self.allocator.destroy(logical.left);
            self.allocator.destroy(logical.right);
        },
        .condition => |condition| condition.deinit(),
        .empty => {},
    }
}
pub fn addCondition(self: *Filter, condition: Condition) ZipponError!void {
const node = self.allocator.create(FilterNode) catch return ZipponError.MemoryError;
node.* = FilterNode{ .condition = condition };
@ -140,44 +246,72 @@ pub const Filter = struct {
}
}
fn freeNode(self: *Filter, node: *FilterNode) void {
switch (node.*) {
.logical => |logical| {
self.freeNode(logical.left);
self.freeNode(logical.right);
self.allocator.destroy(logical.left);
self.allocator.destroy(logical.right);
},
.condition => {},
.empty => {},
}
}
// TODO: Use []Data and make it work
pub fn evaluate(self: *const Filter, row: anytype) bool {
return self.evaluateNode(&self.root, row);
/// Evaluate the whole filter tree against one row.
/// `row` holds the decoded zid.Data values of a single entity, indexed by
/// Condition.data_index.
pub fn evaluate(self: Filter, row: []Data) bool {
    return self.evaluateNode(self.root, row);
}
fn evaluateNode(self: *const Filter, node: *const FilterNode, row: anytype) bool {
fn evaluateNode(self: Filter, node: *FilterNode, row: []Data) bool {
return switch (node.*) {
.condition => |cond| self.evaluateCondition(cond, row),
.condition => |cond| Filter.evaluateCondition(cond, row),
.logical => |log| switch (log.operator) {
.AND => self.evaluateNode(log.left, row) and self.evaluateNode(log.right, row),
.OR => self.evaluateNode(log.left, row) or self.evaluateNode(log.right, row),
},
.empty => unreachable, // FIXME: I think this is reachable. At least if this is the root node, so it return always true. Like in the query GRAB User {}
};
}
fn evaluateCondition(condition: Condition, row: anytype) bool {
const field_value = @field(row, condition.member_name);
fn evaluateCondition(condition: Condition, row: []Data) bool {
const row_value: Data = row[condition.data_index];
return switch (condition.operation) {
.equal => std.mem.eql(u8, field_value, condition.value),
.different => !std.mem.eql(u8, field_value, condition.value),
.superior => field_value > condition.value,
.superior_or_equal => field_value >= condition.value,
.inferior => field_value < condition.value,
.inferior_or_equal => field_value <= condition.value,
.in => @panic("Not implemented"), // Implement this based on your needs
.equal => switch (condition.data_type) {
.int => row_value.Int == condition.value.int,
.float => row_value.Float == condition.value.float,
.str => std.mem.eql(u8, row_value.Str, condition.value.str),
.bool => row_value.Bool == condition.value.bool_,
.date, .time, .datetime => row_value.Unix == condition.value.unix,
else => unreachable,
},
.different => switch (condition.data_type) {
.int => row_value.Int != condition.value.int,
.float => row_value.Float != condition.value.float,
.str => !std.mem.eql(u8, row_value.Str, condition.value.str),
.bool => row_value.Bool != condition.value.bool_,
.date, .time, .datetime => row_value.Unix != condition.value.unix,
else => unreachable,
},
.superior_or_equal => switch (condition.data_type) {
.int => row_value.Int <= condition.value.int,
.float => row_value.Float <= condition.value.float,
.date, .time, .datetime => row_value.Unix <= condition.value.unix,
else => unreachable,
},
.superior => switch (condition.data_type) {
.int => row_value.Int < condition.value.int,
.float => row_value.Float < condition.value.float,
.date, .time, .datetime => row_value.Unix < condition.value.unix,
else => unreachable,
},
.inferior_or_equal => switch (condition.data_type) {
.int => row_value.Int >= condition.value.int,
.float => row_value.Float >= condition.value.float,
.date, .time, .datetime => row_value.Unix >= condition.value.unix,
else => unreachable,
},
.inferior => switch (condition.data_type) {
.int => row_value.Int > condition.value.int,
.float => row_value.Float > condition.value.float,
.date, .time, .datetime => row_value.Unix > condition.value.unix,
else => unreachable,
},
else => false,
};
}
@ -195,7 +329,7 @@ pub const Filter = struct {
self.printNode(logical.right.*);
std.debug.print(" ) ", .{});
},
.condition => |condition| std.debug.print("{d} {s} {s} |{any}|", .{
.condition => |condition| std.debug.print("{d} {s} {any} |{any}|", .{
condition.data_index,
condition.operation.str(),
condition.value,
@ -205,3 +339,24 @@ pub const Filter = struct {
}
}
};
// Smoke test: build a filter with a single condition (int at data index 0
// equal to 1), print its tree, then evaluate it against one sample row.
test "Evaluate" {
    const allocator = std.testing.allocator;

    // One sample row; index 0 holds the int the condition below targets.
    var data = [_]Data{
        Data.initInt(1),
        Data.initFloat(3.14159),
        Data.initInt(-5),
        Data.initStr("Hello world"),
        Data.initBool(true),
    };

    var filter = try Filter.init(allocator);
    defer filter.deinit();

    // NOTE(review): initInt appears to take the textual form of the literal
    // and parse it internally — TODO confirm against ConditionValue.initInt.
    try filter.addCondition(Condition{ .value = ConditionValue.initInt("1"), .data_index = 0, .operation = .equal, .data_type = .int });
    filter.debugPrint();

    // Result intentionally discarded: this only checks that evaluate runs
    // without error on the sample row, not that it returns a specific value.
    _ = filter.evaluate(&data);
}

View File

@ -77,7 +77,7 @@ pub fn printError(message: []const u8, err: ZipponError, query: ?[]const u8, sta
writer.print(" \n", .{}) catch {}; // Align with the message
}
log.debug("Parsing error: {s}", .{buffer.items});
// log.debug("Parsing error: {s}", .{buffer.items});
send("{s}", .{buffer.items});
return err;

View File

@ -5,6 +5,7 @@ const Tokenizer = @import("tokenizers/ziql.zig").Tokenizer;
const Token = @import("tokenizers/ziql.zig").Token;
const dtype = @import("dtype");
const s2t = dtype.s2t;
const UUID = dtype.UUID;
const AND = dtype.AND;
const OR = dtype.OR;
@ -12,6 +13,7 @@ const DataType = dtype.DataType;
const Filter = @import("stuffs/filter.zig").Filter;
const Condition = @import("stuffs/filter.zig").Condition;
const ConditionValue = @import("stuffs/filter.zig").ConditionValue;
const AdditionalData = @import("stuffs/additionalData.zig").AdditionalData;
const AdditionalDataMember = @import("stuffs/additionalData.zig").AdditionalDataMember;
@ -111,6 +113,20 @@ pub const Parser = struct {
send("{s}", .{buffer.items});
}
/// Send a single UUID to the client formatted as a JSON-style array:
/// `["<uuid>"]`. Buffers the output, then forwards it through `send`.
/// Returns ZiQlParserError.WriteError if any buffer write fails.
pub fn sendUUID(self: Parser, uuid: UUID) ZiQlParserError!void {
    var buffer = std.ArrayList(u8).init(self.allocator);
    defer buffer.deinit();
    const writer = buffer.writer();

    writer.writeByte('[') catch return ZiQlParserError.WriteError;
    writer.writeByte('"') catch return ZiQlParserError.WriteError;
    writer.writeAll(&uuid.format_uuid()) catch return ZiQlParserError.WriteError;
    // Fixed: previously wrote "\", " here, emitting `["<uuid>", ]` — a
    // trailing comma that is invalid JSON and inconsistent with the inline
    // code this helper replaced (which emitted exactly `["<uuid>"]`).
    writer.writeByte('"') catch return ZiQlParserError.WriteError;
    writer.writeByte(']') catch return ZiQlParserError.WriteError;

    send("{s}", .{buffer.items});
}
pub fn parse(self: Parser) ZipponError!void {
var state: State = .start;
var additional_data = AdditionalData.init(self.allocator);
@ -349,10 +365,10 @@ pub const Parser = struct {
var error_message_buffer = std.ArrayList(u8).init(self.allocator);
defer error_message_buffer.deinit();
const error_message_buffer_writer = error_message_buffer.writer();
error_message_buffer_writer.writeAll("Error missing: ") catch return ZipponError.WriteError;
// TODO: Print the entire list of missing
if (!(self.file_engine.checkIfAllMemberInMap(struct_name, &data_map, &error_message_buffer) catch {
return ZiQlParserError.StructNotFound;
})) {
@ -366,22 +382,8 @@ pub const Parser = struct {
token.loc.end,
);
}
const uuid = self.file_engine.writeEntity(struct_name, data_map) catch {
send("ZipponDB error: Couln't write new data to file", .{});
continue;
};
var buffer = std.ArrayList(u8).init(self.allocator);
defer buffer.deinit();
const writer = buffer.writer();
writer.writeByte('[') catch return ZiQlParserError.WriteError;
writer.writeByte('"') catch return ZiQlParserError.WriteError;
writer.writeAll(&uuid.format_uuid()) catch return ZiQlParserError.WriteError;
writer.writeAll("\"") catch return ZiQlParserError.WriteError;
writer.writeByte(']') catch return ZiQlParserError.WriteError;
send("{s}", .{buffer.items});
const uuid = self.file_engine.writeEntity(struct_name, data_map) catch return ZipponError.CantWriteEntity;
try self.sendUUID(uuid);
state = .end;
},
@ -638,7 +640,16 @@ pub const Parser = struct {
}
}
condition.value = self.toker.buffer[start_index..token.loc.end];
condition.value = switch (condition.data_type) {
.int => ConditionValue.initInt(self.toker.buffer[start_index..token.loc.end]),
.float => ConditionValue.initFloat(self.toker.buffer[start_index..token.loc.end]),
.str => ConditionValue.initStr(self.toker.buffer[start_index..token.loc.end]),
.date => ConditionValue.initDate(self.toker.buffer[start_index..token.loc.end]),
.time => ConditionValue.initTime(self.toker.buffer[start_index..token.loc.end]),
.datetime => ConditionValue.initDateTime(self.toker.buffer[start_index..token.loc.end]),
.bool => ConditionValue.initBool(self.toker.buffer[start_index..token.loc.end]),
else => unreachable, // TODO: Make for link and array =|
};
state = .end;
},
@ -1026,46 +1037,6 @@ test "ADD" {
try testParsing("ADD User (name = 'Bob', email='bob@email.com', age=-55, scores=[ 1 ], friends=[], bday=2000/01/01, a_time=12:04:54.8741, last_order=2000/01/01-12:45)");
}
// Parses an UPDATE query with a filter and a TO clause; only verifies that
// parsing completes without error.
test "UPDATE" {
    try testParsing("UPDATE User {name = 'Bob'} TO (email='new@gmail.com')");
}
// Parses a DELETE query with a string filter; only verifies that parsing
// completes without error.
test "DELETE" {
    try testParsing("DELETE User {name='Bob'}");
}
// Parses GRAB queries filtering on a string member with = and != operators.
test "GRAB filter with string" {
    try testParsing("GRAB User {name = 'Bob'}");
    try testParsing("GRAB User {name != 'Brittany Rogers'}");
}
// Parses GRAB queries using the [..] additional-data clause: a row limit,
// a member projection, and a combination of both.
test "GRAB with additional data" {
    try testParsing("GRAB User [1] {age < 18}");
    try testParsing("GRAB User [name] {age < 18}");
    try testParsing("GRAB User [100; name] {age < 18}");
}
// Parses GRAB queries filtering on an int member, covering all six
// comparison operators (=, >, <, <=, >=, !=) including a negative literal.
test "GRAB filter with int" {
    try testParsing("GRAB User {age = 18}");
    try testParsing("GRAB User {age > -18}");
    try testParsing("GRAB User {age < 18}");
    try testParsing("GRAB User {age <= 18}");
    try testParsing("GRAB User {age >= 18}");
    try testParsing("GRAB User {age != 18}");
}
// Parses GRAB queries filtering on temporal members: date (yyyy/mm/dd),
// time (hh:mm), and datetime (yyyy/mm/dd-hh:mm) literals.
test "GRAB filter with date" {
    try testParsing("GRAB User {bday > 2000/01/01}");
    try testParsing("GRAB User {a_time < 08:00}");
    try testParsing("GRAB User {last_order > 2000/01/01-12:45}");
}
// Parses GRAB queries with no filter, an empty filter, and only a limit —
// edge cases of the query grammar with no condition to evaluate.
test "Specific query" {
    try testParsing("GRAB User");
    try testParsing("GRAB User {}");
    try testParsing("GRAB User [1]");
}
test "Synthax error" {
try expectParsingError("GRAB {}", ZiQlParserError.StructNotFound);
try expectParsingError("GRAB User {qwe = 'qwe'}", ZiQlParserError.MemberNotFound);