Switched to one tabular file per struct

Created a new parser, specific to the FileEngine, to read each line.
It is slower, as I need to parse character by character because there is
no fixed length for the data in the files. Before, I was just reading until the
end of the file.

I'm going to need to find some tricks to improve the parsing of data. I am
thinking of using the stream directly instead of calling streamUntilDelimiter.
This commit is contained in:
Adrien Bouvais 2024-10-09 23:20:28 +02:00
parent 8c8b10ef2a
commit b008f434a6
5 changed files with 405 additions and 257 deletions

View File

@ -18,16 +18,6 @@ pub fn build(b: *std.Build) void {
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
// Test step
const tests1 = b.addTest(.{
.root_source_file = b.path("src/dataParser.zig"),
.target = target,
.optimize = optimize,
.name = "Data parsing",
.test_runner = b.path("test_runner.zig"),
});
const run_tests1 = b.addRunArtifact(tests1);
const tests2 = b.addTest(.{
.root_source_file = b.path("src/tokenizers/cli.zig"),
.target = target,
@ -83,7 +73,6 @@ pub fn build(b: *std.Build) void {
const run_tests7 = b.addRunArtifact(tests7);
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_tests1.step);
test_step.dependOn(&run_tests2.step);
test_step.dependOn(&run_tests3.step);
test_step.dependOn(&run_tests4.step);

View File

@ -1,111 +0,0 @@
const std = @import("std");
// Series of functions to use just before creating an entity.
// Will transform the string of data into data of the right type.
// Maybe return a null or something else
/// Parse `value_str` as a base-10 `i64`.
/// Any parse failure (non-numeric text, overflow, empty input) yields 0.
pub fn parseInt(value_str: []const u8) i64 {
    const parsed: i64 = std.fmt.parseInt(i64, value_str, 10) catch 0;
    return parsed;
}
/// Parse a bracketed, space-separated list such as `[1 14 44]` into integers.
/// Elements that are not valid integers become 0 (see `parseInt`).
/// The first and last byte of `array_str` are dropped as the brackets.
/// Caller owns the returned list and must call `deinit` on it.
pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) {
    var array = std.ArrayList(i64).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;

    // tokenizeAny skips empty tokens, so "[]" and doubled spaces no longer produce phantom 0 entries.
    var it = std.mem.tokenizeAny(u8, array_str[1 .. array_str.len - 1], " ");
    while (it.next()) |x| {
        // The signature cannot carry an error, so report the dropped element instead of hiding it.
        array.append(parseInt(x)) catch |err| {
            std.debug.print("parseArrayInt: dropping element: {s}\n", .{@errorName(err)});
        };
    }
    return array;
}
/// Parse `value_str` as an `f64`.
/// Any parse failure yields 0.
pub fn parseFloat(value_str: []const u8) f64 {
    const parsed: f64 = std.fmt.parseFloat(f64, value_str) catch 0;
    return parsed;
}
/// Parse a bracketed, space-separated list such as `[1.5 14.3 42]` into floats.
/// Elements that are not valid floats become 0 (see `parseFloat`).
/// The first and last byte of `array_str` are dropped as the brackets.
/// Caller owns the returned list and must call `deinit` on it.
pub fn parseArrayFloat(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(f64) {
    var array = std.ArrayList(f64).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;

    // tokenizeAny skips empty tokens, so "[]" and doubled spaces no longer produce phantom 0 entries.
    var it = std.mem.tokenizeAny(u8, array_str[1 .. array_str.len - 1], " ");
    while (it.next()) |x| {
        // The signature cannot carry an error, so report the dropped element instead of hiding it.
        array.append(parseFloat(x)) catch |err| {
            std.debug.print("parseArrayFloat: dropping element: {s}\n", .{@errorName(err)});
        };
    }
    return array;
}
/// Interpret `value_str` as a boolean: anything whose first byte is not '0' is true.
/// An empty string is treated as false instead of indexing out of bounds.
pub fn parseBool(value_str: []const u8) bool {
    return value_str.len > 0 and value_str[0] != '0';
}
/// Parse a bracketed, space-separated list such as `[1 0 0 1]` into booleans.
/// Each element is interpreted by `parseBool`.
/// The first and last byte of `array_str` are dropped as the brackets.
/// Caller owns the returned list and must call `deinit` on it.
pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) {
    var array = std.ArrayList(bool).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;

    // tokenizeAny skips empty tokens, so "[]" and doubled spaces never hand an empty string to parseBool.
    var it = std.mem.tokenizeAny(u8, array_str[1 .. array_str.len - 1], " ");
    while (it.next()) |x| {
        // The signature cannot carry an error, so report the dropped element instead of hiding it.
        array.append(parseBool(x)) catch |err| {
            std.debug.print("parseArrayBool: dropping element: {s}\n", .{@errorName(err)});
        };
    }
    return array;
}
/// Parse a bracketed, space-separated list of strings such as `['Hello world' 'Bob']`.
/// An element that starts with a single quote extends to the matching closing quote
/// (or to the end of the list if it is never closed), so spaces inside quotes are kept.
/// Quotes stay part of the element, exactly as they did for single-word strings.
/// The first and last byte of `array_str` are dropped as the brackets.
/// Every element is a fresh copy made with `allocator`; the caller must free each
/// item and then call `deinit` on the list.
pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) {
    var array = std.ArrayList([]const u8).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;
    const inner = array_str[1 .. array_str.len - 1];

    var i: usize = 0;
    while (i < inner.len) {
        // Separator spaces between elements carry no data.
        if (inner[i] == ' ') {
            i += 1;
            continue;
        }

        const start = i;
        if (inner[i] == '\'') {
            // Quoted element: consume up to and including the closing quote.
            i += 1;
            while (i < inner.len and inner[i] != '\'') i += 1;
            if (i < inner.len) i += 1;
        } else {
            // Bare element: consume up to the next space.
            while (i < inner.len and inner[i] != ' ') i += 1;
        }

        const x_copy = allocator.dupe(u8, inner[start..i]) catch @panic("=(");
        // If the list cannot grow, release the copy so it does not leak.
        array.append(x_copy) catch allocator.free(x_copy);
    }
    return array;
}
test "Data parsing" {
    const gpa = std.testing.allocator;

    // Scalar integers: unparsable text falls back to 0.
    const int_inputs = [_][]const u8{ "1", "42", "Hello" };
    const int_wanted = [_]i64{ 1, 42, 0 };
    for (int_inputs, int_wanted) |input, wanted| {
        try std.testing.expect(parseInt(input) == wanted);
    }

    // Integer array.
    const int_list = parseArrayInt(gpa, "[1 14 44 42 hello]");
    defer int_list.deinit();
    const int_list_wanted = [_]i64{ 1, 14, 44, 42, 0 };
    try std.testing.expect(std.mem.eql(i64, int_list.items, &int_list_wanted));

    // Scalar floats: unparsable text falls back to 0.
    const float_inputs = [_][]const u8{ "1.3", "65.991", "Hello" };
    const float_wanted = [_]f64{ 1.3, 65.991, 0 };
    for (float_inputs, float_wanted) |input, wanted| {
        try std.testing.expect(parseFloat(input) == wanted);
    }

    // Float array.
    const float_list = parseArrayFloat(gpa, "[1.5 14.3 44.9999 42 hello]");
    defer float_list.deinit();
    const float_list_wanted = [_]f64{ 1.5, 14.3, 44.9999, 42, 0 };
    try std.testing.expect(std.mem.eql(f64, float_list.items, &float_list_wanted));

    // Scalar booleans: only a leading '0' is false.
    const bool_inputs = [_][]const u8{ "1", "Hello", "0" };
    const bool_wanted = [_]bool{ true, true, false };
    for (bool_inputs, bool_wanted) |input, wanted| {
        try std.testing.expect(parseBool(input) == wanted);
    }

    // Boolean array.
    const bool_list = parseArrayBool(gpa, "[1 0 0 1 1]");
    defer bool_list.deinit();
    const bool_list_wanted = [_]bool{ true, false, false, true, true };
    try std.testing.expect(std.mem.eql(bool, bool_list.items, &bool_list_wanted));

    // TODO: Test the string array
}

View File

@ -1,5 +1,4 @@
const std = @import("std");
const dataParsing = @import("dataParser.zig");
const schemaEngine = @import("schemaEngine.zig");
const Allocator = std.mem.Allocator;
const UUID = @import("types/uuid.zig").UUID;
@ -14,12 +13,138 @@ pub const FileEngine = struct {
path_to_DATA_dir: []const u8, // The path to the DATA folder
max_file_size: usize = 5e+4, // 50kb TODO: Change
const DataEngineError = error{
ErrorCreateDataFolder,
ErrorCreateStructFolder,
ErrorCreateMemberFolder,
ErrorCreateMainFile,
ErrorCreateDataFile,
/// A lexical token: what kind of thing it is (`tag`) and which bytes of the
/// tokenized buffer it covers (`loc`).
pub const Token = struct {
    tag: Tag,
    loc: Loc,

    /// Every kind of token the enum can name.
    pub const Tag = enum {
        eof,
        invalid,
        string_literal,
        int_literal,
        float_literal,
        identifier,
        equal,
        bang, // !
        pipe, // |
        l_paren, // (
        r_paren, // )
        l_bracket, // [
        r_bracket, // ]
        l_brace, // {
        r_brace, // }
        semicolon, // ;
        comma, // ,
        angle_bracket_left, // <
        angle_bracket_right, // >
        angle_bracket_left_equal, // <=
        angle_bracket_right_equal, // >=
        equal_angle_bracket_right, // =>
        period, // .
        bang_equal, // !=
    };

    /// Offsets of the token inside the source buffer.
    pub const Loc = struct {
        start: usize,
        end: usize,
    };
};
/// Tokenizer for a null-terminated buffer of space-separated values.
/// It recognises quoted strings ('...'), integers, floats and the two brackets.
pub const Tokenizer = struct {
    /// Text being tokenized.
    buffer: [:0]const u8,
    /// Offset of the next byte to examine.
    index: usize,

    // Maybe change that to use the stream directly so I dont have to read the line 2 times

    /// Create a tokenizer positioned at the start of `buffer`.
    pub fn init(buffer: [:0]const u8) Tokenizer {
        return .{
            .buffer = buffer,
            // Skip the UTF-8 byte-order mark (EF BB BF) if present; it is not data.
            .index = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0,
        };
    }

    const State = enum {
        start,
        string_literal,
        float,
        int,
    };

    /// Return the bytes of `buffer` covered by `token` (`loc.start` inclusive, `loc.end` exclusive).
    pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 {
        return self.buffer[token.loc.start..token.loc.end];
    }

    /// Scan and return the next token, advancing `index` past it.
    /// Returns a token tagged `.eof` once the buffer is exhausted. If only
    /// unrecognised characters were seen, the tag is `.invalid`.
    pub fn next(self: *Tokenizer) Token {
        // Skip every separator space, not just the first one.
        while (self.index < self.buffer.len and self.buffer[self.index] == ' ') self.index += 1;

        var state: State = .start;
        var result: Token = .{
            // Start from .eof so the tag is never left undefined when nothing is scanned.
            .tag = .eof,
            .loc = .{
                .start = self.index,
                .end = undefined, // always assigned before returning
            },
        };

        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (state) {
                .start => switch (c) {
                    '\'' => {
                        state = .string_literal;
                        result.tag = .string_literal;
                    },
                    '0'...'9', '-' => {
                        state = .int;
                        result.tag = .int_literal;
                    },
                    '[' => {
                        result.tag = .l_bracket;
                        self.index += 1;
                        break;
                    },
                    ']' => {
                        result.tag = .r_bracket;
                        self.index += 1;
                        break;
                    },
                    else => {
                        std.debug.print("Unknow character: {c}\n", .{c});
                        // Scanning continues as before, but the tag is now well defined.
                        result.tag = .invalid;
                    },
                },
                .string_literal => switch (c) {
                    '\'' => {
                        // Include the closing quote in the token.
                        self.index += 1;
                        break;
                    },
                    else => continue,
                },
                .int => switch (c) {
                    '.' => {
                        state = .float;
                        result.tag = .float_literal;
                    },
                    '0'...'9' => continue,
                    // The terminating byte is left for the next call.
                    else => break,
                },
                .float => switch (c) {
                    '0'...'9' => continue,
                    else => break,
                },
            }
        }

        result.loc.end = self.index;
        return result;
    }
};
const ComparisonValue = union {
@ -58,13 +183,10 @@ pub const FileEngine = struct {
/// Take a condition and an array of UUID and fill the array with all UUID that match the condition
pub fn getUUIDListUsingCondition(self: *FileEngine, condition: Condition, uuid_array: *std.ArrayList(UUID)) !void {
var file_names = std.ArrayList([]const u8).init(self.allocator);
self.getFilesNames(condition.struct_name, condition.member_name, &file_names) catch @panic("Can't get list of files");
defer file_names.deinit();
const max_file_index = try self.maxFileIndex(condition.struct_name);
var current_index: usize = 0;
var current_file = file_names.pop();
var sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{s}", .{ self.path_to_DATA_dir, condition.struct_name, condition.member_name, current_file }) catch @panic("Can't create sub_path for init a DataIterator");
var sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");
defer self.allocator.free(sub_path);
var file = std.fs.cwd().openFile(sub_path, .{}) catch @panic("Can't open first file to init a data iterator");
@ -79,14 +201,14 @@ pub const FileEngine = struct {
var compare_value: ComparisonValue = undefined;
switch (condition.data_type) {
.int => compare_value = ComparisonValue{ .int = dataParsing.parseInt(condition.value) },
.int => compare_value = ComparisonValue{ .int = parseInt(condition.value) },
.str => compare_value = ComparisonValue{ .str = condition.value },
.float => compare_value = ComparisonValue{ .float = dataParsing.parseFloat(condition.value) },
.bool => compare_value = ComparisonValue{ .bool_ = dataParsing.parseBool(condition.value) },
.int_array => compare_value = ComparisonValue{ .int_array = dataParsing.parseArrayInt(self.allocator, condition.value) },
.str_array => compare_value = ComparisonValue{ .str_array = dataParsing.parseArrayStr(self.allocator, condition.value) },
.float_array => compare_value = ComparisonValue{ .float_array = dataParsing.parseArrayFloat(self.allocator, condition.value) },
.bool_array => compare_value = ComparisonValue{ .bool_array = dataParsing.parseArrayBool(self.allocator, condition.value) },
.float => compare_value = ComparisonValue{ .float = parseFloat(condition.value) },
.bool => compare_value = ComparisonValue{ .bool_ = parseBool(condition.value) },
.int_array => compare_value = ComparisonValue{ .int_array = parseArrayInt(self.allocator, condition.value) },
.str_array => compare_value = ComparisonValue{ .str_array = parseArrayStr(self.allocator, condition.value) },
.float_array => compare_value = ComparisonValue{ .float_array = parseArrayFloat(self.allocator, condition.value) },
.bool_array => compare_value = ComparisonValue{ .bool_array = parseArrayBool(self.allocator, condition.value) },
}
defer {
switch (condition.data_type) {
@ -98,24 +220,27 @@ pub const FileEngine = struct {
}
}
var token: FileEngine.Token = undefined;
const column_index = schemaEngine.columnIndexOfMember(condition.struct_name, condition.member_name);
while (true) {
output_fbs.reset();
reader.streamUntilDelimiter(writer, '\n', null) catch |err| switch (err) {
error.EndOfStream => {
output_fbs.reset(); // clear buffer before exit
self.allocator.free(current_file);
if (file_names.items.len == 0) break;
if (current_index == max_file_index) break;
current_file = file_names.pop();
current_index += 1;
// Do I leak memory here ? Do I deinit every time ?
self.allocator.free(sub_path);
sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{s}", .{ self.path_to_DATA_dir, condition.struct_name, condition.member_name, current_file }) catch @panic("Can't create sub_path for init a DataIterator");
sub_path = std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, condition.struct_name, current_index }) catch @panic("Can't create sub_path for init a DataIterator");
// Same here, do I close everytime ?
file.close();
file = std.fs.cwd().openFile(sub_path, .{}) catch @panic("Can't open first file to init a data iterator");
file.close(); // Do I need to close ? I think so
file = std.fs.cwd().openFile(sub_path, .{}) catch {
std.debug.print("Error trying to open {s}\n", .{sub_path});
@panic("Can't open first file to init a data iterator");
};
buffered = std.io.bufferedReader(file.reader());
reader = buffered.reader();
@ -127,58 +252,76 @@ pub const FileEngine = struct {
},
};
// TODO: Maybe put that directly inside the union type like a compare function
// Can also do the switch directly on the compare_value
// Maybe use the stream directly to prevent duplicate the data
// But I would need to change the Tokenizer a lot...
const null_terminated_string = try self.allocator.dupeZ(u8, output_fbs.getWritten()[37..]);
defer self.allocator.free(null_terminated_string);
var data_toker = Tokenizer.init(null_terminated_string);
const uuid = try UUID.parse(output_fbs.getWritten()[0..36]);
// Skip unwanted token
for (0..column_index.?) |_| {
_ = data_toker.next();
}
token = data_toker.next();
// TODO: Add error for wrong condition like superior between 2 string or array
switch (condition.operation) {
.equal => {
switch (condition.data_type) {
.int => if (compare_value.int == dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.float => if (compare_value.float == dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.str => if (std.mem.eql(u8, compare_value.str, output_fbs.getWritten()[37..output_fbs.getWritten().len])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.bool => if (compare_value.bool_ == dataParsing.parseBool(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.int => if (compare_value.int == parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.float => if (compare_value.float == parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.str => if (std.mem.eql(u8, compare_value.str, data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.bool => if (compare_value.bool_ == parseBool(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
// TODO: Implement for array too
else => {},
}
},
.different => {
switch (condition.data_type) {
.int => if (compare_value.int != dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.float => if (compare_value.float != dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.str => if (!std.mem.eql(u8, compare_value.str, output_fbs.getWritten()[38 .. output_fbs.getWritten().len - 1])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.bool => if (compare_value.bool_ != dataParsing.parseBool(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.int => if (compare_value.int != parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.float => if (compare_value.float != parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.str => if (!std.mem.eql(u8, compare_value.str, data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.bool => if (compare_value.bool_ != parseBool(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
// TODO: Implement for array too
else => {},
}
},
.superior_or_equal => {
switch (condition.data_type) {
.int => if (compare_value.int <= dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.float => if (compare_value.float <= dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.int => if (compare_value.int <= parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.float => if (compare_value.float <= parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
// TODO: Implement for array too
else => {},
}
},
.superior => {
switch (condition.data_type) {
.int => if (compare_value.int < dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.float => if (compare_value.float < dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.int => if (compare_value.int < parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.float => if (compare_value.float < parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
// TODO: Implement for array too
else => {},
}
},
.inferior_or_equal => {
switch (condition.data_type) {
.int => if (compare_value.int >= dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.float => if (compare_value.float >= dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.int => if (compare_value.int >= parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.float => if (compare_value.float >= parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
// TODO: Implement for array too
else => {},
}
},
.inferior => {
switch (condition.data_type) {
.int => if (compare_value.int > dataParsing.parseInt(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.float => if (compare_value.float > dataParsing.parseFloat(output_fbs.getWritten()[37..])) try uuid_array.append(try UUID.parse(output_fbs.getWritten()[0..36])),
.int => if (compare_value.int > parseInt(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
.float => if (compare_value.float > parseFloat(data_toker.getTokenSlice(token))) try uuid_array.append(uuid),
// TODO: Implement for array too
else => {},
}
@ -191,71 +334,48 @@ pub const FileEngine = struct {
// Do I need multiple files too ? I mean it duplicate UUID a lot, if it's just to save a name like 'Bob', storing a long UUID is overkill
// I could just use a tabular data format with separator using space - Or maybe I encode the uuid to take a minimum space as I always know it size
pub fn writeEntity(self: FileEngine, struct_name: []const u8, data_map: std.StringHashMap([]const u8)) !UUID {
const uuid_str = UUID.init().format_uuid();
const uuid = UUID.init();
const member_names = schemaEngine.structName2structMembers(struct_name);
for (member_names) |member_name| {
const potential_file_name_to_use = try self.getFirstUsableFile(struct_name, member_name);
const potential_file_index = try self.getFirstUsableIndexFile(struct_name);
var file: std.fs.File = undefined;
defer file.close();
if (potential_file_name_to_use) |file_name| {
defer self.allocator.free(file_name);
var path: []const u8 = undefined;
defer self.allocator.free(path);
const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name, file_name });
defer self.allocator.free(path);
if (potential_file_index) |file_index| {
path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, file_index });
file = std.fs.cwd().openFile(path, .{ .mode = .read_write }) catch @panic("=(");
} else {
const max_index = try self.maxFileIndex(struct_name);
var file = std.fs.cwd().openFile(path, .{
.mode = .read_write,
}) catch {
std.debug.print("Error opening data file.", .{});
continue; // TODO: Error handeling
};
defer file.close();
try file.seekFromEnd(0);
try file.writer().print("{s} {s}\n", .{ uuid_str, data_map.get(member_name).? });
} else {
const max_index = try self.maxFileIndex(struct_name, member_name);
const new_file_path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, member_name, max_index + 1 });
defer self.allocator.free(new_file_path);
const new_file = std.fs.cwd().createFile(new_file_path, .{}) catch @panic("Error creating new data file");
defer new_file.close();
try new_file.writer().print("{s} {s}\n", .{ &uuid_str, data_map.get(member_name).? });
}
path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{d}.zippondata", .{ self.path_to_DATA_dir, struct_name, max_index + 1 });
file = std.fs.cwd().createFile(path, .{}) catch @panic("Error creating new data file");
}
return UUID.parse(&uuid_str);
try file.seekFromEnd(0);
try file.writer().print("{s}", .{uuid.format_uuid()});
const member_names = schemaEngine.structName2structMembers(struct_name); // This need to be in the same order all the time tho
for (member_names) |member_name| {
try file.writer().print(" {s}", .{data_map.get(member_name).?});
}
try file.writer().print("\n", .{});
return uuid;
}
/// Use a filename in the format 1.zippondata and return the 1
/// Note that if I change the extension of the data file, I need to update that as it use a fixed len for the extension
fn fileName2Index(_: FileEngine, file_name: []const u8) usize {
var iter_file_name = std.mem.tokenize(u8, file_name, ".");
const num_str = iter_file_name.next().?;
const num: usize = std.fmt.parseInt(usize, num_str, 10) catch @panic("Couln't parse the int of a zippondata file.");
return num;
}
fn getFilesNames(self: FileEngine, struct_name: []const u8, member_name: []const u8, file_names: *std.ArrayList([]const u8)) !void {
const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name });
defer self.allocator.free(path);
var member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
defer member_dir.close();
var iter = member_dir.iterate();
defer iter.reset();
while (try iter.next()) |entry| {
if ((entry.kind != std.fs.Dir.Entry.Kind.file) or (std.mem.eql(u8, "main.zippondata", entry.name))) continue;
try file_names.*.append(try self.allocator.dupe(u8, entry.name));
}
return std.fmt.parseInt(usize, file_name[0..(file_name.len - 11)], 10) catch @panic("Couln't parse the int of a zippondata file.");
}
/// Use the map of file stat to find the first file with under the bytes limit.
/// return the name of the file. If none is found, return null.
fn getFirstUsableFile(self: FileEngine, struct_name: []const u8, member_name: []const u8) !?[]const u8 {
const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name });
fn getFirstUsableIndexFile(self: FileEngine, struct_name: []const u8) !?usize {
const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name });
defer self.allocator.free(path);
var member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
@ -263,18 +383,16 @@ pub const FileEngine = struct {
var iter = member_dir.iterate();
while (try iter.next()) |entry| {
if ((entry.kind != std.fs.Dir.Entry.Kind.file) or (std.mem.eql(u8, "main.zippondata", entry.name))) continue;
const file_stat = try member_dir.statFile(entry.name);
if (file_stat.size < self.max_file_size) return try self.allocator.dupe(u8, entry.name);
if (file_stat.size < self.max_file_size) return self.fileName2Index(entry.name);
}
return null;
}
/// Iter over all file and get the max name and return the value of it as usize
/// So for example if there is 1.zippondata and 2.zippondata it return 2.
fn maxFileIndex(self: FileEngine, struct_name: []const u8, member_name: []const u8) !usize {
const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}/{s}", .{ self.path_to_DATA_dir, struct_name, member_name });
fn maxFileIndex(self: FileEngine, struct_name: []const u8) !usize {
const path = try std.fmt.allocPrint(self.allocator, "{s}/{s}", .{ self.path_to_DATA_dir, struct_name });
defer self.allocator.free(path);
const member_dir = try std.fs.cwd().openDir(path, .{ .iterate = true });
@ -296,23 +414,14 @@ pub const FileEngine = struct {
for (schemaEngine.struct_name_list) |struct_name| {
data_dir.makeDir(struct_name) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => return DataEngineError.ErrorCreateStructFolder,
else => return err,
};
const struct_dir = try data_dir.openDir(struct_name, .{});
const member_names = schemaEngine.structName2structMembers(struct_name);
for (member_names) |member_name| {
struct_dir.makeDir(member_name) catch |err| switch (err) {
error.PathAlreadyExists => continue,
else => return DataEngineError.ErrorCreateMemberFolder,
};
const member_dir = try struct_dir.openDir(member_name, .{});
_ = member_dir.createFile("0.zippondata", .{}) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => return DataEngineError.ErrorCreateDataFile,
};
}
_ = struct_dir.createFile("0.zippondata", .{}) catch |err| switch (err) {
error.PathAlreadyExists => {},
else => return err,
};
}
}
};
@ -333,3 +442,126 @@ test "Open dir" {
const sub_dir = try dir.openDir("src/types", .{});
_ = sub_dir;
}
// Series of functions to use just before creating an entity.
// They transform the string of data into data of the right type.
/// Parse `value_str` as a base-10 `i64`.
/// Any parse failure (non-numeric text, overflow, empty input) yields 0.
pub fn parseInt(value_str: []const u8) i64 {
    const parsed: i64 = std.fmt.parseInt(i64, value_str, 10) catch 0;
    return parsed;
}
/// Parse a bracketed, space-separated list such as `[1 14 44]` into integers.
/// Elements that are not valid integers become 0 (see `parseInt`).
/// The first and last byte of `array_str` are dropped as the brackets.
/// Caller owns the returned list and must call `deinit` on it.
pub fn parseArrayInt(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(i64) {
    var array = std.ArrayList(i64).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;

    // tokenizeAny skips empty tokens, so "[]" and doubled spaces no longer produce phantom 0 entries.
    var it = std.mem.tokenizeAny(u8, array_str[1 .. array_str.len - 1], " ");
    while (it.next()) |x| {
        // The signature cannot carry an error, so report the dropped element instead of hiding it.
        array.append(parseInt(x)) catch |err| {
            std.debug.print("parseArrayInt: dropping element: {s}\n", .{@errorName(err)});
        };
    }
    return array;
}
/// Parse `value_str` as an `f64`.
/// Any parse failure yields 0.
pub fn parseFloat(value_str: []const u8) f64 {
    const parsed: f64 = std.fmt.parseFloat(f64, value_str) catch 0;
    return parsed;
}
/// Parse a bracketed, space-separated list such as `[1.5 14.3 42]` into floats.
/// Elements that are not valid floats become 0 (see `parseFloat`).
/// The first and last byte of `array_str` are dropped as the brackets.
/// Caller owns the returned list and must call `deinit` on it.
pub fn parseArrayFloat(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(f64) {
    var array = std.ArrayList(f64).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;

    // tokenizeAny skips empty tokens, so "[]" and doubled spaces no longer produce phantom 0 entries.
    var it = std.mem.tokenizeAny(u8, array_str[1 .. array_str.len - 1], " ");
    while (it.next()) |x| {
        // The signature cannot carry an error, so report the dropped element instead of hiding it.
        array.append(parseFloat(x)) catch |err| {
            std.debug.print("parseArrayFloat: dropping element: {s}\n", .{@errorName(err)});
        };
    }
    return array;
}
/// Interpret `value_str` as a boolean: anything whose first byte is not '0' is true.
/// An empty string is treated as false instead of indexing out of bounds.
pub fn parseBool(value_str: []const u8) bool {
    return value_str.len > 0 and value_str[0] != '0';
}
/// Parse a bracketed, space-separated list such as `[1 0 0 1]` into booleans.
/// Each element is interpreted by `parseBool`.
/// The first and last byte of `array_str` are dropped as the brackets.
/// Caller owns the returned list and must call `deinit` on it.
pub fn parseArrayBool(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList(bool) {
    var array = std.ArrayList(bool).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;

    // tokenizeAny skips empty tokens, so "[]" and doubled spaces never hand an empty string to parseBool.
    var it = std.mem.tokenizeAny(u8, array_str[1 .. array_str.len - 1], " ");
    while (it.next()) |x| {
        // The signature cannot carry an error, so report the dropped element instead of hiding it.
        array.append(parseBool(x)) catch |err| {
            std.debug.print("parseArrayBool: dropping element: {s}\n", .{@errorName(err)});
        };
    }
    return array;
}
/// Parse a bracketed, space-separated list of strings such as `['Hello world' 'Bob']`.
/// An element that starts with a single quote extends to the matching closing quote
/// (or to the end of the list if it is never closed), so spaces inside quotes are kept.
/// Quotes stay part of the element, exactly as they did for single-word strings.
/// The first and last byte of `array_str` are dropped as the brackets.
/// Every element is a fresh copy made with `allocator`; the caller must free each
/// item and then call `deinit` on the list.
pub fn parseArrayStr(allocator: std.mem.Allocator, array_str: []const u8) std.ArrayList([]const u8) {
    var array = std.ArrayList([]const u8).init(allocator);

    // Anything shorter than "[]" has no brackets to strip; slicing it would underflow.
    if (array_str.len < 2) return array;
    const inner = array_str[1 .. array_str.len - 1];

    var i: usize = 0;
    while (i < inner.len) {
        // Separator spaces between elements carry no data.
        if (inner[i] == ' ') {
            i += 1;
            continue;
        }

        const start = i;
        if (inner[i] == '\'') {
            // Quoted element: consume up to and including the closing quote.
            i += 1;
            while (i < inner.len and inner[i] != '\'') i += 1;
            if (i < inner.len) i += 1;
        } else {
            // Bare element: consume up to the next space.
            while (i < inner.len and inner[i] != ' ') i += 1;
        }

        const x_copy = allocator.dupe(u8, inner[start..i]) catch @panic("=(");
        // If the list cannot grow, release the copy so it does not leak.
        array.append(x_copy) catch allocator.free(x_copy);
    }
    return array;
}
test "Data parsing" {
    const gpa = std.testing.allocator;

    // Scalar integers: unparsable text falls back to 0.
    const int_inputs = [_][]const u8{ "1", "42", "Hello" };
    const int_wanted = [_]i64{ 1, 42, 0 };
    for (int_inputs, int_wanted) |input, wanted| {
        try std.testing.expect(parseInt(input) == wanted);
    }

    // Integer array.
    const int_list = parseArrayInt(gpa, "[1 14 44 42 hello]");
    defer int_list.deinit();
    const int_list_wanted = [_]i64{ 1, 14, 44, 42, 0 };
    try std.testing.expect(std.mem.eql(i64, int_list.items, &int_list_wanted));

    // Scalar floats: unparsable text falls back to 0.
    const float_inputs = [_][]const u8{ "1.3", "65.991", "Hello" };
    const float_wanted = [_]f64{ 1.3, 65.991, 0 };
    for (float_inputs, float_wanted) |input, wanted| {
        try std.testing.expect(parseFloat(input) == wanted);
    }

    // Float array.
    const float_list = parseArrayFloat(gpa, "[1.5 14.3 44.9999 42 hello]");
    defer float_list.deinit();
    const float_list_wanted = [_]f64{ 1.5, 14.3, 44.9999, 42, 0 };
    try std.testing.expect(std.mem.eql(f64, float_list.items, &float_list_wanted));

    // Scalar booleans: only a leading '0' is false.
    const bool_inputs = [_][]const u8{ "1", "Hello", "0" };
    const bool_wanted = [_]bool{ true, true, false };
    for (bool_inputs, bool_wanted) |input, wanted| {
        try std.testing.expect(parseBool(input) == wanted);
    }

    // Boolean array.
    const bool_list = parseArrayBool(gpa, "[1 0 0 1 1]");
    defer bool_list.deinit();
    const bool_list_wanted = [_]bool{ true, false, false, true, true };
    try std.testing.expect(std.mem.eql(bool, bool_list.items, &bool_list_wanted));

    // TODO: Test the string array
}
// Test tokenizer
test "basic query" {
    // Three space-separated runs of digits must come back as three integer tokens.
    const expected_tags = [_]FileEngine.Token.Tag{ .int_literal, .int_literal, .int_literal };
    try testTokenize("001 123 0185", &expected_tags);
}
/// Tokenize `source` and check that the tags of the first tokens match
/// `expected_token_tags`, in order. Fails on the first mismatch.
fn testTokenize(source: [:0]const u8, expected_token_tags: []const FileEngine.Token.Tag) !void {
    var toker = FileEngine.Tokenizer.init(source);
    var position: usize = 0;
    while (position < expected_token_tags.len) : (position += 1) {
        const produced = toker.next();
        try std.testing.expectEqual(expected_token_tags[position], produced.tag);
    }
}

View File

@ -20,6 +20,18 @@ pub const struct_type_list: [2][]const DataType = .{
&[_]DataType{.str},
};
/// Return the zero-based position of `member_name` within the member list that
/// `structName2structMembers` gives for `struct_name`, or null when it is not there.
/// Used to know how many tokens the FileEngine parser must skip before reaching the wanted one.
pub fn columnIndexOfMember(struct_name: []const u8, member_name: []const u8) ?usize {
    const members = structName2structMembers(struct_name);
    for (members, 0..) |candidate, column| {
        if (std.mem.eql(u8, candidate, member_name)) return column;
    }
    return null;
}
/// Get the type of the member
pub fn memberName2DataType(struct_name: []const u8, member_name: []const u8) ?DataType {
var i: u16 = 0;

View File

@ -153,6 +153,7 @@ pub const Parser = struct {
},
}
},
.expect_struct_name => {
// Check if the struct name is in the schema
self.struct_name = try self.allocator.dupe(u8, self.toker.getTokenSlice(token));
@ -162,6 +163,7 @@ pub const Parser = struct {
else => self.state = .expect_filter_or_additional_data,
}
},
.expect_filter_or_additional_data => {
keep_next = true;
switch (token.tag) {
@ -170,10 +172,12 @@ pub const Parser = struct {
else => self.printError("Error: Expect [ for additional data or { for a filter", &token),
}
},
.parse_additional_data => {
try self.parseAdditionalData(&self.additional_data);
self.state = .filter_and_send;
},
.filter_and_send => {
var array = std.ArrayList(UUID).init(self.allocator);
defer array.deinit();
@ -181,6 +185,7 @@ pub const Parser = struct {
self.sendEntity(array.items);
self.state = .end;
},
.expect_new_data => {
switch (token.tag) {
.l_paren => {
@ -190,6 +195,7 @@ pub const Parser = struct {
else => self.printError("Error: Expecting new data starting with (", &token),
}
},
.parse_new_data_and_add_data => {
switch (self.action) {
.ADD => {
@ -210,6 +216,7 @@ pub const Parser = struct {
else => unreachable,
}
},
else => unreachable,
}
}
@ -231,9 +238,8 @@ pub const Parser = struct {
_ = self;
}
/// Take an array of UUID and populate it to be the array that represent filter between {}
/// Main is to know if between {} or (), main is true if between {} or the first to be call
/// TODO: Create a parseCondition
/// Take an array of UUID and populate it with what match what is between {}
/// Main is to know if between {} or (), main is true if between {}, otherwise between () inside {}
fn parseFilter(self: *Parser, left_array: *std.ArrayList(UUID), struct_name: []const u8, main: bool) !void {
var token = self.toker.next();
var keep_next = false;
@ -253,6 +259,7 @@ pub const Parser = struct {
self.state = State.expect_ANDOR_OR_end;
keep_next = true;
},
.expect_ANDOR_OR_end => {
switch (token.tag) {
.r_brace => {
@ -280,6 +287,7 @@ pub const Parser = struct {
else => self.printError("Error: Expected a condition including AND or OR or } or )", &token),
}
},
.expect_right_uuid_array => {
var right_array = std.ArrayList(UUID).init(self.allocator);
defer right_array.deinit();
@ -307,14 +315,17 @@ pub const Parser = struct {
std.debug.print("Token here {any}\n", .{token});
self.state = .expect_ANDOR_OR_end;
},
else => unreachable,
}
}
}
/// Parse to get a Condition< Which is a struct that is use by the FileEngine to retreive data.
/// In the query, it is this part name = 'Bob' or age <= 10
fn parseCondition(self: *Parser, condition: *Condition, token_ptr: *Token) Token {
var keep_next = false;
self.state = State.expect_member;
self.state = .expect_member;
var token = token_ptr.*;
while (self.state != State.end) : ({
@ -335,6 +346,7 @@ pub const Parser = struct {
else => self.printError("Error: Expected member name.", &token),
}
},
.expect_operation => {
switch (token.tag) {
.equal => condition.operation = .equal, // =
@ -347,6 +359,7 @@ pub const Parser = struct {
}
self.state = State.expect_value;
},
.expect_value => {
switch (condition.data_type) {
.int => {
@ -420,13 +433,14 @@ pub const Parser = struct {
}
self.state = .end;
},
else => unreachable,
}
}
return token;
}
/// When this function is called, the last token retrieved by the tokenizer should be [.
/// When this function is called, the next token should be [
/// Check if an int is here -> check if ; is here -> check if member is here -> check if [ is here -> loop
fn parseAdditionalData(self: *Parser, additional_data: *AdditionalData) !void {
var token = self.toker.next();
@ -455,6 +469,7 @@ pub const Parser = struct {
},
}
},
.expect_semicolon_OR_right_bracket => {
switch (token.tag) {
.semicolon => self.state = .expect_member,
@ -462,6 +477,7 @@ pub const Parser = struct {
else => self.printError("Error: Expect ';' or ']'.", &token),
}
},
.expect_member => {
switch (token.tag) {
.identifier => {
@ -478,6 +494,7 @@ pub const Parser = struct {
else => self.printError("Error: Expected a member name.", &token),
}
},
.expect_comma_OR_r_bracket_OR_l_bracket => {
switch (token.tag) {
.comma => self.state = .expect_member,
@ -491,6 +508,7 @@ pub const Parser = struct {
else => self.printError("Error: Expected , or ] or [", &token),
}
},
.expect_comma_OR_r_bracket => {
switch (token.tag) {
.comma => self.state = .expect_member,
@ -498,6 +516,7 @@ pub const Parser = struct {
else => self.printError("Error: Expected , or ]", &token),
}
},
else => unreachable,
}
}
@ -527,6 +546,7 @@ pub const Parser = struct {
else => self.printError("Error: Expected member name.", &token),
}
},
.expect_equal => {
switch (token.tag) {
// TODO: Add more comparison like IN or other stuff
@ -534,14 +554,15 @@ pub const Parser = struct {
else => self.printError("Error: Expected =", &token),
}
},
.expect_new_value => {
const data_type = schemaEngine.memberName2DataType(self.struct_name, member_name);
switch (data_type.?) {
.int => {
switch (token.tag) {
.int_literal, .keyword_null => {
keep_next = true;
self.state = .add_member_to_map;
member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember");
self.state = .expect_comma_OR_end;
},
else => self.printError("Error: Expected int", &token),
}
@ -549,17 +570,25 @@ pub const Parser = struct {
.float => {
switch (token.tag) {
.float_literal, .keyword_null => {
keep_next = true;
self.state = .add_member_to_map;
member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember");
self.state = .expect_comma_OR_end;
},
else => self.printError("Error: Expected float", &token),
}
},
.bool => {
switch (token.tag) {
.bool_literal_true, .bool_literal_false, .keyword_null => {
keep_next = true;
self.state = .add_member_to_map;
.bool_literal_true => {
member_map.put(member_name, "1") catch @panic("Could not add member name and value to map in getMapOfMember");
self.state = .expect_comma_OR_end;
},
.bool_literal_false => {
member_map.put(member_name, "0") catch @panic("Could not add member name and value to map in getMapOfMember");
self.state = .expect_comma_OR_end;
},
.keyword_null => {
member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember");
self.state = .expect_comma_OR_end;
},
else => self.printError("Error: Expected bool: true false", &token),
}
@ -567,8 +596,8 @@ pub const Parser = struct {
.str => {
switch (token.tag) {
.string_literal, .keyword_null => {
keep_next = true;
self.state = .add_member_to_map;
member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember");
self.state = .expect_comma_OR_end;
},
else => self.printError("Error: Expected string between ''", &token),
}
@ -648,11 +677,7 @@ pub const Parser = struct {
},
}
},
.add_member_to_map => {
member_map.put(member_name, self.toker.getTokenSlice(token)) catch @panic("Could not add member name and value to map in getMapOfMember");
self.state = .expect_comma_OR_end;
},
.add_array_to_map => {},
.expect_comma_OR_end => {
switch (token.tag) {
.r_paren => self.state = .end,
@ -660,6 +685,7 @@ pub const Parser = struct {
else => self.printError("Error: Expect , or )", &token),
}
},
else => unreachable,
}
}