Added UUID -> File index hash map

SchemaStruct members are now slices ([]) instead of ArrayList. Started to
use and understand toOwnedSlice.

Implemented the hash map that keeps every UUID -> file index mapping,
stored inside each SchemaStruct.
This commit is contained in:
Adrien Bouvais 2024-11-01 17:10:13 +01:00
parent aff8fac0af
commit c37999cbfc
2 changed files with 140 additions and 40 deletions

View File

@ -43,24 +43,29 @@ pub const FileEngine = struct {
pub fn init(allocator: Allocator, path: []const u8) FileEngineError!FileEngine { pub fn init(allocator: Allocator, path: []const u8) FileEngineError!FileEngine {
const path_to_ZipponDB_dir = path; const path_to_ZipponDB_dir = path;
var schema_buf = allocator.alloc(u8, BUFFER_SIZE) catch return FileEngineError.MemoryError; var schema_buf = allocator.alloc(u8, BUFFER_SIZE) catch return FileEngineError.MemoryError; // TODO: Use a list
defer allocator.free(schema_buf); defer allocator.free(schema_buf);
const len: usize = FileEngine.readSchemaFile(allocator, path_to_ZipponDB_dir, schema_buf) catch 0; const len: usize = FileEngine.readSchemaFile(allocator, path_to_ZipponDB_dir, schema_buf) catch 0;
const null_terminated_schema_buff = allocator.dupeZ(u8, schema_buf[0..len]) catch return FileEngineError.MemoryError; const null_terminated_schema_buff = allocator.dupeZ(u8, schema_buf[0..len]) catch return FileEngineError.MemoryError;
errdefer allocator.free(null_terminated_schema_buff);
var toker = SchemaTokenizer.init(null_terminated_schema_buff); var toker = SchemaTokenizer.init(null_terminated_schema_buff);
var parser = SchemaParser.init(&toker, allocator); var parser = SchemaParser.init(&toker, allocator);
var struct_array = std.ArrayList(SchemaStruct).init(allocator); var struct_array = std.ArrayList(SchemaStruct).init(allocator);
parser.parse(&struct_array) catch {}; parser.parse(&struct_array) catch return FileEngineError.SchemaNotConform;
return FileEngine{ var file_engine = FileEngine{
.allocator = allocator, .allocator = allocator,
.path_to_ZipponDB_dir = path_to_ZipponDB_dir, .path_to_ZipponDB_dir = path_to_ZipponDB_dir,
.null_terminated_schema_buff = null_terminated_schema_buff, .null_terminated_schema_buff = null_terminated_schema_buff,
.struct_array = struct_array.toOwnedSlice() catch return FileEngineError.MemoryError, .struct_array = struct_array.toOwnedSlice() catch return FileEngineError.MemoryError,
}; };
try file_engine.populateAllUUIDToFileIndexMap();
return file_engine;
} }
pub fn deinit(self: *FileEngine) void { pub fn deinit(self: *FileEngine) void {
@ -221,6 +226,33 @@ pub const FileEngine = struct {
// --------------------Read and parse files-------------------- // --------------------Read and parse files--------------------
/// For every struct in the schema, scan its data files and record, for each
/// entity, which file index its UUID is stored in (`sstruct.uuid_file_index`).
///
/// Files are read from `{path_to_ZipponDB_dir}/DATA/{struct name}/{i}.zid` for
/// `i` in `0..=maxFileIndex(struct name)`.
///
/// Errors:
/// - `MemoryError` if a path or a map entry cannot be allocated.
/// - `CantOpenDir` if the struct's data directory cannot be opened.
/// - `ZipponDataError` if a data file cannot be opened or iterated.
/// - whatever `maxFileIndex` returns on failure.
pub fn populateAllUUIDToFileIndexMap(self: *FileEngine) FileEngineError!void {
    for (self.struct_array) |*sstruct| { // sstruct: one schema struct
        const max_file_index = try self.maxFileIndex(sstruct.name);

        const dir_path = std.fmt.allocPrint(
            self.allocator,
            "{s}/DATA/{s}",
            .{ self.path_to_ZipponDB_dir, sstruct.name },
        ) catch return FileEngineError.MemoryError;
        defer self.allocator.free(dir_path);

        // The handle must be closed on every exit path, otherwise one
        // directory descriptor leaks per schema struct.
        var dir = std.fs.cwd().openDir(dir_path, .{}) catch return FileEngineError.CantOpenDir;
        defer dir.close();

        for (0..(max_file_index + 1)) |i| {
            // The file name gets its own buffer with its own defer. Reusing
            // the directory path buffer made the outer defer free memory that
            // had already been released when this allocation failed.
            const file_name = std.fmt.allocPrint(self.allocator, "{d}.zid", .{i}) catch return FileEngineError.MemoryError;
            defer self.allocator.free(file_name);

            var iter = zid.DataIterator.init(self.allocator, file_name, dir, sstruct.zid_schema) catch return FileEngineError.ZipponDataError;
            defer iter.deinit(); // runs before file_name is freed (defers are LIFO)

            while (iter.next() catch return FileEngineError.ZipponDataError) |row| {
                // Column 0 of every row is the entity UUID.
                sstruct.uuid_file_index.put(row[0].UUID, i) catch return FileEngineError.MemoryError;
            }
        }
    }
}
/// Take a list of UUID and, a buffer array and the additional data to write into the buffer the JSON to send /// Take a list of UUID and, a buffer array and the additional data to write into the buffer the JSON to send
/// TODO: Optimize /// TODO: Optimize
/// FIXME: Array of string are not working /// FIXME: Array of string are not working
@ -775,9 +807,12 @@ pub const FileEngine = struct {
const members = try self.structName2structMembers(struct_name); const members = try self.structName2structMembers(struct_name);
const types = try self.structName2DataType(struct_name); const types = try self.structName2DataType(struct_name);
var datas = allocator.alloc(zid.Data, members.len) catch return FileEngineError.MemoryError; var datas = allocator.alloc(zid.Data, (members.len + 1)) catch return FileEngineError.MemoryError;
for (members, types, 0..) |member, dt, i| { const new_uuid = UUID.init();
datas[0] = zid.Data.initUUID(new_uuid.bytes);
for (members, types, 1..) |member, dt, i| {
switch (dt) { switch (dt) {
.int => datas[i] = zid.Data.initInt(s2t.parseInt(map.get(member).?)), .int => datas[i] = zid.Data.initInt(s2t.parseInt(map.get(member).?)),
.float => datas[i] = zid.Data.initFloat(s2t.parseFloat(map.get(member).?)), .float => datas[i] = zid.Data.initFloat(s2t.parseFloat(map.get(member).?)),
@ -964,7 +999,7 @@ pub const FileEngine = struct {
return FileEngineError.StructNotFound; return FileEngineError.StructNotFound;
} }
return self.struct_array[i].members.items; return self.struct_array[i].members;
} }
pub fn structName2DataType(self: *FileEngine, struct_name: []const u8) FileEngineError![]const DataType { pub fn structName2DataType(self: *FileEngine, struct_name: []const u8) FileEngineError![]const DataType {
@ -978,7 +1013,7 @@ pub const FileEngine = struct {
return FileEngineError.StructNotFound; return FileEngineError.StructNotFound;
} }
return self.struct_array[i].types.items; return self.struct_array[i].types;
} }
/// Return the number of member of a struct /// Return the number of member of a struct

View File

@ -1,4 +1,5 @@
const std = @import("std"); const std = @import("std");
const zid = @import("ZipponData");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const DataType = @import("dtype").DataType; const DataType = @import("dtype").DataType;
const Toker = @import("tokenizers/schema.zig").Tokenizer; const Toker = @import("tokenizers/schema.zig").Tokenizer;
@ -20,6 +21,7 @@ const State = enum {
expext_array_type, expext_array_type,
expect_two_dot, expect_two_dot,
expect_comma, expect_comma,
add_struct,
}; };
pub const Parser = struct { pub const Parser = struct {
@ -38,30 +40,68 @@ pub const Parser = struct {
pub const SchemaStruct = struct { pub const SchemaStruct = struct {
allocator: Allocator, allocator: Allocator,
name: []const u8, name: []const u8,
members: std.ArrayList([]const u8), members: [][]const u8,
types: std.ArrayList(DataType), types: []DataType,
zid_schema: []zid.DType,
links: std.StringHashMap([]const u8), // Map key as member_name and value as struct_name, like a dtype links: std.StringHashMap([]const u8), // Map key as member_name and value as struct_name, like a dtype
uuid_file_index: std.AutoHashMap([16]u8, u64), // Map UUID to the index of the file they are store in
pub fn init(allocator: Allocator, name: []const u8) SchemaStruct { pub fn init(
allocator: Allocator,
name: []const u8,
members: [][]const u8,
types: []DataType,
links: std.StringHashMap([]const u8),
) SchemaParserError!SchemaStruct {
return SchemaStruct{ return SchemaStruct{
.allocator = allocator, .allocator = allocator,
.name = name, .name = name,
.members = std.ArrayList([]const u8).init(allocator), .members = members,
.types = std.ArrayList(DataType).init(allocator), .types = types,
.links = std.StringHashMap([]const u8).init(allocator), .zid_schema = SchemaStruct.fileDataSchema(allocator, types) catch return SchemaParserError.MemoryError,
.links = links,
.uuid_file_index = std.AutoHashMap([16]u8, u64).init(allocator),
}; };
} }
pub fn deinit(self: *SchemaStruct) void { pub fn deinit(self: *SchemaStruct) void {
self.types.deinit(); self.allocator.free(self.members);
self.members.deinit(); self.allocator.free(self.types);
self.allocator.free(self.zid_schema);
self.links.deinit(); self.links.deinit();
self.uuid_file_index.deinit();
}
/// Build the ZipponData column schema matching a list of member types.
///
/// The returned slice has `dtypes.len + 1` entries: index 0 is always
/// `zid.DType.UUID` (the entity id), followed by one entry per member in
/// the same order as `dtypes`.
///
/// Caller owns the returned slice and must free it with `allocator`.
/// Returns `SchemaParserError.MemoryError` if the allocation fails.
fn fileDataSchema(allocator: Allocator, dtypes: []DataType) SchemaParserError![]zid.DType {
    // The final length is known up front, so allocate once. Nothing can fail
    // after this point, so there is no error path that could leak the buffer.
    const schema = allocator.alloc(zid.DType, dtypes.len + 1) catch return SchemaParserError.MemoryError;

    schema[0] = zid.DType.UUID;
    for (dtypes, schema[1..]) |dt, *slot| {
        slot.* = switch (dt) {
            DataType.int => zid.DType.Int,
            DataType.float => zid.DType.Float,
            DataType.str => zid.DType.Str,
            DataType.bool => zid.DType.Bool,
            DataType.link => zid.DType.UUID,
            DataType.date => zid.DType.Unix,
            DataType.time => zid.DType.Unix,
            DataType.datetime => zid.DType.Unix,
            DataType.int_array => zid.DType.IntArray,
            DataType.float_array => zid.DType.FloatArray,
            DataType.str_array => zid.DType.StrArray,
            DataType.bool_array => zid.DType.BoolArray,
            DataType.link_array => zid.DType.UUIDArray,
            DataType.date_array => zid.DType.UnixArray,
            DataType.time_array => zid.DType.UnixArray,
            DataType.datetime_array => zid.DType.UnixArray,
        };
    }
    return schema;
}
}; };
pub fn parse(self: *Parser, struct_array: *std.ArrayList(SchemaStruct)) !void { pub fn parse(self: *Parser, struct_array: *std.ArrayList(SchemaStruct)) !void {
var state: State = .expect_struct_name_OR_end; var state: State = .expect_struct_name_OR_end;
var index: usize = 0;
var keep_next = false; var keep_next = false;
errdefer { errdefer {
@ -76,6 +116,14 @@ pub const Parser = struct {
var member_token: Token = undefined; var member_token: Token = undefined;
var name: []const u8 = undefined;
var member_list = std.ArrayList([]const u8).init(self.allocator);
defer member_list.deinit();
var type_list = std.ArrayList(DataType).init(self.allocator);
defer type_list.deinit();
var links = std.StringHashMap([]const u8).init(self.allocator);
defer links.deinit();
var token = self.toker.next(); var token = self.toker.next();
while ((state != .end) and (state != .invalid)) : ({ while ((state != .end) and (state != .invalid)) : ({
token = if (!keep_next) self.toker.next() else token; token = if (!keep_next) self.toker.next() else token;
@ -84,7 +132,7 @@ pub const Parser = struct {
.expect_struct_name_OR_end => switch (token.tag) { .expect_struct_name_OR_end => switch (token.tag) {
.identifier => { .identifier => {
state = .expect_l_paren; state = .expect_l_paren;
struct_array.append(SchemaStruct.init(self.allocator, self.toker.getTokenSlice(token))) catch return SchemaParserError.MemoryError; name = self.toker.getTokenSlice(token);
}, },
.eof => state = .end, .eof => state = .end,
else => return printError( else => return printError(
@ -112,10 +160,7 @@ pub const Parser = struct {
state = .expect_member_name; state = .expect_member_name;
keep_next = true; keep_next = true;
}, },
.r_paren => { .r_paren => state = .add_struct,
state = .expect_struct_name_OR_end;
index += 1;
},
else => return printError( else => return printError(
"Error parsing schema: Expected member name or )", "Error parsing schema: Expected member name or )",
SchemaParserError.SynthaxError, SchemaParserError.SynthaxError,
@ -125,9 +170,29 @@ pub const Parser = struct {
), ),
}, },
.add_struct => {
std.debug.print("Adding new schema\n", .{});
struct_array.append(try SchemaStruct.init(
self.allocator,
name,
member_list.toOwnedSlice() catch return SchemaParserError.MemoryError,
type_list.toOwnedSlice() catch return SchemaParserError.MemoryError,
try links.clone(),
)) catch return SchemaParserError.MemoryError;
links.deinit();
links = std.StringHashMap([]const u8).init(self.allocator);
member_list = std.ArrayList([]const u8).init(self.allocator);
type_list = std.ArrayList(DataType).init(self.allocator);
state = .expect_struct_name_OR_end;
},
.expect_member_name => { .expect_member_name => {
state = .expect_two_dot; state = .expect_two_dot;
struct_array.items[index].members.append(self.toker.getTokenSlice(token)) catch return SchemaParserError.MemoryError; member_list.append(self.toker.getTokenSlice(token)) catch return SchemaParserError.MemoryError;
member_token = token; member_token = token;
}, },
@ -145,36 +210,36 @@ pub const Parser = struct {
.expect_value_type => switch (token.tag) { .expect_value_type => switch (token.tag) {
.type_int => { .type_int => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.int) catch return SchemaParserError.MemoryError; type_list.append(.int) catch return SchemaParserError.MemoryError;
}, },
.type_str => { .type_str => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.str) catch return SchemaParserError.MemoryError; type_list.append(.str) catch return SchemaParserError.MemoryError;
}, },
.type_float => { .type_float => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.float) catch return SchemaParserError.MemoryError; type_list.append(.float) catch return SchemaParserError.MemoryError;
}, },
.type_bool => { .type_bool => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.bool) catch return SchemaParserError.MemoryError; type_list.append(.bool) catch return SchemaParserError.MemoryError;
}, },
.type_date => { .type_date => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.date) catch return SchemaParserError.MemoryError; type_list.append(.date) catch return SchemaParserError.MemoryError;
}, },
.type_time => { .type_time => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.time) catch return SchemaParserError.MemoryError; type_list.append(.time) catch return SchemaParserError.MemoryError;
}, },
.type_datetime => { .type_datetime => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.datetime) catch return SchemaParserError.MemoryError; type_list.append(.datetime) catch return SchemaParserError.MemoryError;
}, },
.identifier => { .identifier => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.link) catch return SchemaParserError.MemoryError; type_list.append(.link) catch return SchemaParserError.MemoryError;
struct_array.items[index].links.put(self.toker.getTokenSlice(member_token), self.toker.getTokenSlice(token)) catch return SchemaParserError.MemoryError; links.put(self.toker.getTokenSlice(member_token), self.toker.getTokenSlice(token)) catch return SchemaParserError.MemoryError;
}, },
.lr_bracket => state = .expext_array_type, .lr_bracket => state = .expext_array_type,
else => return printError( else => return printError(
@ -189,36 +254,36 @@ pub const Parser = struct {
.expext_array_type => switch (token.tag) { .expext_array_type => switch (token.tag) {
.type_int => { .type_int => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(DataType.int_array) catch return SchemaParserError.MemoryError; type_list.append(DataType.int_array) catch return SchemaParserError.MemoryError;
}, },
.type_str => { .type_str => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(DataType.str_array) catch return SchemaParserError.MemoryError; type_list.append(DataType.str_array) catch return SchemaParserError.MemoryError;
}, },
.type_float => { .type_float => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(DataType.float_array) catch return SchemaParserError.MemoryError; type_list.append(DataType.float_array) catch return SchemaParserError.MemoryError;
}, },
.type_bool => { .type_bool => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(DataType.bool_array) catch return SchemaParserError.MemoryError; type_list.append(DataType.bool_array) catch return SchemaParserError.MemoryError;
}, },
.type_date => { .type_date => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(DataType.date_array) catch return SchemaParserError.MemoryError; type_list.append(DataType.date_array) catch return SchemaParserError.MemoryError;
}, },
.type_time => { .type_time => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(DataType.time_array) catch return SchemaParserError.MemoryError; type_list.append(DataType.time_array) catch return SchemaParserError.MemoryError;
}, },
.type_datetime => { .type_datetime => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(DataType.datetime_array) catch return SchemaParserError.MemoryError; type_list.append(DataType.datetime_array) catch return SchemaParserError.MemoryError;
}, },
.identifier => { .identifier => {
state = .expect_comma; state = .expect_comma;
struct_array.items[index].types.append(.link) catch return SchemaParserError.MemoryError; type_list.append(.link) catch return SchemaParserError.MemoryError;
struct_array.items[index].links.put(self.toker.getTokenSlice(member_token), self.toker.getTokenSlice(token)) catch return SchemaParserError.MemoryError; links.put(self.toker.getTokenSlice(member_token), self.toker.getTokenSlice(token)) catch return SchemaParserError.MemoryError;
}, },
else => return printError( else => return printError(
"Error parsing schema: Expected data type", "Error parsing schema: Expected data type",