ZipponDB/src/schemaParser.zig
MrBounty 3539dd685c Fix, perf ect
- Added a new data type, self, that represents the id of the entity itself
- Fixed multi-threading for parsing; each thread now uses its own writer
and I concatenate them at the end
- Added a schemaStruct id to the list
- Other fixes and changes to go with the rest

Next step: multi-threading for all functions, then finally relationships
2024-11-03 19:18:25 +01:00

312 lines
12 KiB
Zig

const std = @import("std");
const zid = @import("ZipponData");
const Allocator = std.mem.Allocator;
const DataType = @import("dtype").DataType;
const Toker = @import("tokenizers/schema.zig").Tokenizer;
const Token = @import("tokenizers/schema.zig").Token;
const Loc = @import("tokenizers/shared/loc.zig").Loc;
const send = @import("stuffs/utils.zig").send;
const printError = @import("stuffs/utils.zig").printError;
const SchemaParserError = @import("stuffs/errors.zig").SchemaParserError;
// States of the schema parser's state machine (see `Parser.parse`).
// Each state names what the parser expects from the current token.
const State = enum {
// Parsing finished; the parse loop stops. Entered on `eof` where a struct name could start.
end,
// Also stops the parse loop. NOTE(review): never assigned in the visible code.
invalid,
// An identifier starts a new struct; `eof` ends parsing.
expect_struct_name_OR_end,
// The current token is taken as a member name.
expect_member_name,
// Expects `(` after the struct name.
expect_l_paren,
// An identifier starts another member; `)` closes the struct.
expect_member_name_OR_r_paren,
// Expects a type keyword, an identifier (link to another struct) or the
// `lr_bracket` token that introduces an array type.
expect_value_type,
// Expects the element type of an array, after `lr_bracket`.
// NOTE(review): "expext" is a typo for "expect"; kept because `parse` references this name.
expext_array_type,
// Expects `:` between a member name and its type.
expect_two_dot,
// Expects `,` after a member's type.
expect_comma,
// Builds a `SchemaStruct` from the collected members and appends it to the output list.
add_struct,
};
/// Turns the token stream of a schema file into a list of `SchemaStruct`.
///
/// Shape accepted by `parse`, as enforced by its state machine:
///     StructName ( member: type, other: <lr_bracket>type, link: OtherStruct, )
/// A `,` is required after every member, including the last one.
pub const Parser = struct {
    toker: *Toker,
    allocator: Allocator,

    /// Create a parser that reads tokens from `toker` and allocates with `allocator`.
    pub fn init(toker: *Toker, allocator: Allocator) Parser {
        return .{
            .allocator = allocator,
            .toker = toker,
        };
    }

    // TODO: Find a better name and move this somewhere else.
    /// Description of one struct of the schema.
    pub const SchemaStruct = struct {
        allocator: Allocator,
        name: []const u8,
        members: [][]const u8,
        types: []DataType,
        zid_schema: []zid.DType,
        links: std.StringHashMap([]const u8), // Maps member_name to struct_name, like a dtype
        uuid_file_index: std.AutoHashMap([16]u8, u64), // Maps a UUID to the index of the file it is stored in

        /// Build a `SchemaStruct`.
        ///
        /// On success the struct takes ownership of `members`, `types` and `links`
        /// (they are released by `deinit` using `allocator`). On error nothing is
        /// freed and ownership stays with the caller.
        /// Returns `SchemaParserError.MemoryError` if the zid schema cannot be allocated.
        pub fn init(
            allocator: Allocator,
            name: []const u8,
            members: [][]const u8,
            types: []DataType,
            links: std.StringHashMap([]const u8),
        ) SchemaParserError!SchemaStruct {
            const zid_schema = try SchemaStruct.fileDataSchema(allocator, types);
            return SchemaStruct{
                .allocator = allocator,
                .name = name,
                .members = members,
                .types = types,
                .zid_schema = zid_schema,
                .links = links,
                .uuid_file_index = std.AutoHashMap([16]u8, u64).init(allocator),
            };
        }

        /// Release everything owned by the struct. `name` and the member strings are not freed here.
        pub fn deinit(self: *SchemaStruct) void {
            self.allocator.free(self.members);
            self.allocator.free(self.types);
            self.allocator.free(self.zid_schema);
            self.links.deinit();
            self.uuid_file_index.deinit();
        }

        /// Map every `DataType` of `dtypes` to the `zid.DType` used to store it.
        /// Caller owns the returned slice (same length as `dtypes`).
        fn fileDataSchema(allocator: Allocator, dtypes: []DataType) SchemaParserError![]zid.DType {
            // Allocate the exact size up front: a single allocation, so there is
            // no intermediate list to leak if an allocation fails midway.
            const schema = allocator.alloc(zid.DType, dtypes.len) catch return SchemaParserError.MemoryError;
            for (dtypes, schema) |dt, *slot| {
                slot.* = switch (dt) {
                    .int => .Int,
                    .float => .Float,
                    .str => .Str,
                    .bool => .Bool,
                    .link, .self => .UUID,
                    .date => .Unix,
                    .time => .Unix,
                    .datetime => .Unix,
                    .int_array => .IntArray,
                    .float_array => .FloatArray,
                    .str_array => .StrArray,
                    .bool_array => .BoolArray,
                    .link_array => .UUIDArray,
                    .date_array => .UnixArray,
                    .time_array => .UnixArray,
                    .datetime_array => .UnixArray,
                };
            }
            return schema;
        }
    };

    /// Parse every struct of the schema and append the result to `struct_array`.
    ///
    /// Every struct implicitly gets a first member named "id" of type `.self`.
    /// Member names and link targets are slices obtained from the tokenizer
    /// (`getTokenSlice`); they are not copied.
    ///
    /// On error, every item of `struct_array` is deinitialized and the list is
    /// emptied (including items that were in it before the call), then the error
    /// is returned: a syntax error reported through `printError`, or
    /// `SchemaParserError.MemoryError` when an allocation fails.
    pub fn parse(self: *Parser, struct_array: *std.ArrayList(SchemaStruct)) !void {
        var state: State = .expect_struct_name_OR_end;
        // When true, the next iteration re-examines the current token instead of fetching a new one.
        var keep_next = false;

        errdefer {
            for (struct_array.items) |*schema_struct| schema_struct.deinit();
            struct_array.clearRetainingCapacity();
        }

        // Both are assigned before being read: `name` when a struct starts,
        // `member_token` when a member name is recorded.
        var member_token: Token = undefined;
        var name: []const u8 = undefined;

        // Accumulators for the struct currently being parsed.
        var member_list = std.ArrayList([]const u8).init(self.allocator);
        defer member_list.deinit();
        var type_list = std.ArrayList(DataType).init(self.allocator);
        defer type_list.deinit();
        var links = std.StringHashMap([]const u8).init(self.allocator);
        defer links.deinit();

        var token = self.toker.next();
        while ((state != .end) and (state != .invalid)) : ({
            token = if (!keep_next) self.toker.next() else token;
            keep_next = false;
        }) {
            switch (state) {
                .expect_struct_name_OR_end => switch (token.tag) {
                    .identifier => {
                        state = .expect_l_paren;
                        name = self.toker.getTokenSlice(token);
                        // Implicit first member of every struct.
                        member_list.append("id") catch return SchemaParserError.MemoryError;
                        type_list.append(.self) catch return SchemaParserError.MemoryError;
                    },
                    .eof => state = .end,
                    else => return printError(
                        "Error parsing schema: Expected a struct name",
                        SchemaParserError.SynthaxError,
                        self.toker.buffer,
                        token.loc.start,
                        token.loc.end,
                    ),
                },

                .expect_l_paren => switch (token.tag) {
                    .l_paren => state = .expect_member_name,
                    else => return printError(
                        "Error parsing schema: Expected (",
                        SchemaParserError.SynthaxError,
                        self.toker.buffer,
                        token.loc.start,
                        token.loc.end,
                    ),
                },

                .expect_member_name_OR_r_paren => switch (token.tag) {
                    .identifier => {
                        // Let `.expect_member_name` handle this same token.
                        state = .expect_member_name;
                        keep_next = true;
                    },
                    .r_paren => state = .add_struct,
                    else => return printError(
                        "Error parsing schema: Expected member name or )",
                        SchemaParserError.SynthaxError,
                        self.toker.buffer,
                        token.loc.start,
                        token.loc.end,
                    ),
                },

                .add_struct => {
                    // Reserve the slot first so that nothing can fail once
                    // ownership of the slices has moved into the new struct.
                    struct_array.ensureUnusedCapacity(1) catch return SchemaParserError.MemoryError;

                    // `toOwnedSlice` leaves the lists empty and ready for the next struct.
                    const members = member_list.toOwnedSlice() catch return SchemaParserError.MemoryError;
                    errdefer self.allocator.free(members);
                    const types = type_list.toOwnedSlice() catch return SchemaParserError.MemoryError;
                    errdefer self.allocator.free(types);

                    // On failure `links` is still owned here and released by the `defer` above.
                    const schema_struct = try SchemaStruct.init(self.allocator, name, members, types, links);
                    struct_array.appendAssumeCapacity(schema_struct);

                    // `links` was moved into the struct; start a fresh map for the next one.
                    links = std.StringHashMap([]const u8).init(self.allocator);

                    state = .expect_struct_name_OR_end;
                    // The loop already fetched the token that follows `)`; it has not
                    // been examined yet, so it must not be skipped.
                    keep_next = true;
                },

                .expect_member_name => switch (token.tag) {
                    .identifier => {
                        state = .expect_two_dot;
                        member_list.append(self.toker.getTokenSlice(token)) catch return SchemaParserError.MemoryError;
                        member_token = token;
                    },
                    else => return printError(
                        "Error parsing schema: Expected member name",
                        SchemaParserError.SynthaxError,
                        self.toker.buffer,
                        token.loc.start,
                        token.loc.end,
                    ),
                },

                .expect_two_dot => switch (token.tag) {
                    .two_dot => state = .expect_value_type,
                    else => return printError(
                        "Error parsing schema: Expected :",
                        SchemaParserError.SynthaxError,
                        self.toker.buffer,
                        token.loc.start,
                        token.loc.end,
                    ),
                },

                .expect_value_type => {
                    // `lr_bracket` introduces an array type; the element type comes next.
                    if (token.tag == .lr_bracket) {
                        state = .expext_array_type;
                        continue;
                    }

                    const dtype: DataType = switch (token.tag) {
                        .type_int => .int,
                        .type_str => .str,
                        .type_float => .float,
                        .type_bool => .bool,
                        .type_date => .date,
                        .type_time => .time,
                        .type_datetime => .datetime,
                        // Any other identifier is the name of the struct being linked to.
                        .identifier => .link,
                        else => return printError(
                            "Error parsing schema: Expected data type",
                            SchemaParserError.SynthaxError,
                            self.toker.buffer,
                            token.loc.start,
                            token.loc.end,
                        ),
                    };

                    type_list.append(dtype) catch return SchemaParserError.MemoryError;
                    if (dtype == .link) {
                        links.put(
                            self.toker.getTokenSlice(member_token),
                            self.toker.getTokenSlice(token),
                        ) catch return SchemaParserError.MemoryError;
                    }
                    state = .expect_comma;
                },

                .expext_array_type => {
                    const dtype: DataType = switch (token.tag) {
                        .type_int => .int_array,
                        .type_str => .str_array,
                        .type_float => .float_array,
                        .type_bool => .bool_array,
                        .type_date => .date_array,
                        .type_time => .time_array,
                        .type_datetime => .datetime_array,
                        // Any other identifier is the name of the struct being linked to.
                        .identifier => .link_array,
                        else => return printError(
                            "Error parsing schema: Expected data type",
                            SchemaParserError.SynthaxError,
                            self.toker.buffer,
                            token.loc.start,
                            token.loc.end,
                        ),
                    };

                    type_list.append(dtype) catch return SchemaParserError.MemoryError;
                    if (dtype == .link_array) {
                        links.put(
                            self.toker.getTokenSlice(member_token),
                            self.toker.getTokenSlice(token),
                        ) catch return SchemaParserError.MemoryError;
                    }
                    state = .expect_comma;
                },

                .expect_comma => switch (token.tag) {
                    .comma => state = .expect_member_name_OR_r_paren,
                    else => return printError(
                        "Error parsing schema: Expected ,",
                        SchemaParserError.SynthaxError,
                        self.toker.buffer,
                        token.loc.start,
                        token.loc.end,
                    ),
                },

                // Excluded by the loop condition.
                .end, .invalid => unreachable,
            }
        }
    }
};
// TODO: Add some tests; surprisingly there aren't any yet