New Filter start to work

Changing how condition are handle to create a tree of condition to only
parse once.

So far the basic work, need more testing.
This commit is contained in:
Adrien Bouvais 2024-10-27 19:55:07 +01:00
parent 5756e3a530
commit 334c738ac1
4 changed files with 300 additions and 160 deletions

View File

@ -16,6 +16,8 @@ const SchemaToken = @import("tokenizers/schema.zig").Token;
const AdditionalData = @import("stuffs/additionalData.zig").AdditionalData;
const Loc = @import("tokenizers/shared/loc.zig").Loc;
const Condition = @import("stuffs/filter.zig").Condition;
// TODO: Move that to another struct, not in the file engine
const SchemaStruct = @import("schemaParser.zig").Parser.SchemaStruct;
const SchemaParser = @import("schemaParser.zig").Parser;
@ -89,23 +91,6 @@ pub const FileEngine = struct {
datetime_array: std.ArrayList(DateTime),
};
/// use to parse file. It take a struct name and member name to know what to parse.
/// An Operation from equal, different, superior, superior_or_equal, ...
/// The DataType from int, float and str
/// TODO: Use token from the query for struct_name, member_name and value, to save memory
/// TODO: Update to do multiple operation at the same tome on a row
pub const Condition = struct {
struct_name: []const u8,
member_name: []const u8 = undefined,
value: []const u8 = undefined,
operation: enum { equal, different, superior, superior_or_equal, inferior, inferior_or_equal, in } = undefined,
data_type: DataType = undefined,
pub fn init(struct_loc: []const u8) Condition {
return Condition{ .struct_name = struct_loc };
}
};
// --------------------Other--------------------
pub fn readSchemaFile(allocator: Allocator, sub_path: []const u8, buffer: []u8) FileEngineError!usize {

199
src/stuffs/filter.zig Normal file
View File

@ -0,0 +1,199 @@
const std = @import("std");
const ZipponError = @import("errors.zig").ZipponError;
const DataType = @import("dtype").DataType;
pub const ComparisonOperator = enum {
equal,
different,
superior,
superior_or_equal,
inferior,
inferior_or_equal,
in,
pub fn str(self: ComparisonOperator) []const u8 {
return switch (self) {
.equal => "=",
.different => "!=",
.superior => ">",
.superior_or_equal => ">=",
.inferior => "<",
.inferior_or_equal => "<=",
.in => "IN",
};
}
};
pub const LogicalOperator = enum {
AND,
OR,
pub fn str(self: LogicalOperator) []const u8 {
return switch (self) {
.AND => "AND",
.OR => "OR",
};
}
};
pub const Condition = struct {
member_name: []const u8 = undefined,
value: []const u8 = undefined,
operation: ComparisonOperator = undefined,
data_type: DataType = undefined,
// data_index: usize TODO: add this member, this is the position in the row of the value, to use in the evaluate method
};
pub const FilterNode = union(enum) {
condition: Condition,
logical: struct {
operator: LogicalOperator,
left: *FilterNode,
right: *FilterNode,
},
empty: bool,
};
pub const Filter = struct {
allocator: std.mem.Allocator,
root: *FilterNode,
pub fn init(allocator: std.mem.Allocator) ZipponError!Filter {
const node = allocator.create(FilterNode) catch return ZipponError.MemoryError;
node.* = FilterNode{ .empty = true };
return .{ .allocator = allocator, .root = node };
}
pub fn deinit(self: *Filter) void {
switch (self.root.*) {
.logical => self.freeNode(self.root),
else => {},
}
self.allocator.destroy(self.root);
}
pub fn addCondition(self: *Filter, condition: Condition) ZipponError!void {
const node = self.allocator.create(FilterNode) catch return ZipponError.MemoryError;
node.* = FilterNode{ .condition = condition };
switch (self.root.*) {
.empty => {
self.allocator.destroy(self.root);
self.root = node;
},
.logical => {
var current = self.root;
var founded = false;
while (!founded) switch (current.logical.right.*) {
.empty => founded = true,
.logical => {
current = current.logical.right;
founded = false;
},
.condition => unreachable,
};
self.allocator.destroy(current.logical.right);
current.logical.right = node;
},
.condition => unreachable,
}
}
pub fn addLogicalOperator(self: *Filter, operator: LogicalOperator) ZipponError!void {
const empty_node = self.allocator.create(FilterNode) catch return ZipponError.MemoryError;
empty_node.* = FilterNode{ .empty = true };
const node = self.allocator.create(FilterNode) catch return ZipponError.MemoryError;
node.* = FilterNode{ .logical = .{ .operator = operator, .left = self.root, .right = empty_node } };
self.root = node;
}
pub fn addSubFilter(self: *Filter, sub_filter: *Filter) void {
switch (self.root.*) {
.empty => {
self.allocator.destroy(self.root);
self.root = sub_filter.root;
},
.logical => {
var current = self.root;
var founded = false;
while (!founded) switch (current.logical.right.*) {
.empty => founded = true,
.logical => {
current = current.logical.right;
founded = false;
},
.condition => unreachable,
};
self.allocator.destroy(current.logical.right);
current.logical.right = sub_filter.root;
},
.condition => unreachable,
}
}
fn freeNode(self: *Filter, node: *FilterNode) void {
switch (node.*) {
.logical => |logical| {
self.freeNode(logical.left);
self.freeNode(logical.right);
self.allocator.destroy(logical.left);
self.allocator.destroy(logical.right);
},
.condition => {},
.empty => {},
}
}
// TODO: Use []Data and make it work
pub fn evaluate(self: *const Filter, row: anytype) bool {
return self.evaluateNode(&self.root, row);
}
fn evaluateNode(self: *const Filter, node: *const FilterNode, row: anytype) bool {
return switch (node.*) {
.condition => |cond| self.evaluateCondition(cond, row),
.logical => |log| switch (log.operator) {
.AND => self.evaluateNode(log.left, row) and self.evaluateNode(log.right, row),
.OR => self.evaluateNode(log.left, row) or self.evaluateNode(log.right, row),
},
};
}
fn evaluateCondition(condition: Condition, row: anytype) bool {
const field_value = @field(row, condition.member_name);
return switch (condition.operation) {
.equal => std.mem.eql(u8, field_value, condition.value),
.different => !std.mem.eql(u8, field_value, condition.value),
.superior => field_value > condition.value,
.superior_or_equal => field_value >= condition.value,
.inferior => field_value < condition.value,
.inferior_or_equal => field_value <= condition.value,
.in => @panic("Not implemented"), // Implement this based on your needs
};
}
pub fn debugPrint(self: Filter) void {
std.debug.print("\n\n", .{});
self.printNode(self.root.*);
std.debug.print("\n\n", .{});
}
fn printNode(self: Filter, node: FilterNode) void {
switch (node) {
.logical => |logical| {
std.debug.print(" ( ", .{});
self.printNode(logical.left.*);
std.debug.print(" {s} ", .{logical.operator.str()});
self.printNode(logical.right.*);
std.debug.print(" ) ", .{});
},
.condition => |condition| std.debug.print("{s} {s} {s} |{any}|", .{
condition.member_name,
condition.operation.str(),
condition.value,
condition.data_type,
}),
.empty => std.debug.print("Empty", .{}),
}
}
};

View File

@ -72,11 +72,16 @@ pub const Token = struct {
pub const Tokenizer = struct {
buffer: [:0]const u8,
index: usize,
last_token: Token = undefined,
pub fn getTokenSlice(self: *Tokenizer, token: Token) []const u8 {
return self.buffer[token.loc.start..token.loc.end];
}
pub fn last(self: Tokenizer) Token {
return self.last_token;
}
pub fn init(buffer: [:0]const u8) Tokenizer {
// Skip the UTF-8 BOM if present.
return .{
@ -364,6 +369,7 @@ pub const Tokenizer = struct {
}
result.loc.end = self.index;
self.last_token = result;
return result;
}
};

View File

@ -1,7 +1,6 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const FileEngine = @import("fileEngine.zig").FileEngine;
const Condition = @import("fileEngine.zig").FileEngine.Condition;
const Tokenizer = @import("tokenizers/ziql.zig").Tokenizer;
const Token = @import("tokenizers/ziql.zig").Token;
@ -11,6 +10,9 @@ const AND = dtype.AND;
const OR = dtype.OR;
const DataType = dtype.DataType;
const Filter = @import("stuffs/filter.zig").Filter;
const Condition = @import("stuffs/filter.zig").Condition;
const AdditionalData = @import("stuffs/additionalData.zig").AdditionalData;
const AdditionalDataMember = @import("stuffs/additionalData.zig").AdditionalDataMember;
const send = @import("stuffs/utils.zig").send;
@ -47,6 +49,7 @@ const State = enum {
// For the filter parser
expect_left_condition, // Condition is a struct in FileEngine, it's all info necessary to get a list of UUID usinf FileEngine.getUUIDListUsingCondition
expect_right_condition,
expect_operation, // Operations are = != < <= > >=
expect_value,
expect_ANDOR_OR_end,
@ -381,83 +384,67 @@ pub const Parser = struct {
/// Take an array of UUID and populate it with what match what is between {}
/// Main is to know if between {} or (), main is true if between {}, otherwise between () inside {}
/// TODO: Optimize this so it can use multiple condition at the same time instead of parsing the all file for each condition
fn parseFilter(self: Parser, left_array: *std.ArrayList(UUID), struct_name: []const u8, main: bool) ZipponError!Token {
var token = self.toker.next();
fn parseFilter(self: Parser, struct_name: []const u8) ZipponError!Filter {
var filter = try Filter.init(self.allocator);
errdefer filter.deinit();
var keep_next = false;
var token = self.toker.next();
var state: State = .expect_left_condition;
var left_condition = Condition.init(struct_name);
var curent_operation: enum { and_, or_ } = undefined;
while (state != .end) : ({
token = if (!keep_next) self.toker.next() else token;
token = if (keep_next) token else self.toker.next();
keep_next = false;
}) switch (state) {
.expect_left_condition => switch (token.tag) {
.r_brace => {
try self.file_engine.getAllUUIDList(struct_name, left_array);
state = .end;
},
else => {
token = try self.parseCondition(&left_condition, &token);
try self.file_engine.getUUIDListUsingCondition(left_condition, left_array);
state = .expect_ANDOR_OR_end;
keep_next = true;
},
},
.expect_ANDOR_OR_end => switch (token.tag) {
.r_brace => if (main) {
state = .end;
} else {
return printError(
"Error: Expected } to end main condition or AND/OR to continue it",
ZiQlParserError.SynthaxError,
self.toker.buffer,
token.loc.start,
token.loc.end,
);
},
.r_paren => if (!main) {
state = .end;
} else {
return printError(
"Error: Expected ) to end inside condition or AND/OR to continue it",
ZiQlParserError.SynthaxError,
self.toker.buffer,
token.loc.start,
token.loc.end,
);
},
.keyword_and => {
curent_operation = .and_;
state = .expect_right_uuid_array;
},
.keyword_or => {
curent_operation = .or_;
state = .expect_right_uuid_array;
},
else => return printError(
"Error: Expected a condition including AND OR or the end of the filter with } or )",
ZiQlParserError.SynthaxError,
self.toker.buffer,
token.loc.start,
token.loc.end,
),
},
.expect_right_uuid_array => {
var right_array = std.ArrayList(UUID).init(self.allocator);
defer right_array.deinit();
switch (token.tag) {
.l_paren => _ = try self.parseFilter(&right_array, struct_name, false), // run parserFilter to get the right array
.identifier => {
var right_condition = Condition.init(struct_name);
token = try self.parseCondition(&right_condition, &token);
}) {
switch (state) {
.expect_left_condition => switch (token.tag) {
.r_brace => {
state = .end;
},
else => {
const condition = try self.parseCondition(&token, struct_name);
try filter.addCondition(condition);
state = .expect_ANDOR_OR_end;
token = self.toker.last();
keep_next = true;
try self.file_engine.getUUIDListUsingCondition(right_condition, &right_array);
},
},
.expect_ANDOR_OR_end => switch (token.tag) {
.r_brace, .r_paren => {
state = .end;
},
.keyword_and => {
try filter.addLogicalOperator(.AND);
state = .expect_right_condition;
},
.keyword_or => {
try filter.addLogicalOperator(.OR);
state = .expect_right_condition;
},
else => return printError(
"Error: Expected AND, OR, or }",
ZiQlParserError.SynthaxError,
self.toker.buffer,
token.loc.start,
token.loc.end,
),
},
.expect_right_condition => switch (token.tag) {
.l_paren => {
var sub_filter = try self.parseFilter(struct_name);
filter.addSubFilter(&sub_filter);
token = self.toker.last();
keep_next = true;
state = .expect_ANDOR_OR_end;
},
.identifier => {
const condition = try self.parseCondition(&token, struct_name);
try filter.addCondition(condition);
token = self.toker.last();
keep_next = true;
state = .expect_ANDOR_OR_end;
},
else => return printError(
"Error: Expected ( or member name.",
@ -466,35 +453,31 @@ pub const Parser = struct {
token.loc.start,
token.loc.end,
),
}
},
switch (curent_operation) {
.and_ => AND(left_array, &right_array) catch return ZipponError.AndOrError,
.or_ => OR(left_array, &right_array) catch return ZipponError.AndOrError,
}
state = .expect_ANDOR_OR_end;
},
else => unreachable,
}
}
else => unreachable,
};
return token;
return filter;
}
/// Parse to get a Condition. Which is a struct that is use by the FileEngine to retreive data.
/// In the query, it is this part name = 'Bob' or age <= 10
fn parseCondition(self: Parser, condition: *Condition, token_ptr: *Token) ZipponError!Token {
fn parseCondition(self: Parser, token_ptr: *Token, struct_name: []const u8) ZipponError!Condition {
var keep_next = false;
var state: State = .expect_member;
var token = token_ptr.*;
var condition = Condition{};
while (state != .end) : ({
token = if (!keep_next) self.toker.next() else token;
keep_next = false;
}) switch (state) {
.expect_member => switch (token.tag) {
.identifier => {
if (!(self.file_engine.isMemberNameInStruct(condition.struct_name, self.toker.getTokenSlice(token)) catch {
if (!(self.file_engine.isMemberNameInStruct(struct_name, self.toker.getTokenSlice(token)) catch {
return printError(
"Error: Struct not found.",
ZiQlParserError.StructNotFound,
@ -512,7 +495,7 @@ pub const Parser = struct {
);
}
condition.data_type = self.file_engine.memberName2DataType(
condition.struct_name,
struct_name,
self.toker.getTokenSlice(token),
) catch return ZiQlParserError.MemberNotFound;
condition.member_name = self.toker.getTokenSlice(token);
@ -686,7 +669,7 @@ pub const Parser = struct {
else => unreachable,
}
return token;
return condition;
}
/// When this function is call, next token should be [
@ -990,62 +973,6 @@ pub const Parser = struct {
}
};
test "ADD" {
try testParsing("ADD User (name = 'Bob', email='bob@email.com', age=55, scores=[ 1 ], friends=[], bday=2000/01/01, a_time=12:04, last_order=2000/01/01-12:45)");
try testParsing("ADD User (name = 'Bob', email='bob@email.com', age=55, scores=[ 1 ], friends=[], bday=2000/01/01, a_time=12:04:54, last_order=2000/01/01-12:45)");
try testParsing("ADD User (name = 'Bob', email='bob@email.com', age=-55, scores=[ 1 ], friends=[], bday=2000/01/01, a_time=12:04:54.8741, last_order=2000/01/01-12:45)");
}
test "UPDATE" {
try testParsing("UPDATE User {name = 'Bob'} TO (email='new@gmail.com')");
}
test "DELETE" {
try testParsing("DELETE User {name='Bob'}");
}
test "GRAB filter with string" {
try testParsing("GRAB User {name = 'Bob'}");
try testParsing("GRAB User {name != 'Brittany Rogers'}");
}
test "GRAB with additional data" {
try testParsing("GRAB User [1] {age < 18}");
try testParsing("GRAB User [name] {age < 18}");
try testParsing("GRAB User [100; name] {age < 18}");
}
test "GRAB filter with int" {
try testParsing("GRAB User {age = 18}");
try testParsing("GRAB User {age > -18}");
try testParsing("GRAB User {age < 18}");
try testParsing("GRAB User {age <= 18}");
try testParsing("GRAB User {age >= 18}");
try testParsing("GRAB User {age != 18}");
}
test "GRAB filter with date" {
try testParsing("GRAB User {bday > 2000/01/01}");
try testParsing("GRAB User {a_time < 08:00}");
try testParsing("GRAB User {last_order > 2000/01/01-12:45}");
}
test "Specific query" {
try testParsing("GRAB User");
try testParsing("GRAB User {}");
try testParsing("GRAB User [1]");
}
test "Synthax error" {
try expectParsingError("GRAB {}", ZiQlParserError.StructNotFound);
try expectParsingError("GRAB User {qwe = 'qwe'}", ZiQlParserError.MemberNotFound);
try expectParsingError("ADD User (name='Bob')", ZiQlParserError.MemberMissing);
try expectParsingError("GRAB User {name='Bob'", ZiQlParserError.SynthaxError);
try expectParsingError("GRAB User {age = 50 name='Bob'}", ZiQlParserError.SynthaxError);
try expectParsingError("GRAB User {age <14 AND (age>55}", ZiQlParserError.SynthaxError);
try expectParsingError("GRAB User {name < 'Hello'}", ZiQlParserError.ConditionError);
}
fn testParsing(source: [:0]const u8) !void {
const TEST_DATA_DIR = @import("config.zig").TEST_DATA_DIR;
const allocator = std.testing.allocator;
@ -1073,3 +1000,26 @@ fn expectParsingError(source: [:0]const u8, err: ZiQlParserError) !void {
try std.testing.expectError(err, parser.parse());
}
test "New parser filter" {
try testParseFilter("name = 'Adrien'}");
try testParseFilter("name = 'Adrien' AND age > 11}");
try testParseFilter("name = 'Adrien' AND (age < 11 OR age > 40)}");
try testParseFilter("(name = 'Adrien') AND (age < 11 OR age > 40)}");
}
fn testParseFilter(source: [:0]const u8) !void {
const TEST_DATA_DIR = @import("config.zig").TEST_DATA_DIR;
const allocator = std.testing.allocator;
const path = try allocator.dupe(u8, TEST_DATA_DIR);
var file_engine = FileEngine.init(allocator, path);
defer file_engine.deinit();
var tokenizer = Tokenizer.init(source);
var parser = Parser.init(allocator, &tokenizer, &file_engine);
var filter = try parser.parseFilter("User");
defer filter.deinit();
filter.debugPrint();
}