Speed up batch ADD and improve benchmark
Now I only flush when the file is full, and I only check whether the currently used file is big enough, so I don't stat all the files and flush on every write like before.
This commit is contained in:
parent
e3264d8553
commit
2a4842432d
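A minimal sketch of that write strategy, kept separate from the actual diff below. writeBatch, next_file_path, max_file_size, and the pre-serialized rows slice are hypothetical stand-ins for illustration; the real code is FileEngine.addEntity further down, which uses zid.DataWriter and MAX_FILE_SIZE.

const std = @import("std");

// Hypothetical size limit; the real constant is MAX_FILE_SIZE in the FileEngine.
const max_file_size: u64 = 1024 * 1024;

// Write every row of a batch through one buffered writer, roll over to a new
// data file only when the current one grows past the limit, and flush once at
// the end instead of after every row.
fn writeBatch(rows: []const []const u8, next_file_path: *const fn () []const u8) !void {
    var file = try std.fs.cwd().createFile(next_file_path(), .{});
    var buffered = std.io.bufferedWriter(file.writer());

    for (rows) |row| {
        try buffered.writer().writeAll(row);
        // Only the currently open file is statted, not every data file.
        if ((try file.stat()).size > max_file_size) {
            try buffered.flush();
            file.close();
            // Roll over to a new data file and keep writing.
            file = try std.fs.cwd().createFile(next_file_path(), .{});
            buffered = std.io.bufferedWriter(file.writer());
        }
    }

    // One final flush for the whole batch.
    try buffered.flush();
    file.close();
}

The point is that only the currently open file gets statted and the writer is flushed once per roll-over plus once at the end, instead of statting every data file and flushing after every row.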
@@ -8,6 +8,9 @@ const ZipponError = @import("src/stuffs/errors.zig").ZipponError;
const names = [_][]const u8{ "Alice", "Bob", "Charlie", "Dave", "Eve" };
const emails = [_][]const u8{ "alice@email.com", "bob@email.com", "charlie@email.com", "dave@email.com", "eve@email.com" };
const dates = [_][]const u8{ "2000/01/01", "1954/04/02", "1998/01/21", "1977/12/31" };
const times = [_][]const u8{ "12:04", "20:45:11", "03:11:13", "03:00:01.0152" };
const datetimes = [_][]const u8{ "2000/01/01-12:04", "1954/04/02-20:45:11", "1998/01/21-03:11:13", "1977/12/31-03:00:01.0153" };
const scores = [_]i32{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };

pub const std_options = .{
@@ -28,34 +31,63 @@ pub fn myLog(
}

pub fn main() !void {
const to_test = [_]usize{ 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000 };
const to_test = [_]usize{50_000};
var line_buffer: [1024 * 1024]u8 = undefined;
// Initialize your DBEngine here
var db_engine = DBEngine.init("benchmark", "schema/example");
defer db_engine.deinit();
for (to_test) |users_count| {
// Populate with random dummy values
// Needed some speed up; spent time to find that it is parseConditionValue that takes the time, the last switch to be exact, the one that parses str to value
{
std.debug.print("\n=====================================\n\n", .{});
std.debug.print("Populating with {d} users.\n", .{users_count});

var gpa = std.rand.DefaultPrng.init(0);
const allocator = std.heap.page_allocator;

var prng = std.rand.DefaultPrng.init(0);
const rng = prng.random();
const populate_start_time = std.time.nanoTimestamp();
for (users_count) |_| {
const name = names[gpa.random().uintAtMost(usize, names.len - 1)];
const email = emails[gpa.random().uintAtMost(usize, emails.len - 1)];
const age = gpa.random().uintAtMost(usize, 100);
const score = scores[gpa.random().uintAtMost(usize, scores.len - 1)];
const null_term_query_str = try std.fmt.bufPrintZ(
&line_buffer,
"ADD User (name = '{s}', email='{s}', age={d}, scores=[ {d} ], best_friend=none, friends=none, bday=2000/01/01, a_time=12:04, last_order=2000/01/01-12:45)",
.{ name, email, age, score },

var array = std.ArrayList(u8).init(allocator);
defer array.deinit();
var writer = array.writer();

try writer.print(
"ADD User (name = '{s}', email='{s}', age={d}, scores=[ {d} ], best_friend=none, friends=none, bday={s}, a_time={s}, last_order={s})",
.{
names[rng.uintAtMost(usize, names.len - 1)],
emails[rng.uintAtMost(usize, emails.len - 1)],
rng.uintAtMost(usize, 100),
scores[rng.uintAtMost(usize, scores.len - 1)],
dates[rng.uintAtMost(usize, dates.len - 1)],
times[rng.uintAtMost(usize, times.len - 1)],
datetimes[rng.uintAtMost(usize, datetimes.len - 1)],
},
);

for (users_count - 1) |_| {
try writer.print(
"('{s}', '{s}', {d}, [ {d} ], none, none, {s}, {s}, {s})",
.{
names[rng.uintAtMost(usize, names.len - 1)],
emails[rng.uintAtMost(usize, emails.len - 1)],
rng.uintAtMost(usize, 100),
scores[rng.uintAtMost(usize, scores.len - 1)],
dates[rng.uintAtMost(usize, dates.len - 1)],
times[rng.uintAtMost(usize, times.len - 1)],
datetimes[rng.uintAtMost(usize, datetimes.len - 1)],
},
);
var toker = ziqlTokenizer.init(null_term_query_str);
var parser = ziqlParser.init(&toker, &db_engine.file_engine, &db_engine.schema_engine);
try parser.parse();
}
const null_term_query_str = try std.fmt.allocPrintZ(allocator, "{s}", .{array.items});
defer allocator.free(null_term_query_str);

var toker = ziqlTokenizer.init(null_term_query_str);
var parser = ziqlParser.init(&toker, &db_engine.file_engine, &db_engine.schema_engine);
try parser.parse();

const populate_end_time = std.time.nanoTimestamp();
const populate_duration = @as(f64, @floatFromInt(populate_end_time - populate_start_time)) / 1e9;
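For reference, the query string the benchmark now builds is one single batch ADD: the first tuple uses the named form and every following tuple is positional. With illustrative values drawn from the constant arrays above, it looks like this:

ADD User (name = 'Alice', email='alice@email.com', age=42, scores=[ 3 ], best_friend=none, friends=none, bday=2000/01/01, a_time=12:04, last_order=2000/01/01-12:04)('Bob', 'bob@email.com', 17, [ 7 ], none, none, 1954/04/02, 20:45:11, 1954/04/02-20:45:11)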
@@ -76,6 +108,7 @@ pub fn main() !void {
"GRAB User [name] {}",
"GRAB User {name = 'Charlie'}",
"GRAB User {age > 30}",
"GRAB User {bday > 2000/01/01}",
"DELETE User {}",
};
@@ -1,6 +1,7 @@
// This file is just to expose what I need to grab

pub const UUID = @import("uuid.zig").UUID;
pub const Zero = @import("uuid.zig").zero;
pub const DateTime = @import("date.zig").DateTime;
pub const OR = @import("uuid.zig").OR;
pub const AND = @import("uuid.zig").AND;
@@ -509,7 +509,7 @@ pub const ArrayIterator = struct {
/// Performance concern once again.
pub const DataWriter = struct {
file: std.fs.File,
writer: std.io.BufferedWriter(4096, std.fs.File.Writer),
writer: std.io.BufferedWriter(4096, std.fs.File.Writer), // TODO: Increase buffer size, this should speed up a bit

pub fn init(name: []const u8, dir: ?std.fs.Dir) !DataWriter {
const d_ = dir orelse std.fs.cwd();
@@ -533,6 +533,10 @@ pub const DataWriter = struct {
pub fn flush(self: *DataWriter) !void {
try self.writer.flush();
}

pub fn fileStat(self: DataWriter) !std.fs.File.Stat {
return self.file.stat();
}
};

/// Create a new data file that can then be used by the DataWriter
@@ -557,7 +561,7 @@ pub fn statFile(name: []const u8, dir: ?std.fs.Dir) !std.fs.File.Stat {
// I have almost more lines of test than the real stuff x)
// But I think everything is tested to be fair, so good stuff
// It also write benchmark so you can benchmark on your own hardware
// The data write and read is not really representative of real worl tho
// The data write and read is not really representative of real world tho

test "Array Iterators" {
const allocator = std.testing.allocator;
@@ -685,26 +685,36 @@ pub const FileEngine = struct {
pub fn addEntity(
self: *FileEngine,
struct_name: []const u8,
map: std.StringHashMap(ConditionValue),
maps: []std.StringHashMap(ConditionValue),
writer: anytype,
n: usize,
) ZipponError!void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = arena.allocator();

const file_index = try self.getFirstUsableIndexFile(struct_name); // TODO: Speed up this

const path = std.fmt.bufPrint(&path_buffer, "{s}/DATA/{s}/{d}.zid", .{ self.path_to_ZipponDB_dir, struct_name, file_index }) catch return FileEngineError.MemoryError;
const data = try self.orderedNewData(allocator, struct_name, map);
var file_index = try self.getFirstUsableIndexFile(struct_name); // TODO: Speed up this
var path = std.fmt.bufPrint(&path_buffer, "{s}/DATA/{s}/{d}.zid", .{ self.path_to_ZipponDB_dir, struct_name, file_index }) catch return FileEngineError.MemoryError;

var data_writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError;
defer data_writer.deinit();

for (0..n) |_| data_writer.write(data) catch return FileEngineError.ZipponDataError;
data_writer.flush() catch return FileEngineError.ZipponDataError;
for (maps) |map| {
const data = try self.orderedNewData(allocator, struct_name, map);
data_writer.write(data) catch return FileEngineError.ZipponDataError;
writer.print("\"{s}\", ", .{UUID.format_bytes(data[0].UUID)}) catch return FileEngineError.WriteError;

writer.print("\"{s}\", ", .{UUID.format_bytes(data[0].UUID)}) catch return FileEngineError.WriteError;
const file_stat = data_writer.fileStat() catch return ZipponError.ZipponDataError;
if (file_stat.size > MAX_FILE_SIZE) {
file_index = try self.getFirstUsableIndexFile(struct_name);
data_writer.flush() catch return FileEngineError.ZipponDataError;
data_writer.deinit();

path = std.fmt.bufPrint(&path_buffer, "{s}/DATA/{s}/{d}.zid", .{ self.path_to_ZipponDB_dir, struct_name, file_index }) catch return FileEngineError.MemoryError;
data_writer = zid.DataWriter.init(path, null) catch return FileEngineError.ZipponDataError;
}
}

data_writer.flush() catch return FileEngineError.ZipponDataError;
}

pub fn updateEntities(
@@ -309,7 +309,7 @@ pub const Parser = struct {
),
},

// TODO: Be able to do it in batch
// TODO: Speed up batch by flushing one time and speed up how to find which file to use
.parse_new_data_and_add_data => {
var order = std.ArrayList([]const u8).init(allocator);
defer order.deinit();
@@ -319,9 +319,14 @@ pub const Parser = struct {
defer buff.deinit();
buff.writer().writeAll("[") catch return ZipponError.WriteError;

while (true) {
var data_map = std.StringHashMap(ConditionValue).init(allocator);
defer data_map.deinit();
var maps = std.ArrayList(std.StringHashMap(ConditionValue)).init(allocator);
defer maps.deinit();

var data_map = std.StringHashMap(ConditionValue).init(allocator);
defer data_map.deinit();

while (true) { // I could multithread that, as it does take a long time for big benchmarks
data_map.clearRetainingCapacity();
try self.parseNewData(allocator, &data_map, struct_name, &order, ordered);
ordered = true;
@@ -345,12 +350,15 @@ pub const Parser = struct {
);
}

token = self.toker.last_token;
self.file_engine.addEntity(struct_name, data_map, &buff.writer(), 1) catch return ZipponError.CantWriteEntity;
maps.append(data_map.clone() catch return ZipponError.MemoryError) catch return ZipponError.MemoryError;

token = self.toker.last_token;
if (token.tag == .l_paren) continue;
break;
}

self.file_engine.addEntity(struct_name, maps.items, &buff.writer()) catch return ZipponError.CantWriteEntity;

buff.writer().writeAll("]") catch return ZipponError.WriteError;
send("{s}", .{buff.items});
state = .end;
@@ -779,7 +787,7 @@ pub const Parser = struct {
) !void {
var token = self.toker.next();
var keep_next = false;
var member_name: []const u8 = undefined; // Maybe use allocator.alloc
var member_name: []const u8 = undefined;
var state: State = .expect_member_OR_value;
var i: usize = 0;
@@ -970,29 +978,30 @@ pub const Parser = struct {
}

// And finally create the ConditionValue
var value: ConditionValue = undefined;
// FIXME: This takes the majority of the time when ADDing in big batches. Needs a serious speed up. I aim to be able to load a simple 10MB query in less than 0.1s
// Right now, for 100_000 users at around 10MB, it takes 30s... I mean come on, 30s? For 10MB? That sucks...
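// (for context: ~10MB in 30s is roughly 0.3MB/s, or about 3,300 rows per second)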
switch (data_type) {
.int => value = ConditionValue.initInt(self.toker.buffer[start_index..token.loc.end]),
.float => value = ConditionValue.initFloat(self.toker.buffer[start_index..token.loc.end]),
.str => value = ConditionValue.initStr(self.toker.buffer[start_index + 1 .. token.loc.end - 1]),
.date => value = ConditionValue.initDate(self.toker.buffer[start_index..token.loc.end]),
.time => value = ConditionValue.initTime(self.toker.buffer[start_index..token.loc.end]),
.datetime => value = ConditionValue.initDateTime(self.toker.buffer[start_index..token.loc.end]),
.bool => value = ConditionValue.initBool(self.toker.buffer[start_index..token.loc.end]),
.int_array => value = try ConditionValue.initArrayInt(allocator, self.toker.buffer[start_index..token.loc.end]),
.str_array => value = try ConditionValue.initArrayStr(allocator, self.toker.buffer[start_index..token.loc.end]),
.bool_array => value = try ConditionValue.initArrayBool(allocator, self.toker.buffer[start_index..token.loc.end]),
.float_array => value = try ConditionValue.initArrayFloat(allocator, self.toker.buffer[start_index..token.loc.end]),
.date_array => value = try ConditionValue.initArrayDate(allocator, self.toker.buffer[start_index..token.loc.end]),
.time_array => value = try ConditionValue.initArrayTime(allocator, self.toker.buffer[start_index..token.loc.end]),
.datetime_array => value = try ConditionValue.initArrayDateTime(allocator, self.toker.buffer[start_index..token.loc.end]),
.int => return ConditionValue.initInt(self.toker.buffer[start_index..token.loc.end]),
.float => return ConditionValue.initFloat(self.toker.buffer[start_index..token.loc.end]),
.str => return ConditionValue.initStr(self.toker.buffer[start_index + 1 .. token.loc.end - 1]),
.date => return ConditionValue.initDate(self.toker.buffer[start_index..token.loc.end]),
.time => return ConditionValue.initTime(self.toker.buffer[start_index..token.loc.end]),
.datetime => return ConditionValue.initDateTime(self.toker.buffer[start_index..token.loc.end]),
.bool => return ConditionValue.initBool(self.toker.buffer[start_index..token.loc.end]),
.int_array => return try ConditionValue.initArrayInt(allocator, self.toker.buffer[start_index..token.loc.end]),
.str_array => return try ConditionValue.initArrayStr(allocator, self.toker.buffer[start_index..token.loc.end]),
.bool_array => return try ConditionValue.initArrayBool(allocator, self.toker.buffer[start_index..token.loc.end]),
.float_array => return try ConditionValue.initArrayFloat(allocator, self.toker.buffer[start_index..token.loc.end]),
.date_array => return try ConditionValue.initArrayDate(allocator, self.toker.buffer[start_index..token.loc.end]),
.time_array => return try ConditionValue.initArrayTime(allocator, self.toker.buffer[start_index..token.loc.end]),
.datetime_array => return try ConditionValue.initArrayDateTime(allocator, self.toker.buffer[start_index..token.loc.end]),
.link => switch (token.tag) {
.keyword_none => {
.keyword_none => { // TODO: Stop creating a map if empty; it can be null or something. Or maybe just keep one such link map in memory, so I don't create it every time
const map = allocator.create(std.AutoHashMap(UUID, void)) catch return ZipponError.MemoryError;
map.* = std.AutoHashMap(UUID, void).init(allocator);
_ = map.getOrPut(UUID.parse("00000000-0000-0000-0000-000000000000") catch @panic("Sorry wot ?")) catch return ZipponError.MemoryError;
value = ConditionValue.initLink(map);
map.put(dtype.Zero, {}) catch return ZipponError.MemoryError;
_ = self.toker.next();
return ConditionValue.initLink(map);
},
.uuid_literal => {
const uuid = UUID.parse(self.toker.buffer[start_index..token.loc.end]) catch return ZipponError.InvalidUUID;
@@ -1006,9 +1015,9 @@ pub const Parser = struct {

const map = allocator.create(std.AutoHashMap(UUID, void)) catch return ZipponError.MemoryError;
map.* = std.AutoHashMap(UUID, void).init(allocator);
_ = map.getOrPut(uuid) catch return ZipponError.MemoryError;
value = ConditionValue.initLink(map);
map.put(uuid, {}) catch return ZipponError.MemoryError;
_ = self.toker.next();
return ConditionValue.initLink(map);
},
.l_brace, .l_bracket => {
var filter: ?Filter = null;
@@ -1040,7 +1049,7 @@ pub const Parser = struct {
map,
&additional_data,
);
value = ConditionValue.initLink(map);
return ConditionValue.initLink(map);
},

else => return printError(
@@ -1055,8 +1064,8 @@ pub const Parser = struct {
.keyword_none => {
const map = allocator.create(std.AutoHashMap(UUID, void)) catch return ZipponError.MemoryError;
map.* = std.AutoHashMap(UUID, void).init(allocator);
value = ConditionValue.initArrayLink(map);
_ = self.toker.next();
return ConditionValue.initArrayLink(map);
},
.l_brace, .l_bracket => {
var filter: ?Filter = null;
@@ -1088,7 +1097,7 @@ pub const Parser = struct {
map,
&additional_data,
);
value = ConditionValue.initArrayLink(map);
return ConditionValue.initArrayLink(map);
},
else => return printError(
"Error: Expected uuid or none",
@@ -1100,8 +1109,6 @@ pub const Parser = struct {
},
.self => unreachable,
}

return value;
}

/// Check if all tokens in an array are of one specific type