Moved ZipponData to lib

This commit is contained in:
Adrien Bouvais 2024-11-12 21:17:33 +01:00
parent f5d93c94f6
commit b1de4a40c3
3 changed files with 841 additions and 9 deletions

View File

@ -19,13 +19,22 @@ pub fn build(b: *std.Build) void {
exe.root_module.addImport("dtype", b.createModule(.{ .root_source_file = b.path("lib/types/out.zig") }));
// Import ZipponData package
exe.root_module.addImport("ZipponData", b.dependency("ZipponData", .{}).module("ZipponData"));
exe.root_module.addImport("ZipponData", b.createModule(.{ .root_source_file = b.path("lib/zid.zig") }));
// Run step
const run_step = b.step("run", "Run the app");
run_step.dependOn(&run_cmd.step);
// All tests
const tests1 = b.addTest(.{
.root_source_file = b.path("src/stuffs/UUIDTree.zig"),
.target = target,
.optimize = optimize,
.name = "CLI tokenizer",
.test_runner = b.path("test_runner.zig"),
});
tests1.root_module.addImport("dtype", b.createModule(.{ .root_source_file = b.path("lib/types/out.zig") }));
const run_tests1 = b.addRunArtifact(tests1);
const tests2 = b.addTest(.{
.root_source_file = b.path("src/tokenizers/cli.zig"),
@ -63,7 +72,7 @@ pub fn build(b: *std.Build) void {
.test_runner = b.path("test_runner.zig"),
});
tests5.root_module.addImport("dtype", b.createModule(.{ .root_source_file = b.path("lib/types/out.zig") }));
tests5.root_module.addImport("ZipponData", b.dependency("ZipponData", .{}).module("ZipponData"));
tests5.root_module.addImport("ZipponData", b.createModule(.{ .root_source_file = b.path("lib/zid.zig") }));
const run_tests5 = b.addRunArtifact(tests5);
const tests6 = b.addTest(.{
@ -74,10 +83,11 @@ pub fn build(b: *std.Build) void {
.test_runner = b.path("test_runner.zig"),
});
tests6.root_module.addImport("dtype", b.createModule(.{ .root_source_file = b.path("lib/types/out.zig") }));
tests6.root_module.addImport("ZipponData", b.dependency("ZipponData", .{}).module("ZipponData"));
tests6.root_module.addImport("ZipponData", b.createModule(.{ .root_source_file = b.path("lib/zid.zig") }));
const run_tests6 = b.addRunArtifact(tests6);
const test_step = b.step("test", "Run unit tests");
test_step.dependOn(&run_tests1.step);
test_step.dependOn(&run_tests2.step);
test_step.dependOn(&run_tests3.step);
test_step.dependOn(&run_tests4.step);

View File

@ -1,12 +1,7 @@
.{
.name = "ZipponDB",
.version = "0.1.4",
.dependencies = .{
.ZipponData = .{
.url = "git+https://github.com/MrBounty/ZipponData#237a1f546e8e0bd68c786081ef454694244e6221",
.hash = "12207d024d13697ab989ec54bbb3e24e24485da5ceb29f91101cacf43c98aac30ca4",
},
},
.dependencies = .{},
.paths = .{
"",
},

827
lib/zid.zig Normal file
View File

@ -0,0 +1,827 @@
const std = @import("std");
// Fixed scratch buffers shared by every DataIterator in this process:
// Str and *Array payloads read from disk are copied here, and the Data
// slices handed back by next() point into these buffers. A row's string
// and array payloads are therefore only valid until the next call to next().
// NOTE(review): module-level mutable state — presumably single-threaded
// iteration only; confirm before using iterators from multiple threads.
// Maybe make buffer infinite with arrayList, but this is faster I think
// Maybe give the option ? Like 2 kind of reader ? One with an arrayList as arg
// I like this, I think I will do it. But later, at least I can see a way to keep the same API and use ArrayList as main buffer
const STRING_BUFFER_LENGTH = 1024 * 64 * 64; // 4 MiB (~4.2 MB decimal)
var string_buffer: [STRING_BUFFER_LENGTH]u8 = undefined;

const ARRAY_BUFFER_LENGTH = 1024 * 64 * 64; // 4 MiB (~4.2 MB decimal)
var array_buffer: [ARRAY_BUFFER_LENGTH]u8 = undefined;
/// Tag enum for every value kind that can be stored in a data file.
/// Scalar tags decode a fixed number of bytes; `Str` and the `*Array`
/// tags are length-prefixed and are copied into the module-level scratch
/// buffers (`string_buffer` / `array_buffer`).
pub const DType = enum {
    Int,
    Float,
    Str,
    Bool,
    UUID,
    Unix,
    IntArray,
    FloatArray,
    StrArray,
    BoolArray,
    UUIDArray,
    UnixArray,

    // Separate function (rather than folding into `read`) so the scratch
    // buffer cursor can be passed as a second argument.
    /// Read one length-prefixed string: a u32 byte count followed by the
    /// bytes themselves. The bytes are copied into `string_buffer` at
    /// `str_index` and the returned `Data.Str` slice points into it.
    /// Errors: `BufferFull` if the string would overflow the scratch
    /// buffer, `EndOfStream` if the file is truncated mid-value.
    fn readStr(_: DType, reader: anytype, str_index: *usize) !Data {
        // Read the length of the string.
        // BUG FIX: the original discarded readAtLeast's return value, so a
        // truncated file silently produced garbage; readNoEof errors instead.
        var len_buffer: [4]u8 = undefined;
        try reader.readNoEof(len_buffer[0..]);
        const len = @as(usize, @intCast(std.mem.bytesToValue(u32, &len_buffer)));

        const end = str_index.* + len;
        if (end > string_buffer.len) return error.BufferFull;

        // Copy the string payload into the shared scratch buffer.
        try reader.readNoEof(string_buffer[str_index.*..end]);
        const data = Data{ .Str = string_buffer[str_index.*..end] };
        str_index.* += len;
        return data;
    }

    /// Read one encoded array blob into `array_buffer` at `array_index`.
    /// The first 8 bytes on disk are the payload size in bytes; that
    /// header is kept in front of the payload in the buffer so
    /// `ArrayIterator.init` can re-read it. This makes skipping arrays
    /// fast and makes string arrays easy to encode.
    fn readArray(self: DType, reader: anytype, array_index: *usize) !Data {
        // Read the 8-byte payload-size header.
        var len_buffer: [8]u8 = undefined;
        try reader.readNoEof(len_buffer[0..]);
        const len = @as(usize, @intCast(std.mem.bytesToValue(u64, &len_buffer)));

        // Compute the slice this array will occupy in the scratch buffer
        // and make sure it fits.
        const origin = array_index.*;
        const start = array_index.* + @sizeOf(u64);
        const end = start + len;
        if (end > array_buffer.len) return error.BufferFull;

        // Store the header, then the raw payload, contiguously.
        @memcpy(array_buffer[array_index.*..start], len_buffer[0..]);
        try reader.readNoEof(array_buffer[start..end]);
        array_index.* = end;

        return switch (self) {
            .IntArray => Data{ .IntArray = array_buffer[origin..end] },
            .FloatArray => Data{ .FloatArray = array_buffer[origin..end] },
            .BoolArray => Data{ .BoolArray = array_buffer[origin..end] },
            .UUIDArray => Data{ .UUIDArray = array_buffer[origin..end] },
            .UnixArray => Data{ .UnixArray = array_buffer[origin..end] },
            .StrArray => Data{ .StrArray = array_buffer[origin..end] },
            else => unreachable, // scalar tags never reach readArray
        };
    }

    /// Read one fixed-size scalar value of this type.
    /// `Str` and the array tags must go through readStr/readArray instead.
    fn read(self: DType, reader: anytype) !Data {
        switch (self) {
            .Int => {
                var buffer: [@sizeOf(i32)]u8 = undefined;
                try reader.readNoEof(buffer[0..]);
                return Data{ .Int = std.mem.bytesToValue(i32, &buffer) };
            },
            .Float => {
                var buffer: [@sizeOf(f64)]u8 = undefined;
                try reader.readNoEof(buffer[0..]);
                return Data{ .Float = std.mem.bytesToValue(f64, &buffer) };
            },
            .Bool => {
                var buffer: [@sizeOf(bool)]u8 = undefined;
                try reader.readNoEof(buffer[0..]);
                return Data{ .Bool = std.mem.bytesToValue(bool, &buffer) };
            },
            .UUID => {
                var buffer: [@sizeOf([16]u8)]u8 = undefined;
                try reader.readNoEof(buffer[0..]);
                return Data{ .UUID = std.mem.bytesToValue([16]u8, &buffer) };
            },
            .Unix => {
                var buffer: [@sizeOf(u64)]u8 = undefined;
                try reader.readNoEof(buffer[0..]);
                return Data{ .Unix = std.mem.bytesToValue(u64, &buffer) };
            },
            else => unreachable,
        }
    }
};
/// One stored value, tagged by DType. Scalars carry their value inline;
/// `Str` carries a byte slice, and every `*Array` variant carries the raw
/// encoded blob (8-byte payload-size header included) produced by
/// `allocEncodArray` or read back by `DataIterator`.
pub const Data = union(DType) {
    Int: i32,
    Float: f64,
    Str: []const u8,
    Bool: bool,
    UUID: [16]u8,
    Unix: u64,
    IntArray: []const u8,
    FloatArray: []const u8,
    StrArray: []const u8,
    BoolArray: []const u8,
    UUIDArray: []const u8,
    UnixArray: []const u8,

    /// Number of bytes this value occupies in the file.
    pub fn size(self: Data) usize {
        return switch (self) {
            .Int => @sizeOf(i32),
            .Float => @sizeOf(f64),
            .Bool => @sizeOf(bool),
            .UUID => @sizeOf([16]u8),
            .Unix => @sizeOf(u64),
            // u32 length prefix plus the string bytes themselves.
            .Str => |s| 4 + s.len,
            // Array blobs already contain their header, so it's just the slice length.
            .IntArray, .FloatArray, .StrArray, .BoolArray, .UUIDArray, .UnixArray => |blob| blob.len,
        };
    }

    /// Serialize this value to `writer` in the on-disk format.
    fn write(self: Data, writer: anytype) !void {
        switch (self) {
            .Int => |v| try writer.writeAll(std.mem.asBytes(&v)),
            .Float => |v| try writer.writeAll(std.mem.asBytes(&v)),
            .Bool => |v| try writer.writeAll(std.mem.asBytes(&v)),
            .Unix => |v| try writer.writeAll(std.mem.asBytes(&v)),
            .UUID => |v| try writer.writeAll(&v),
            .Str => |v| {
                // u32 byte count first, then the raw bytes.
                const len = @as(u32, @intCast(v.len));
                try writer.writeAll(std.mem.asBytes(&len));
                try writer.writeAll(v);
            },
            // Encoded arrays are already raw bytes, header included.
            .IntArray, .FloatArray, .StrArray, .BoolArray, .UUIDArray, .UnixArray => |v| try writer.writeAll(v),
        }
    }

    pub fn initInt(value: i32) Data {
        return .{ .Int = value };
    }

    pub fn initFloat(value: f64) Data {
        return .{ .Float = value };
    }

    pub fn initStr(value: []const u8) Data {
        return .{ .Str = value };
    }

    pub fn initBool(value: bool) Data {
        return .{ .Bool = value };
    }

    pub fn initUUID(value: [16]u8) Data {
        return .{ .UUID = value };
    }

    pub fn initUnix(value: u64) Data {
        return .{ .Unix = value };
    }

    pub fn initIntArray(value: []const u8) Data {
        return .{ .IntArray = value };
    }

    pub fn initFloatArray(value: []const u8) Data {
        return .{ .FloatArray = value };
    }

    pub fn initStrArray(value: []const u8) Data {
        return .{ .StrArray = value };
    }

    pub fn initBoolArray(value: []const u8) Data {
        return .{ .BoolArray = value };
    }

    pub fn initUUIDArray(value: []const u8) Data {
        return .{ .UUIDArray = value };
    }

    pub fn initUnixArray(value: []const u8) Data {
        return .{ .UnixArray = value };
    }
};
// I know, I know I use @sizeOf too much, but I like it. Allow me to understand what it represent
/// Take an array of zig type and return an encoded version to use with Data.initType
/// Like that: Data.initIntArray(try allocEncodArray.Int(my_array))
/// Encoding: an 8-byte (u64) payload-size header, then the packed items.
/// Caller owns the returned slice — don't forget to free it!
/// allocator.free(data.IntArray)
pub const allocEncodArray = struct {
    pub fn Int(allocator: std.mem.Allocator, items: []const i32) ![]const u8 {
        // Buffer: 8-byte header + packed i32 items.
        const buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(i32) * items.len);

        // Payload size in bytes, NOT counting the 8-byte header itself.
        const items_len: u64 = items.len * @sizeOf(i32);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));

        // Pack the values one after another.
        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(i32);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }
        return buffer;
    }

    pub fn Float(allocator: std.mem.Allocator, items: []const f64) ![]const u8 {
        const buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(f64) * items.len);
        const items_len: u64 = items.len * @sizeOf(f64);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));
        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(f64);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }
        return buffer;
    }

    pub fn Bool(allocator: std.mem.Allocator, items: []const bool) ![]const u8 {
        const buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(bool) * items.len);
        const items_len: u64 = items.len * @sizeOf(bool);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));
        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(bool);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }
        return buffer;
    }

    pub fn UUID(allocator: std.mem.Allocator, items: []const [16]u8) ![]const u8 {
        const buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf([16]u8) * items.len);
        const items_len: u64 = items.len * @sizeOf([16]u8);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));
        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf([16]u8);
            @memcpy(buffer[start..end], &item);
            start = end;
        }
        return buffer;
    }

    pub fn Unix(allocator: std.mem.Allocator, items: []const u64) ![]const u8 {
        const buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(u64) * items.len);
        const items_len: u64 = items.len * @sizeOf(u64);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));
        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(u64);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }
        return buffer;
    }

    /// Strings are encoded as: u64 total payload size, then for each item
    /// a u64 byte count followed by the bytes. This matches what
    /// ArrayIterator expects for `.StrArray`.
    pub fn Str(allocator: std.mem.Allocator, items: []const []const u8) ![]const u8 {
        var total_len: usize = @sizeOf(u64);
        for (items) |item| {
            total_len += @sizeOf(u64) + @sizeOf(u8) * item.len;
        }
        const buffer = try allocator.alloc(u8, total_len);

        // Header: payload size in bytes, excluding the header itself.
        // BUG FIX: the original wrote asBytes of a `usize` expression into an
        // 8-byte slice; on a 32-bit target usize is 4 bytes, which breaks the
        // on-disk format (and trips @memcpy's length check). Force u64.
        const payload_len: u64 = total_len - @sizeOf(u64);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&payload_len));

        // Then for each string: its u64 byte count, then the bytes.
        var start: usize = @sizeOf(u64);
        var end: usize = 0;
        for (items) |item| {
            // Same portability fix as above: item.len is usize, widen to u64.
            const item_len: u64 = item.len;
            end = start + @sizeOf(u64);
            @memcpy(buffer[start..end], std.mem.asBytes(&item_len));
            end += item.len;
            @memcpy(buffer[(start + @sizeOf(u64))..end], item);
            start = end;
        }
        return buffer;
    }
};
/// Iterator over a data file. Each call to `next()` decodes one row
/// ([]Data, one entry per schema column) or returns null at end of file.
/// Str and array payloads in the returned row point into the shared
/// module-level scratch buffers, so they are only valid until the
/// following call to `next()`.
pub const DataIterator = struct {
    allocator: std.mem.Allocator,
    file: std.fs.File,
    reader: std.io.BufferedReader(4096, std.fs.File.Reader), // Use ArrayList reader maybe ?
    schema: []const DType,
    data: []Data,
    index: usize = 0,
    file_len: usize,
    str_index: usize = 0,
    array_index: usize = 0,

    /// Open `name` (relative to `dir`, or the cwd when `dir` is null)
    /// for reading. Caller must call `deinit()` when done.
    pub fn init(allocator: std.mem.Allocator, name: []const u8, dir: ?std.fs.Dir, schema: []const DType) !DataIterator {
        const base_dir = dir orelse std.fs.cwd();
        const file = try base_dir.openFile(name, .{ .mode = .read_only });
        const row = try allocator.alloc(Data, schema.len);
        return .{
            .allocator = allocator,
            .file = file,
            .schema = schema,
            .reader = std.io.bufferedReader(file.reader()),
            .data = row,
            .file_len = try file.getEndPos(),
        };
    }

    /// Release the row buffer and close the file.
    pub fn deinit(self: *DataIterator) void {
        self.allocator.free(self.data);
        self.file.close();
    }

    /// Decode the next row, or return null once the whole file is consumed.
    pub fn next(self: *DataIterator) !?[]Data {
        // The scratch buffers are recycled for every row.
        self.str_index = 0;
        self.array_index = 0;

        if (self.index >= self.file_len) return null;

        for (self.schema, 0..) |dtype, i| {
            self.data[i] = switch (dtype) {
                .Str => try dtype.readStr(self.reader.reader(), &self.str_index),
                .IntArray,
                .FloatArray,
                .BoolArray,
                .StrArray,
                .UUIDArray,
                .UnixArray,
                => try dtype.readArray(self.reader.reader(), &self.array_index),
                else => try dtype.read(self.reader.reader()),
            };
            // Track the absolute byte position so we know when the file ends.
            self.index += self.data[i].size();
        }
        return self.data;
    }
};
/// Iterates over the scalars packed inside an array-typed `Data`
/// (e.g. `.IntArray`), yielding one scalar `Data` per call to `next()`.
/// Arrays stay as opaque byte blobs until explicitly iterated, which is
/// mainly a performance choice: rows decode cheaply and you only pay for
/// the arrays you actually look at.
pub const ArrayIterator = struct {
    data: Data,
    end: usize,
    index: usize,

    /// Build an iterator over `data`.
    /// Returns `error.NonArrayDType` for scalar tags.
    pub fn init(data: Data) !ArrayIterator {
        // The blob starts with a u64 payload size (header excluded), so the
        // iteration end is header + payload.
        const total = switch (data) {
            .IntArray,
            .FloatArray,
            .BoolArray,
            .StrArray,
            .UUIDArray,
            .UnixArray,
            => |blob| @as(usize, @intCast(std.mem.bytesToValue(u64, blob[0..@sizeOf(u64)]))) + @sizeOf(u64),
            else => return error.NonArrayDType,
        };
        return .{
            .data = data,
            .end = total,
            .index = @sizeOf(u64), // start just past the size header
        };
    }

    /// Next element as a scalar Data, or null when the payload is exhausted.
    pub fn next(self: *ArrayIterator) ?Data {
        if (self.index >= self.end) return null;
        switch (self.data) {
            .IntArray => |blob| {
                const start = self.index;
                self.index = start + @sizeOf(i32);
                return Data{ .Int = std.mem.bytesToValue(i32, blob[start..self.index]) };
            },
            .FloatArray => |blob| {
                const start = self.index;
                self.index = start + @sizeOf(f64);
                return Data{ .Float = std.mem.bytesToValue(f64, blob[start..self.index]) };
            },
            .BoolArray => |blob| {
                const start = self.index;
                self.index = start + @sizeOf(bool);
                return Data{ .Bool = std.mem.bytesToValue(bool, blob[start..self.index]) };
            },
            .UUIDArray => |blob| {
                const start = self.index;
                self.index = start + @sizeOf([16]u8);
                return Data{ .UUID = std.mem.bytesToValue([16]u8, blob[start..self.index]) };
            },
            .UnixArray => |blob| {
                const start = self.index;
                self.index = start + @sizeOf(u64);
                return Data{ .Unix = std.mem.bytesToValue(u64, blob[start..self.index]) };
            },
            .StrArray => |blob| {
                // Each element: a u64 byte count, then that many bytes.
                const len = @as(usize, @intCast(std.mem.bytesToValue(u64, blob[self.index..(self.index + @sizeOf(u64))])));
                self.index += @sizeOf(u64) + len;
                return Data{ .Str = blob[(self.index - len)..self.index] };
            },
            else => unreachable, // init rejects scalar tags
        }
    }
};
/// Buffered writer for appending encoded rows to a data file.
/// Bytes accumulate in a 4096-byte buffer for performance; `flush()` is a
/// deliberately separate step — flushing on every write was very slow.
pub const DataWriter = struct {
    file: std.fs.File,
    writer: std.io.BufferedWriter(4096, std.fs.File.Writer),

    /// Open `name` (relative to `dir`, or the cwd when `dir` is null)
    /// and seek to the end so new rows are appended.
    /// Caller must call `deinit()` when done.
    pub fn init(name: []const u8, dir: ?std.fs.Dir) !DataWriter {
        const base_dir = dir orelse std.fs.cwd();
        const file = try base_dir.openFile(name, .{ .mode = .write_only });
        try file.seekFromEnd(0);
        return .{
            .file = file,
            .writer = std.io.bufferedWriter(file.writer()),
        };
    }

    /// Close the file. Does NOT flush — call `flush()` first, otherwise
    /// any bytes still sitting in the buffer are lost.
    pub fn deinit(self: *DataWriter) void {
        self.file.close();
    }

    /// Serialize one row (a slice of Data values) into the buffer.
    pub fn write(self: *DataWriter, data: []const Data) !void {
        const out = self.writer.writer();
        for (data) |value| try value.write(out);
    }

    /// Push all buffered bytes down to the underlying file.
    pub fn flush(self: *DataWriter) !void {
        try self.writer.flush();
    }
};
/// Create an empty data file named `name` in `dir` (cwd when null),
/// ready to be opened with DataWriter. Truncates an existing file.
pub fn createFile(name: []const u8, dir: ?std.fs.Dir) !void {
    const base_dir = dir orelse std.fs.cwd();
    const file = try base_dir.createFile(name, .{});
    file.close();
}
/// Delete the data file `name` from `dir` (cwd when null).
pub fn deleteFile(name: []const u8, dir: ?std.fs.Dir) !void {
    const base_dir = dir orelse std.fs.cwd();
    return base_dir.deleteFile(name);
}
/// Stat the data file `name` in `dir` (cwd when null).
/// Thin wrapper over Dir.statFile, kept so the module's file helpers
/// share a consistent API.
pub fn statFile(name: []const u8, dir: ?std.fs.Dir) !std.fs.File.Stat {
    return (dir orelse std.fs.cwd()).statFile(name);
}
// I have almost more lines of test than the real stuff x)
// But I think everything is tested to be fair, so good stuff
// It also write benchmark so you can benchmark on your own hardware
// The data write and read is not really representative of real worl tho
test "Array Iterators" {
    const allocator = std.testing.allocator;

    try std.fs.cwd().makeDir("array_tmp");
    var dir = try std.fs.cwd().openDir("array_tmp", .{});
    defer {
        dir.close();
        std.fs.cwd().deleteDir("array_tmp") catch {};
    }

    // Test data
    const int_array = [_]i32{ 32, 11, 15, 99 };
    const float_array = [_]f64{ 3.14, 2.718, 1.414, 0.577 };
    const bool_array = [_]bool{ true, false, true, false };
    const uuid_array = [_][16]u8{
        [_]u8{0} ** 16,
        [_]u8{1} ** 16,
        [_]u8{2} ** 16,
        [_]u8{3} ** 16,
    };
    const unix_array = [_]u64{ 1623456789, 1623456790, 1623456791, 1623456792 };
    const str_array = [_][]const u8{ "Hello", " world" };

    const data = [_]Data{
        Data.initIntArray(try allocEncodArray.Int(allocator, &int_array)),
        Data.initFloatArray(try allocEncodArray.Float(allocator, &float_array)),
        Data.initBoolArray(try allocEncodArray.Bool(allocator, &bool_array)),
        Data.initUUIDArray(try allocEncodArray.UUID(allocator, &uuid_array)),
        Data.initUnixArray(try allocEncodArray.Unix(allocator, &unix_array)),
        Data.initStrArray(try allocEncodArray.Str(allocator, &str_array)),
    };
    defer {
        allocator.free(data[0].IntArray);
        allocator.free(data[1].FloatArray);
        allocator.free(data[2].BoolArray);
        allocator.free(data[3].UUIDArray);
        allocator.free(data[4].UnixArray);
        allocator.free(data[5].StrArray);
    }

    // Write data to file.
    try createFile("test_arrays", dir);
    // BUG FIX: cleanup is now deferred. Previously `deleteFile` only ran on
    // success, so a failing expectation left "array_tmp/test_arrays" behind
    // and the deferred deleteDir failed silently on the non-empty directory,
    // breaking every following run's makeDir.
    defer deleteFile("test_arrays", dir) catch {};
    var dwriter = try DataWriter.init("test_arrays", dir);
    defer dwriter.deinit();
    try dwriter.write(&data);
    try dwriter.flush();

    // Read and verify data.
    const schema = &[_]DType{ .IntArray, .FloatArray, .BoolArray, .UUIDArray, .UnixArray, .StrArray };
    var iter = try DataIterator.init(allocator, "test_arrays", dir, schema);
    defer iter.deinit();

    if (try iter.next()) |row| {
        // Int Array
        {
            var array_iter = try ArrayIterator.init(row[0]);
            var i: usize = 0;
            while (array_iter.next()) |d| {
                try std.testing.expectEqual(int_array[i], d.Int);
                i += 1;
            }
            try std.testing.expectEqual(int_array.len, i);
        }

        // Float Array
        {
            var array_iter = try ArrayIterator.init(row[1]);
            var i: usize = 0;
            while (array_iter.next()) |d| {
                try std.testing.expectApproxEqAbs(float_array[i], d.Float, 0.0001);
                i += 1;
            }
            try std.testing.expectEqual(float_array.len, i);
        }

        // Bool Array
        {
            var array_iter = try ArrayIterator.init(row[2]);
            var i: usize = 0;
            while (array_iter.next()) |d| {
                try std.testing.expectEqual(bool_array[i], d.Bool);
                i += 1;
            }
            try std.testing.expectEqual(bool_array.len, i);
        }

        // UUID Array
        {
            var array_iter = try ArrayIterator.init(row[3]);
            var i: usize = 0;
            while (array_iter.next()) |d| {
                try std.testing.expectEqualSlices(u8, &uuid_array[i], &d.UUID);
                i += 1;
            }
            try std.testing.expectEqual(uuid_array.len, i);
        }

        // Unix Array
        {
            var array_iter = try ArrayIterator.init(row[4]);
            var i: usize = 0;
            while (array_iter.next()) |d| {
                try std.testing.expectEqual(unix_array[i], d.Unix);
                i += 1;
            }
            try std.testing.expectEqual(unix_array.len, i);
        }

        // Str Array
        {
            var array_iter = try ArrayIterator.init(row[5]);
            var i: usize = 0;
            while (array_iter.next()) |d| {
                try std.testing.expectEqualStrings(str_array[i], d.Str);
                i += 1;
            }
            try std.testing.expectEqual(str_array.len, i);
        }
    } else {
        return error.TestUnexpectedNull;
    }
}
// Round-trip test: write a mixed-scalar row, read it back, compare.
test "Write and Read" {
    const allocator = std.testing.allocator;

    try std.fs.cwd().makeDir("tmp");
    // BUG FIX: the directory handle was never closed and cleanup only ran
    // on success, so a failing expectation leaked the handle and left
    // "tmp"/"tmp/test" on disk, breaking the next run's makeDir.
    var dir = try std.fs.cwd().openDir("tmp", .{});
    defer {
        dir.close();
        std.fs.cwd().deleteDir("tmp") catch {};
    }

    const data = [_]Data{
        Data.initInt(1),
        Data.initFloat(3.14159),
        Data.initInt(-5),
        Data.initStr("Hello world"),
        Data.initBool(true),
        Data.initUnix(12476),
        Data.initStr("Another string =)"),
    };

    try createFile("test", dir);
    defer deleteFile("test", dir) catch {};
    var dwriter = try DataWriter.init("test", dir);
    defer dwriter.deinit();
    try dwriter.write(&data);
    try dwriter.flush();

    const schema = &[_]DType{
        .Int,
        .Float,
        .Int,
        .Str,
        .Bool,
        .Unix,
        .Str,
    };

    var iter = try DataIterator.init(allocator, "test", dir, schema);
    defer iter.deinit();

    if (try iter.next()) |row| {
        try std.testing.expectEqual(1, row[0].Int);
        try std.testing.expectApproxEqAbs(3.14159, row[1].Float, 0.00001);
        try std.testing.expectEqual(-5, row[2].Int);
        try std.testing.expectEqualStrings("Hello world", row[3].Str);
        try std.testing.expectEqual(true, row[4].Bool);
        try std.testing.expectEqual(12476, row[5].Unix);
        try std.testing.expectEqualStrings("Another string =)", row[6].Str);
    } else {
        return error.TestUnexpectedNull;
    }
}
// Benchmark fixture: one row containing one value of each scalar type.
test "Benchmark Write and Read" {
    const bench_schema = &[_]DType{ .Int, .Float, .Int, .Str, .Bool, .Unix };
    const bench_row = &[_]Data{
        Data.initInt(1),
        Data.initFloat(3.14159),
        Data.initInt(-5),
        Data.initStr("Hello world"),
        Data.initBool(true),
        Data.initUnix(2021),
    };
    try benchmark(bench_schema, bench_row);
}
/// Benchmark helper: for each size in `sizes`, write `data` as one row
/// `size` times, then read every row back with `schema`, printing
/// wall-clock timings to stderr. Asserts the read row count matches
/// what was written.
fn benchmark(schema: []const DType, data: []const Data) !void {
    const allocator = std.testing.allocator;
    const sizes = [_]usize{ 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000 };

    try std.fs.cwd().makeDir("benchmark_tmp");
    var dir = try std.fs.cwd().openDir("benchmark_tmp", .{});
    defer std.fs.cwd().deleteDir("benchmark_tmp") catch {};
    // BUG FIX: the directory handle was never closed (fd leak).
    // Defers run LIFO, so this close executes before the deleteDir above.
    defer dir.close();

    for (sizes) |size| {
        std.debug.print("\nBenchmarking with {d} rows:\n", .{size});

        // Benchmark write (timing includes file creation).
        const write_start = std.time.nanoTimestamp();
        try createFile("benchmark", dir);
        var dwriter = try DataWriter.init("benchmark", dir);
        // Runs at the end of each loop iteration.
        // NOTE(review): this closes the file AFTER deleteFile below — fine on
        // POSIX, but worth confirming on Windows where open files can't be deleted.
        defer dwriter.deinit();
        for (0..size) |_| try dwriter.write(data);
        try dwriter.flush();
        const write_end = std.time.nanoTimestamp();
        const write_duration = @as(f64, @floatFromInt(write_end - write_start)) / 1e6;
        std.debug.print("Write time: {d:.6} ms\n", .{write_duration});
        std.debug.print("Average write time: {d:.2} μs\n", .{write_duration / @as(f64, @floatFromInt(size)) * 1000});

        // Benchmark read.
        const read_start = std.time.nanoTimestamp();
        var iter = try DataIterator.init(allocator, "benchmark", dir, schema);
        defer iter.deinit();
        var count: usize = 0;
        while (try iter.next()) |_| {
            count += 1;
        }
        const read_end = std.time.nanoTimestamp();
        const read_duration = @as(f64, @floatFromInt(read_end - read_start)) / 1e6;
        std.debug.print("Read time: {d:.6} ms\n", .{read_duration});
        std.debug.print("Average read time: {d:.2} μs\n", .{read_duration / @as(f64, @floatFromInt(size)) * 1000});

        try std.testing.expectEqual(size, count);
        std.debug.print("{any}", .{statFile("benchmark", dir)});
        try deleteFile("benchmark", dir);
        std.debug.print("\n", .{});
    }
}
// Per-type benchmark: times writing/reading 1M values of each scalar DType.
test "Benchmark Type" {
    const random = std.crypto.random;

    // Random 16-byte UUID payload, filled one byte at a time.
    var uuid: [16]u8 = undefined;
    for (&uuid) |*byte| byte.* = random.int(u8);

    try benchmarkType(.Int, Data.initInt(random.int(i32)));
    try benchmarkType(.Float, Data.initFloat(random.float(f64)));
    try benchmarkType(.Bool, Data.initBool(random.boolean()));
    try benchmarkType(.Str, Data.initStr("Hello world"));
    try benchmarkType(.UUID, Data.initUUID(uuid));
    try benchmarkType(.Unix, Data.initUnix(random.int(u64)));
}
/// Benchmark helper for a single DType: write `data` 1,000,000 times,
/// read everything back, and print wall-clock timings to stderr.
/// Asserts the read row count matches the write count.
fn benchmarkType(dtype: DType, data: Data) !void {
    const allocator = std.testing.allocator;
    const size = 1_000_000;

    try std.fs.cwd().makeDir("benchmark_type_tmp");
    var dir = try std.fs.cwd().openDir("benchmark_type_tmp", .{});
    defer std.fs.cwd().deleteDir("benchmark_type_tmp") catch {};
    // BUG FIX: the directory handle was never closed (fd leak).
    // Defers run LIFO, so this close executes before the deleteDir above.
    defer dir.close();

    std.debug.print("\nBenchmarking with {any} rows:\n", .{dtype});

    // Benchmark write (timing includes file creation).
    const write_start = std.time.nanoTimestamp();
    try createFile("benchmark", dir);
    const datas = &[_]Data{data};
    var dwriter = try DataWriter.init("benchmark", dir);
    defer dwriter.deinit();
    for (0..size) |_| try dwriter.write(datas);
    try dwriter.flush();
    const write_end = std.time.nanoTimestamp();
    const write_duration = @as(f64, @floatFromInt(write_end - write_start)) / 1e6;
    std.debug.print("Write time: {d:.6} ms\n", .{write_duration});

    const schema = &[_]DType{dtype};

    // Benchmark read.
    const read_start = std.time.nanoTimestamp();
    var iter = try DataIterator.init(allocator, "benchmark", dir, schema);
    defer iter.deinit();
    var count: usize = 0;
    while (try iter.next()) |_| {
        count += 1;
    }
    const read_end = std.time.nanoTimestamp();
    const read_duration = @as(f64, @floatFromInt(read_end - read_start)) / 1e6;
    std.debug.print("Read time: {d:.6} ms\n", .{read_duration});

    try std.testing.expectEqual(size, count);
    std.debug.print("{any}", .{statFile("benchmark", dir)});
    try deleteFile("benchmark", dir);
    std.debug.print("\n", .{});
}