So then I can parse the file again, create a map of UUID → sub-JSON, and just iterate over the first JSON looking for UUIDs, updating them with the new data from the map.
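A minimal sketch of that map-then-update idea, reusing the DataIterator from the file below instead of a JSON parser (the file name, schema, and the carried-over value are made up for illustration; the JSON side is left out):

// Hypothetical helper: build a UUID -> value map from a second pass over a file,
// so the first dataset can be patched by looking each UUID up in the map.
fn buildUpdateMap(allocator: std.mem.Allocator, schema: []const DType) !std.AutoHashMap([16]u8, u64) {
    var map = std.AutoHashMap([16]u8, u64).init(allocator);
    errdefer map.deinit();

    var iter = try DataIterator.init(allocator, "updates", null, schema);
    defer iter.deinit();

    // Assumes row[0] is the UUID and row[1] is the value to carry over (here a Unix timestamp).
    while (try iter.next()) |row| {
        try map.put(row[0].UUID, row[1].Unix);
    }
    return map;
}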
const std = @import("std");

// Maybe make the buffers growable with an ArrayList, but this is faster I think
// Maybe give the option? Like 2 kinds of reader? One with an ArrayList as arg
// I like this, I think I will do it. But later; at least I can see a way to keep the same API and use an ArrayList as the main buffer

const STRING_BUFFER_LENGTH = 1024 * 64 * 64; // Around 4.2 MB
var string_buffer: [STRING_BUFFER_LENGTH]u8 = undefined;

const ARRAY_BUFFER_LENGTH = 1024 * 64 * 64; // Around 4.2 MB
var array_buffer: [ARRAY_BUFFER_LENGTH]u8 = undefined;
pub const DType = enum {
    Int,
    Float,
    Str,
    Bool,
    UUID,
    Unix,

    IntArray,
    FloatArray,
    StrArray,
    BoolArray,
    UUIDArray,
    UnixArray,

    // I don't really like that there is a separate function, but OK
    // I had to do that so I can pass a second argument
    fn readStr(_: DType, reader: anytype, str_index: *usize) !Data {
        // Read the length of the string
        var len_buffer: [4]u8 = undefined;
        _ = try reader.readAtLeast(len_buffer[0..], @sizeOf(u32));
        const len = @as(usize, @intCast(std.mem.bytesToValue(u32, &len_buffer)));

        const end = str_index.* + len;
        if (end > string_buffer.len) return error.BufferFull;

        // Read the string
        _ = try reader.readAtLeast(string_buffer[str_index.*..end], len);
        const data = Data{ .Str = string_buffer[str_index.*..end] };

        str_index.* += len;
        return data;
    }

    fn readArray(self: DType, reader: anytype, array_index: *usize) !Data {
        // The first 8 bytes of an array are the number of bytes the array takes
        // This speeds up reading and makes str arrays easy
        var len_buffer: [8]u8 = undefined;
        _ = try reader.readAtLeast(len_buffer[0..], @sizeOf(u64));
        const len = @as(usize, @intCast(std.mem.bytesToValue(u64, &len_buffer)));

        // Get the end of the slice used in the array buffer and check that it is not too long
        const origin = array_index.*;
        const start = array_index.* + @sizeOf(u64);
        const end = start + len;
        if (end > array_buffer.len) return error.BufferFull;

        // Copy the len of the array and read all values
        @memcpy(array_buffer[array_index.*..start], len_buffer[0..]);
        _ = try reader.readAtLeast(array_buffer[start..end], len);
        array_index.* = end;

        return switch (self) {
            .IntArray => Data{ .IntArray = array_buffer[origin..end] },
            .FloatArray => Data{ .FloatArray = array_buffer[origin..end] },
            .BoolArray => Data{ .BoolArray = array_buffer[origin..end] },
            .UUIDArray => Data{ .UUIDArray = array_buffer[origin..end] },
            .UnixArray => Data{ .UnixArray = array_buffer[origin..end] },
            .StrArray => Data{ .StrArray = array_buffer[origin..end] },
            else => unreachable,
        };
    }
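    // For reference, the array encoding readArray expects looks like this on disk
    // (worked example, assuming a little-endian host since values are written with
    // std.mem.asBytes): an IntArray of { 32, 11 } is stored as
    //   08 00 00 00 00 00 00 00  -> u64 payload length in bytes (8)
    //   20 00 00 00 0b 00 00 00  -> the two i32 values
    // A StrArray repeats (u64 length, bytes) for each string after its own
    // 8-byte total-length prefix.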
    fn read(self: DType, reader: anytype) !Data {
        switch (self) {
            .Int => {
                var buffer: [@sizeOf(i32)]u8 = undefined;
                _ = try reader.readAtLeast(buffer[0..], @sizeOf(i32));
                return Data{ .Int = std.mem.bytesToValue(i32, &buffer) };
            },
            .Float => {
                var buffer: [@sizeOf(f64)]u8 = undefined;
                _ = try reader.readAtLeast(buffer[0..], @sizeOf(f64));
                return Data{ .Float = std.mem.bytesToValue(f64, &buffer) };
            },
            .Bool => {
                var buffer: [@sizeOf(bool)]u8 = undefined;
                _ = try reader.readAtLeast(buffer[0..], @sizeOf(bool));
                return Data{ .Bool = std.mem.bytesToValue(bool, &buffer) };
            },
            .UUID => {
                var buffer: [@sizeOf([16]u8)]u8 = undefined;
                _ = try reader.readAtLeast(buffer[0..], @sizeOf([16]u8));
                return Data{ .UUID = std.mem.bytesToValue([16]u8, &buffer) };
            },
            .Unix => {
                var buffer: [@sizeOf(u64)]u8 = undefined;
                _ = try reader.readAtLeast(buffer[0..], @sizeOf(u64));
                return Data{ .Unix = std.mem.bytesToValue(u64, &buffer) };
            },
            else => unreachable,
        }
    }
};
pub const Data = union(DType) {
    Int: i32,
    Float: f64,
    Str: []const u8,
    Bool: bool,
    UUID: [16]u8,
    Unix: u64,

    IntArray: []const u8,
    FloatArray: []const u8,
    StrArray: []const u8,
    BoolArray: []const u8,
    UUIDArray: []const u8,
    UnixArray: []const u8,

    /// Number of bytes that will be used in the file
    pub fn size(self: Data) usize {
        return switch (self) {
            .Int => @sizeOf(i32),
            .Float => @sizeOf(f64),
            .Str => 4 + self.Str.len,
            .Bool => @sizeOf(bool),
            .UUID => @sizeOf([16]u8),
            .Unix => @sizeOf(u64),

            .IntArray => self.IntArray.len,
            .FloatArray => self.FloatArray.len,
            .StrArray => self.StrArray.len,
            .BoolArray => self.BoolArray.len,
            .UUIDArray => self.UUIDArray.len,
            .UnixArray => self.UnixArray.len,
        };
    }

    /// Write the value in bytes
    fn write(self: Data, writer: anytype) !void {
        switch (self) {
            .Str => |v| {
                const len = @as(u32, @intCast(v.len));
                try writer.writeAll(std.mem.asBytes(&len));
                try writer.writeAll(v);
            },
            .UUID => |v| try writer.writeAll(&v),
            .Int => |v| try writer.writeAll(std.mem.asBytes(&v)),
            .Float => |v| try writer.writeAll(std.mem.asBytes(&v)),
            .Bool => |v| try writer.writeAll(std.mem.asBytes(&v)),
            .Unix => |v| try writer.writeAll(std.mem.asBytes(&v)),

            .StrArray => |v| try writer.writeAll(v),
            .UUIDArray => |v| try writer.writeAll(v),
            .IntArray => |v| try writer.writeAll(v),
            .FloatArray => |v| try writer.writeAll(v),
            .BoolArray => |v| try writer.writeAll(v),
            .UnixArray => |v| try writer.writeAll(v),
        }
    }

    pub fn initInt(value: i32) Data {
        return Data{ .Int = value };
    }

    pub fn initFloat(value: f64) Data {
        return Data{ .Float = value };
    }

    pub fn initStr(value: []const u8) Data {
        return Data{ .Str = value };
    }

    pub fn initBool(value: bool) Data {
        return Data{ .Bool = value };
    }

    pub fn initUUID(value: [16]u8) Data {
        return Data{ .UUID = value };
    }

    pub fn initUnix(value: u64) Data {
        return Data{ .Unix = value };
    }

    pub fn initIntArray(value: []const u8) Data {
        return Data{ .IntArray = value };
    }

    pub fn initFloatArray(value: []const u8) Data {
        return Data{ .FloatArray = value };
    }

    pub fn initStrArray(value: []const u8) Data {
        return Data{ .StrArray = value };
    }

    pub fn initBoolArray(value: []const u8) Data {
        return Data{ .BoolArray = value };
    }

    pub fn initUUIDArray(value: []const u8) Data {
        return Data{ .UUIDArray = value };
    }

    pub fn initUnixArray(value: []const u8) Data {
        return Data{ .UnixArray = value };
    }
};
// I know, I know, I use @sizeOf too much, but I like it. It lets me understand what each value represents

/// Take an array of a Zig type and return an encoded version to use with the Data init functions
/// Like that: Data.initIntArray(try allocEncodArray.Int(allocator, my_array))
/// Don't forget to free it! allocator.free(data.IntArray)
pub const allocEncodArray = struct {
    pub fn Int(allocator: std.mem.Allocator, items: []const i32) ![]const u8 {
        // Create a buffer of the right size
        var buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(i32) * items.len);

        // Get the length used by the array in bytes, the 8-byte length prefix itself not included
        const items_len: u64 = items.len * @sizeOf(i32);

        // Write the first 8 bytes as the number of bytes used by the items
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));

        // Write all values in the array
        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(i32);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }

        return buffer;
    }

    pub fn Float(allocator: std.mem.Allocator, items: []const f64) ![]const u8 {
        var buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(f64) * items.len);
        const items_len: u64 = items.len * @sizeOf(f64);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));

        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(f64);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }

        return buffer;
    }

    pub fn Bool(allocator: std.mem.Allocator, items: []const bool) ![]const u8 {
        var buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(bool) * items.len);
        const items_len: u64 = items.len * @sizeOf(bool);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));

        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(bool);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }

        return buffer;
    }

    pub fn UUID(allocator: std.mem.Allocator, items: []const [16]u8) ![]const u8 {
        var buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf([16]u8) * items.len);
        const items_len: u64 = items.len * @sizeOf([16]u8);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));

        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf([16]u8);
            @memcpy(buffer[start..end], &item);
            start = end;
        }

        return buffer;
    }

    pub fn Unix(allocator: std.mem.Allocator, items: []const u64) ![]const u8 {
        var buffer = try allocator.alloc(u8, @sizeOf(u64) + @sizeOf(u64) * items.len);
        const items_len: u64 = items.len * @sizeOf(u64);
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&items_len));

        var start: usize = @sizeOf(u64);
        for (items) |item| {
            const end: usize = start + @sizeOf(u64);
            @memcpy(buffer[start..end], std.mem.asBytes(&item));
            start = end;
        }

        return buffer;
    }

    pub fn Str(allocator: std.mem.Allocator, items: []const []const u8) ![]const u8 {
        var total_len: usize = @sizeOf(u64);
        for (items) |item| {
            total_len += @sizeOf(u64) + @sizeOf(u8) * item.len;
        }

        var buffer = try allocator.alloc(u8, total_len);

        // Write the total number of bytes used by this array as the first 8 bytes. Those first 8 bytes are not counted
        @memcpy(buffer[0..@sizeOf(u64)], std.mem.asBytes(&(total_len - @sizeOf(u64))));

        // Write the rest: the number of u8, then the string itself, repeated
        var start: usize = @sizeOf(u64);
        var end: usize = 0;
        for (items) |item| {
            // First write the len of the str
            end = start + @sizeOf(u64);
            @memcpy(buffer[start..end], std.mem.asBytes(&item.len));

            end += item.len;
            @memcpy(buffer[(start + @sizeOf(u64))..end], item);
            start = end;
        }

        return buffer;
    }
};
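// Example usage (hypothetical names, not part of the tests below):
//   const ages = [_]i32{ 20, 30, 40 };
//   const encoded = try allocEncodArray.Int(allocator, &ages);
//   defer allocator.free(encoded);
//   const data = Data.initIntArray(encoded);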
/// This takes the name of a file and a schema and returns an iterator.
/// You can then use it in a while loop and it will yield a []Data,
/// one for each write. This is basically like a row in a table.
pub const DataIterator = struct {
    allocator: std.mem.Allocator,
    file: std.fs.File,
    reader: std.io.BufferedReader(4096, std.fs.File.Reader),

    schema: []const DType,
    data: []Data,

    index: usize = 0,
    file_len: usize,
    str_index: usize = 0,
    array_index: usize = 0,

    pub fn init(allocator: std.mem.Allocator, name: []const u8, dir: ?std.fs.Dir, schema: []const DType) !DataIterator {
        const d_ = dir orelse std.fs.cwd();
        const file = try d_.openFile(name, .{ .mode = .read_only });

        return DataIterator{
            .allocator = allocator,
            .file = file,
            .schema = schema,
            .reader = std.io.bufferedReader(file.reader()),
            .data = try allocator.alloc(Data, schema.len),
            .file_len = try file.getEndPos(),
        };
    }

    pub fn deinit(self: *DataIterator) void {
        self.allocator.free(self.data);
        self.file.close();
    }

    pub fn next(self: *DataIterator) !?[]Data {
        self.str_index = 0;
        self.array_index = 0;
        if (self.index >= self.file_len) return null;

        var i: usize = 0;
        while (i < self.schema.len) : (i += 1) {
            self.data[i] = switch (self.schema[i]) {
                .Str => try self.schema[i].readStr(self.reader.reader(), &self.str_index),
                .IntArray,
                .FloatArray,
                .BoolArray,
                .StrArray,
                .UUIDArray,
                .UnixArray,
                => try self.schema[i].readArray(self.reader.reader(), &self.array_index),
                else => try self.schema[i].read(self.reader.reader()),
            };
            self.index += self.data[i].size();
        }

        return self.data;
    }
};
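// Example of a read loop (file name and schema are illustrative):
//   var iter = try DataIterator.init(allocator, "users", null, &[_]DType{ .UUID, .Str, .Unix });
//   defer iter.deinit();
//   while (try iter.next()) |row| {
//       // row[0].UUID, row[1].Str, row[2].Unix ...
//   }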
// For now this mirrors DataIterator (see the ArrayList note at the top of the file)
pub const DataIteratorFullBuffer = struct {
    allocator: std.mem.Allocator,
    file: std.fs.File,
    reader: std.io.BufferedReader(4096, std.fs.File.Reader),

    schema: []const DType,
    data: []Data,

    index: usize = 0,
    file_len: usize,
    str_index: usize = 0,
    array_index: usize = 0,

    pub fn init(allocator: std.mem.Allocator, name: []const u8, dir: ?std.fs.Dir, schema: []const DType) !DataIteratorFullBuffer {
        const d_ = dir orelse std.fs.cwd();
        const file = try d_.openFile(name, .{ .mode = .read_only });

        return DataIteratorFullBuffer{
            .allocator = allocator,
            .file = file,
            .schema = schema,
            .reader = std.io.bufferedReader(file.reader()),
            .data = try allocator.alloc(Data, schema.len),
            .file_len = try file.getEndPos(),
        };
    }

    pub fn deinit(self: *DataIteratorFullBuffer) void {
        self.allocator.free(self.data);
        self.file.close();
    }

    pub fn next(self: *DataIteratorFullBuffer) !?[]Data {
        self.str_index = 0;
        self.array_index = 0;
        if (self.index >= self.file_len) return null;

        var i: usize = 0;
        while (i < self.schema.len) : (i += 1) {
            self.data[i] = switch (self.schema[i]) {
                .Str => try self.schema[i].readStr(self.reader.reader(), &self.str_index),
                .IntArray,
                .FloatArray,
                .BoolArray,
                .StrArray,
                .UUIDArray,
                .UnixArray,
                => try self.schema[i].readArray(self.reader.reader(), &self.array_index),
                else => try self.schema[i].read(self.reader.reader()),
            };
            self.index += self.data[i].size();
        }

        return self.data;
    }
};
/// When using DataIterator, if one Data is an array (like IntArray), you need to use this to create a sub-iterator that returns the Data inside the array.
/// This is mainly for performance reasons: you only iterate an array if needed, otherwise it is just a big blob of u8, like a str
pub const ArrayIterator = struct {
    data: Data,
    end: usize,
    index: usize,

    pub fn init(data: Data) !ArrayIterator {
        const len = switch (data) {
            .IntArray,
            .FloatArray,
            .BoolArray,
            .StrArray,
            .UUIDArray,
            .UnixArray,
            => |buffer| @as(usize, @intCast(std.mem.bytesToValue(u64, buffer[0..@sizeOf(u64)]))) + @sizeOf(u64),
            else => return error.NonArrayDType,
        };

        return ArrayIterator{
            .data = data,
            .end = len,
            .index = @sizeOf(u64),
        };
    }

    pub fn next(self: *ArrayIterator) ?Data {
        if (self.index >= self.end) return null;

        switch (self.data) {
            .IntArray => |buffer| {
                self.index += @sizeOf(i32);
                return Data{ .Int = std.mem.bytesToValue(i32, buffer[(self.index - @sizeOf(i32))..self.index]) };
            },
            .FloatArray => |buffer| {
                self.index += @sizeOf(f64);
                return Data{ .Float = std.mem.bytesToValue(f64, buffer[(self.index - @sizeOf(f64))..self.index]) };
            },
            .BoolArray => |buffer| {
                self.index += @sizeOf(bool);
                return Data{ .Bool = std.mem.bytesToValue(bool, buffer[(self.index - @sizeOf(bool))..self.index]) };
            },
            .UUIDArray => |buffer| {
                self.index += @sizeOf([16]u8);
                return Data{ .UUID = std.mem.bytesToValue([16]u8, buffer[(self.index - @sizeOf([16]u8))..self.index]) };
            },
            .UnixArray => |buffer| {
                self.index += @sizeOf(u64);
                return Data{ .Unix = std.mem.bytesToValue(u64, buffer[(self.index - @sizeOf(u64))..self.index]) };
            },
            .StrArray => |buffer| {
                // Read the first 8 bytes as the len, then return the str slice of that length
                const len = @as(usize, @intCast(std.mem.bytesToValue(u64, buffer[self.index..(self.index + @sizeOf(u64))])));
                self.index += @sizeOf(u64) + len;
                return Data{ .Str = buffer[(self.index - len)..self.index] };
            },
            else => unreachable,
        }
    }
};
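// Example of iterating one array cell from a row (assumes row[i] holds an IntArray):
//   var ints = try ArrayIterator.init(row[i]);
//   while (ints.next()) |d| {
//       // d.Int is the decoded i32
//   }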
/// A data writer to write into a file. I use a struct so I can use a buffer and improve perf
/// I added a separate flush method, to not flush at each write. Otherwise it is very slow
/// Performance concern once again.
pub const DataWriter = struct {
    file: std.fs.File,
    writer: std.io.BufferedWriter(4096, std.fs.File.Writer),

    pub fn init(name: []const u8, dir: ?std.fs.Dir) !DataWriter {
        const d_ = dir orelse std.fs.cwd();
        const file = try d_.openFile(name, .{ .mode = .write_only });
        try file.seekFromEnd(0);

        return DataWriter{
            .file = file,
            .writer = std.io.bufferedWriter(file.writer()),
        };
    }

    pub fn deinit(self: *DataWriter) void {
        self.file.close();
    }

    pub fn write(self: *DataWriter, data: []const Data) !void {
        for (data) |d| try d.write(self.writer.writer());
    }

    pub fn flush(self: *DataWriter) !void {
        try self.writer.flush();
    }
};
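// Typical write path (names are illustrative):
//   try createFile("users", null);
//   var dwriter = try DataWriter.init("users", null);
//   defer dwriter.deinit();
//   try dwriter.write(&[_]Data{ Data.initUUID(id), Data.initStr("Bob") });
//   try dwriter.flush(); // flush once at the end, not per write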
/// Create a new data file that can then be used by the DataWriter
pub fn createFile(name: []const u8, dir: ?std.fs.Dir) !void {
    const d = dir orelse std.fs.cwd();
    const file = try d.createFile(name, .{});
    defer file.close();
}

/// Self-explanatory.
pub fn deleteFile(name: []const u8, dir: ?std.fs.Dir) !void {
    const d = dir orelse std.fs.cwd();
    try d.deleteFile(name);
}

/// Just to keep a similar API
pub fn statFile(name: []const u8, dir: ?std.fs.Dir) !std.fs.File.Stat {
    const d = dir orelse std.fs.cwd();
    return d.statFile(name);
}

// I have almost more lines of test than the real stuff x)
// But I think everything is tested to be fair, so good stuff
// There are also benchmarks so you can benchmark on your own hardware
// The data written and read is not really representative of the real world tho
test "Array Iterators" {
|
|
const allocator = std.testing.allocator;
|
|
|
|
try std.fs.cwd().makeDir("array_tmp");
|
|
var dir = try std.fs.cwd().openDir("array_tmp", .{});
|
|
defer {
|
|
dir.close();
|
|
std.fs.cwd().deleteDir("array_tmp") catch {};
|
|
}
|
|
|
|
// Test data
|
|
const int_array = [_]i32{ 32, 11, 15, 99 };
|
|
const float_array = [_]f64{ 3.14, 2.718, 1.414, 0.577 };
|
|
const bool_array = [_]bool{ true, false, true, false };
|
|
const uuid_array = [_][16]u8{
|
|
[_]u8{0} ** 16,
|
|
[_]u8{1} ** 16,
|
|
[_]u8{2} ** 16,
|
|
[_]u8{3} ** 16,
|
|
};
|
|
const unix_array = [_]u64{ 1623456789, 1623456790, 1623456791, 1623456792 };
|
|
const str_array = [_][]const u8{ "Hello", " world" };
|
|
|
|
const data = [_]Data{
|
|
Data.initIntArray(try allocEncodArray.Int(allocator, &int_array)),
|
|
Data.initFloatArray(try allocEncodArray.Float(allocator, &float_array)),
|
|
Data.initBoolArray(try allocEncodArray.Bool(allocator, &bool_array)),
|
|
Data.initUUIDArray(try allocEncodArray.UUID(allocator, &uuid_array)),
|
|
Data.initUnixArray(try allocEncodArray.Unix(allocator, &unix_array)),
|
|
Data.initStrArray(try allocEncodArray.Str(allocator, &str_array)),
|
|
};
|
|
defer {
|
|
allocator.free(data[0].IntArray);
|
|
allocator.free(data[1].FloatArray);
|
|
allocator.free(data[2].BoolArray);
|
|
allocator.free(data[3].UUIDArray);
|
|
allocator.free(data[4].UnixArray);
|
|
allocator.free(data[5].StrArray);
|
|
}
|
|
|
|
// Write data to file
|
|
try createFile("test_arrays", dir);
|
|
var dwriter = try DataWriter.init("test_arrays", dir);
|
|
defer dwriter.deinit();
|
|
try dwriter.write(&data);
|
|
try dwriter.flush();
|
|
|
|
// Read and verify data
|
|
const schema = &[_]DType{ .IntArray, .FloatArray, .BoolArray, .UUIDArray, .UnixArray, .StrArray };
|
|
var iter = try DataIterator.init(allocator, "test_arrays", dir, schema);
|
|
defer iter.deinit();
|
|
|
|
if (try iter.next()) |row| {
|
|
// Int Array
|
|
{
|
|
var array_iter = try ArrayIterator.init(row[0]);
|
|
var i: usize = 0;
|
|
while (array_iter.next()) |d| {
|
|
try std.testing.expectEqual(int_array[i], d.Int);
|
|
i += 1;
|
|
}
|
|
try std.testing.expectEqual(int_array.len, i);
|
|
}
|
|
|
|
// Float Array
|
|
{
|
|
var array_iter = try ArrayIterator.init(row[1]);
|
|
var i: usize = 0;
|
|
while (array_iter.next()) |d| {
|
|
try std.testing.expectApproxEqAbs(float_array[i], d.Float, 0.0001);
|
|
i += 1;
|
|
}
|
|
try std.testing.expectEqual(float_array.len, i);
|
|
}
|
|
|
|
// Bool Array
|
|
{
|
|
var array_iter = try ArrayIterator.init(row[2]);
|
|
var i: usize = 0;
|
|
while (array_iter.next()) |d| {
|
|
try std.testing.expectEqual(bool_array[i], d.Bool);
|
|
i += 1;
|
|
}
|
|
try std.testing.expectEqual(bool_array.len, i);
|
|
}
|
|
|
|
// UUID Array
|
|
{
|
|
var array_iter = try ArrayIterator.init(row[3]);
|
|
var i: usize = 0;
|
|
while (array_iter.next()) |d| {
|
|
try std.testing.expectEqualSlices(u8, &uuid_array[i], &d.UUID);
|
|
i += 1;
|
|
}
|
|
try std.testing.expectEqual(uuid_array.len, i);
|
|
}
|
|
|
|
// Unix Array
|
|
{
|
|
var array_iter = try ArrayIterator.init(row[4]);
|
|
var i: usize = 0;
|
|
while (array_iter.next()) |d| {
|
|
try std.testing.expectEqual(unix_array[i], d.Unix);
|
|
i += 1;
|
|
}
|
|
try std.testing.expectEqual(unix_array.len, i);
|
|
}
|
|
|
|
// Str Array
|
|
{
|
|
var array_iter = try ArrayIterator.init(row[5]);
|
|
var i: usize = 0;
|
|
while (array_iter.next()) |d| {
|
|
try std.testing.expectEqualStrings(str_array[i], d.Str);
|
|
i += 1;
|
|
}
|
|
try std.testing.expectEqual(str_array.len, i);
|
|
}
|
|
} else {
|
|
return error.TestUnexpectedNull;
|
|
}
|
|
|
|
try deleteFile("test_arrays", dir);
|
|
}
|
|
|
|
test "Write and Read" {
|
|
const allocator = std.testing.allocator;
|
|
|
|
try std.fs.cwd().makeDir("tmp");
|
|
const dir = try std.fs.cwd().openDir("tmp", .{});
|
|
|
|
const data = [_]Data{
|
|
Data.initInt(1),
|
|
Data.initFloat(3.14159),
|
|
Data.initInt(-5),
|
|
Data.initStr("Hello world"),
|
|
Data.initBool(true),
|
|
Data.initUnix(12476),
|
|
Data.initStr("Another string =)"),
|
|
};
|
|
|
|
try createFile("test", dir);
|
|
|
|
var dwriter = try DataWriter.init("test", dir);
|
|
defer dwriter.deinit();
|
|
try dwriter.write(&data);
|
|
try dwriter.flush();
|
|
|
|
const schema = &[_]DType{
|
|
.Int,
|
|
.Float,
|
|
.Int,
|
|
.Str,
|
|
.Bool,
|
|
.Unix,
|
|
.Str,
|
|
};
|
|
var iter = try DataIterator.init(allocator, "test", dir, schema);
|
|
defer iter.deinit();
|
|
|
|
if (try iter.next()) |row| {
|
|
try std.testing.expectEqual(1, row[0].Int);
|
|
try std.testing.expectApproxEqAbs(3.14159, row[1].Float, 0.00001);
|
|
try std.testing.expectEqual(-5, row[2].Int);
|
|
try std.testing.expectEqualStrings("Hello world", row[3].Str);
|
|
try std.testing.expectEqual(true, row[4].Bool);
|
|
try std.testing.expectEqual(12476, row[5].Unix);
|
|
try std.testing.expectEqualStrings("Another string =)", row[6].Str);
|
|
} else {
|
|
return error.TestUnexpectedNull;
|
|
}
|
|
|
|
try deleteFile("test", dir);
|
|
try std.fs.cwd().deleteDir("tmp");
|
|
}
|
|
|
|
test "Benchmark Write and Read" {
|
|
const schema = &[_]DType{
|
|
.Int,
|
|
.Float,
|
|
.Int,
|
|
.Str,
|
|
.Bool,
|
|
.Unix,
|
|
};
|
|
|
|
const data = &[_]Data{
|
|
Data.initInt(1),
|
|
Data.initFloat(3.14159),
|
|
Data.initInt(-5),
|
|
Data.initStr("Hello world"),
|
|
Data.initBool(true),
|
|
Data.initUnix(2021),
|
|
};
|
|
|
|
try benchmark(schema, data);
|
|
}
|
|
|
|
fn benchmark(schema: []const DType, data: []const Data) !void {
|
|
const allocator = std.testing.allocator;
|
|
const sizes = [_]usize{ 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000 };
|
|
|
|
try std.fs.cwd().makeDir("benchmark_tmp");
|
|
const dir = try std.fs.cwd().openDir("benchmark_tmp", .{});
|
|
defer std.fs.cwd().deleteDir("benchmark_tmp") catch {};
|
|
|
|
for (sizes) |size| {
|
|
std.debug.print("\nBenchmarking with {d} rows:\n", .{size});
|
|
|
|
// Benchmark write
|
|
const write_start = std.time.nanoTimestamp();
|
|
try createFile("benchmark", dir);
|
|
|
|
var dwriter = try DataWriter.init("benchmark", dir);
|
|
defer dwriter.deinit();
|
|
for (0..size) |_| try dwriter.write(data);
|
|
try dwriter.flush();
|
|
const write_end = std.time.nanoTimestamp();
|
|
const write_duration = @as(f64, @floatFromInt(write_end - write_start)) / 1e6;
|
|
|
|
std.debug.print("Write time: {d:.6} ms\n", .{write_duration});
|
|
std.debug.print("Average write time: {d:.2} μs\n", .{write_duration / @as(f64, @floatFromInt(size)) * 1000});
|
|
|
|
// Benchmark read
|
|
const read_start = std.time.nanoTimestamp();
|
|
var iter = try DataIterator.init(allocator, "benchmark", dir, schema);
|
|
defer iter.deinit();
|
|
|
|
var count: usize = 0;
|
|
while (try iter.next()) |_| {
|
|
count += 1;
|
|
}
|
|
const read_end = std.time.nanoTimestamp();
|
|
const read_duration = @as(f64, @floatFromInt(read_end - read_start)) / 1e6;
|
|
|
|
std.debug.print("Read time: {d:.6} ms\n", .{read_duration});
|
|
std.debug.print("Average read time: {d:.2} μs\n", .{read_duration / @as(f64, @floatFromInt(size)) * 1000});
|
|
try std.testing.expectEqual(size, count);
|
|
|
|
std.debug.print("{any}", .{statFile("benchmark", dir)});
|
|
|
|
try deleteFile("benchmark", dir);
|
|
std.debug.print("\n", .{});
|
|
}
|
|
}
|
|
|
|
test "Benchmark Type" {
|
|
const random = std.crypto.random;
|
|
const uuid = [16]u8{
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
random.int(u8),
|
|
};
|
|
|
|
try benchmarkType(.Int, Data.initInt(random.int(i32)));
|
|
try benchmarkType(.Float, Data.initFloat(random.float(f64)));
|
|
try benchmarkType(.Bool, Data.initBool(random.boolean()));
|
|
try benchmarkType(.Str, Data.initStr("Hello world"));
|
|
try benchmarkType(.UUID, Data.initUUID(uuid));
|
|
try benchmarkType(.Unix, Data.initUnix(random.int(u64)));
|
|
}
|
|
|
|
fn benchmarkType(dtype: DType, data: Data) !void {
|
|
const allocator = std.testing.allocator;
|
|
|
|
const size = 1_000_000;
|
|
|
|
try std.fs.cwd().makeDir("benchmark_type_tmp");
|
|
const dir = try std.fs.cwd().openDir("benchmark_type_tmp", .{});
|
|
defer std.fs.cwd().deleteDir("benchmark_type_tmp") catch {};
|
|
|
|
std.debug.print("\nBenchmarking with {any} rows:\n", .{dtype});
|
|
|
|
// Benchmark write
|
|
const write_start = std.time.nanoTimestamp();
|
|
try createFile("benchmark", dir);
|
|
|
|
const datas = &[_]Data{data};
|
|
|
|
var dwriter = try DataWriter.init("benchmark", dir);
|
|
defer dwriter.deinit();
|
|
for (0..size) |_| try dwriter.write(datas);
|
|
try dwriter.flush();
|
|
const write_end = std.time.nanoTimestamp();
|
|
const write_duration = @as(f64, @floatFromInt(write_end - write_start)) / 1e6;
|
|
|
|
std.debug.print("Write time: {d:.6} ms\n", .{write_duration});
|
|
|
|
const schema = &[_]DType{dtype};
|
|
|
|
// Benchmark read
|
|
const read_start = std.time.nanoTimestamp();
|
|
var iter = try DataIterator.init(allocator, "benchmark", dir, schema);
|
|
defer iter.deinit();
|
|
|
|
var count: usize = 0;
|
|
while (try iter.next()) |_| {
|
|
count += 1;
|
|
}
|
|
const read_end = std.time.nanoTimestamp();
|
|
const read_duration = @as(f64, @floatFromInt(read_end - read_start)) / 1e6;
|
|
|
|
std.debug.print("Read time: {d:.6} ms\n", .{read_duration});
|
|
try std.testing.expectEqual(size, count);
|
|
|
|
std.debug.print("{any}", .{statFile("benchmark", dir)});
|
|
|
|
try deleteFile("benchmark", dir);
|
|
std.debug.print("\n", .{});
|
|
}
|