From 0127daa330fbc6c2984f03f714d6458b395ee677 Mon Sep 17 00:00:00 2001
From: MrBounty
Date: Sat, 21 Dec 2024 15:13:50 +0100
Subject: [PATCH] Push before leaving for Christmas

---
 docs/TODO v0.2.md             |  42 ++---
 docs/ZiQL.md                  |   1 -
 docs/ZipponData.md            | 278 ++++++++++++++++++++++++++++++++++
 docs/cli.md                   |   4 +-
 mkdocs.yml                    |   4 +
 src/entityWriter.zig          |  44 ++++--
 src/fileEngine.zig            | 160 +++++++++++++++++--
 src/stuffs/additionalData.zig |  10 ++
 src/stuffs/relationMap.zig    |  32 ++++
 src/ziqlParser.zig            |  14 +-
 test_runner.zig               |   6 +-
 11 files changed, 538 insertions(+), 57 deletions(-)
 create mode 100644 docs/ZipponData.md
 create mode 100644 src/stuffs/relationMap.zig

diff --git a/docs/TODO v0.2.md b/docs/TODO v0.2.md
index ffe05ce..ccbf894 100644
--- a/docs/TODO v0.2.md
+++ b/docs/TODO v0.2.md
@@ -1,14 +1,15 @@
 - [ ] Delete the .new file if an error happend
-- [ ] Create a struct that manage the schema
+- [ ] Array manipulation
+- [ ] Some time keywords like NOW
 
 Relationships
 
 - [X] Update the schema Parser and Tokenizer
 - [X] Include the name of the link struct with the schema_struct
 - [X] New ConditionValue that is an array of UUID
-- [ ] When relationship found in filter, check if the type is right and exist
-- [ ] When parseFilter, get list of UUID as value for relationship
-- [ ] Add new operation in Filter evalue: IN and !IN
-- [ ] parseNewData can use filter like in "Add User (friends = [10] {age > 20})" to return UUID
+- [X] When a relationship is found in a filter, check that the type is right and exists
+- [X] When parseFilter, get a list of UUID as value for the relationship
+- [X] Add new operations in Filter evaluate: IN and !IN
+- [~] parseNewData can use a filter like in "Add User (friends = [10] {age > 20})" to return UUID
 - [ ] parseFilter can use sub filter. "GRAB User {friends IN {age > 20}}" At least one friend in a list of UUID
 - [ ] When send, send the entities in link specify between []
 
@@ -38,23 +39,22 @@ So I need a Radix tree to be able to find all file to parse.
 For example if I do "GRAB User [mom] {name = 'Bob'}". I parse one time the file to get all UUID of User that represent mom; the parse that is already done and need to be done.
 So if I found 3 Bob's mom UUID 2. Then I create a map of Bob's UUID as keys and a Str as value. The Str is the JSON string of the mom.
 For that I need to parse the file again and write using additional_data
 
-### Radix tree
+## Run in WASM for a demo
 
-Ok so new problem. Given a list of UUID, I need a way to find all file index to parse.
-And even better if I can get the number of UUID per files, so I can stop parsing them early.
+This could be fun: make a small demo where you get a wasm that runs the database locally in the browser.
 
-Happy to annonce the v0.2 of my database. New feature include:
-- Relationship
-- Huge performance increase with multi threading
-- Date, time and datetime type
-- Compressed binary files
-- Logs
+## How do I return relationship
 
-All core features of the query language, exept linked queries, is working, v0.3 will focus on adding things around it, including:
-- Schema migration
-- Dump/Bump data
-- Recovery
-- Better CLI
+So let's say I have a query that gets 100 comments and I return Comment.User. That means once I parsed all Comments, I got all the UUID of User in ConditionValue maps.
+I need to get all UUID, meaning concatenating all UUID of all ConditionValue into one map. Then I can parse `User` and create a new map with UUID as key and the JSON string as value.
+Like that I can iterate as much as I want inside.
-Query optimization for later:
-- If a filter use id to find something, to stop after find it, as I know there is no other struct with the same id
+That means:
+
+- If I have a link in AdditionalData:
+    - Get all UUID that I need the data for (concatenate all maps)
+    - Create a new map UUID -> JSON object
+    - Parse files and populate the new maps
+
+Which also means that I need to do all of them at the same time at the beginning. So using AdditionalData, I iterate over all Nodes, find all Links and do what I said above.
+I can then save those maps into a map keyed by a path like `Comment.friends`, with the UUID/JSON map as value.
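+
+A rough sketch of that flow (hypothetical helper names, not the real API):
+
+```zig
+// Merge the UUID of every ConditionValue into one map; the JSON value stays empty for now.
+var to_resolve = std.AutoHashMap([16]u8, ?[]const u8).init(allocator);
+for (condition_values) |cv| {
+    for (cv.linkUUIDs()) |uuid| try to_resolve.put(uuid, null);
+}
+
+// Then a single pass over the User files fills in the JSON of the UUID we care about.
+while (try iter.next()) |row| {
+    if (to_resolve.contains(row[0].UUID)) {
+        try to_resolve.put(row[0].UUID, try entityToJson(allocator, row));
+    }
+}
+```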
diff --git a/docs/ZiQL.md b/docs/ZiQL.md
index bc6886b..fdee9ba 100644
--- a/docs/ZiQL.md
+++ b/docs/ZiQL.md
@@ -68,7 +68,6 @@ The main action is `GRAB`, this will parse files and return data.
 
 #### Basic
 
-
 Here's how to return all `User` entities without any filtering:
 ```python
 GRAB User
diff --git a/docs/ZipponData.md b/docs/ZipponData.md
new file mode 100644
index 0000000..31c6a66
--- /dev/null
+++ b/docs/ZipponData.md
@@ -0,0 +1,278 @@
+# ZipponData
+
+ZipponData is a library developed in the context of [ZipponDB](https://github.com/MrBounty/ZipponDB/tree/v0.1.3).
+
+The library intends to provide a simple way to store and parse data from a file in the most efficient and fast way possible.
+
+There are 6 data types available in ZipponData:
+
+| Type | Zig type | Bytes in file |
+| --- | --- | --- |
+| int | i32 | 4 |
+| float | f64 | 8 |
+| bool | bool | 1 |
+| str | []u8 | 4 + len |
+| uuid | [16]u8 | 16 |
+| unix | u64 | 8 |
+
+Each type has its array equivalent.
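+
+For example, a `str` is stored as its length followed by its bytes. A sketch of my reading of the table above (assuming a little-endian u32 length prefix; check the source for the exact layout):
+
+```zig
+// "Hi" as a str: 4 bytes of length + 2 bytes of data = 6 bytes in the file.
+const expected = [_]u8{ 2, 0, 0, 0 } ++ [_]u8{ 'H', 'i' };
+```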
+
+## Quickstart
+
+1. Create a file with `createFile`
+2. Create some `Data`
+3. Create a `DataWriter`
+4. Write the data
+5. Create a schema
+6. Create an iterator with `DataIterator`
+7. Iterate over all values
+8. Delete the file with `deleteFile`
+
+Here is an example of how to use it:
+```zig
+const std = @import("std");
+
+pub fn main() !void {
+    const allocator = std.testing.allocator;
+
+    // 0. Make a temporary directory
+    try std.fs.cwd().makeDir("tmp");
+    const dir = try std.fs.cwd().openDir("tmp", .{});
+
+    // 1. Create a file
+    try createFile("test", dir);
+
+    // 2. Create some Data
+    const data = [_]Data{
+        Data.initInt(1),
+        Data.initFloat(3.14159),
+        Data.initInt(-5),
+        Data.initStr("Hello world"),
+        Data.initBool(true),
+        Data.initUnix(2021),
+    };
+
+    // 3. Create a DataWriter
+    var dwriter = try DataWriter.init("test", dir);
+    defer dwriter.deinit(); // This just closes the file
+
+    // 4. Write some data
+    try dwriter.write(&data);
+    try dwriter.write(&data);
+    try dwriter.write(&data);
+    try dwriter.write(&data);
+    try dwriter.write(&data);
+    try dwriter.write(&data);
+    try dwriter.flush(); // Don't forget to flush!
+
+    // 5. Create a schema
+    // A schema is how the iterator will parse the file.
+    // If it is wrong here, it will return wrong/random data
+    // and most likely an error when iterating in the while loop.
+    const schema = &[_]DType{
+        .Int,
+        .Float,
+        .Int,
+        .Str,
+        .Bool,
+        .Unix,
+    };
+
+    // 6. Create a DataIterator
+    var iter = try DataIterator.init(allocator, "test", dir, schema);
+    defer iter.deinit();
+
+    // 7. Iterate over data
+    while (try iter.next()) |row| {
+        std.debug.print("Row: {any}\n", .{row});
+    }
+
+    // 8. Delete the file (optional of course)
+    try deleteFile("test", dir);
+    try std.fs.cwd().deleteDir("tmp");
+}
+```
+
+***Note: The dir can be null and it will use cwd.***
+
+# Array
+
+All data types have an array equivalent.
+To write an array, you need to first encode it using `allocEncodArray` before writing it.
+This uses an allocator, so you need to free what it returns.
+
+When read, an array is just the raw bytes. To get the data itself, you need to create an `ArrayIterator`. Here is an example:
+
+```zig
+pub fn main() !void {
+    const allocator = std.testing.allocator;
+
+    // 0. Make a temporary directory
+    try std.fs.cwd().makeDir("array_tmp");
+    const dir = try std.fs.cwd().openDir("array_tmp", .{});
+
+    // 1. Create a file
+    try createFile("test", dir);
+
+    // 2. Create and encode some Data
+    const int_array = [4]i32{ 32, 11, 15, 99 };
+    const data = [_]Data{
+        Data.initIntArray(try allocEncodArray.Int(allocator, &int_array)), // Encode
+    };
+    defer allocator.free(data[0].IntArray); // Don't forget to free it
+
+    // 3. Create a DataWriter
+    var dwriter = try DataWriter.init("test", dir);
+    defer dwriter.deinit();
+
+    // 4. Write some data
+    try dwriter.write(&data);
+    try dwriter.flush();
+
+    // 5. Create a schema
+    const schema = &[_]DType{
+        .IntArray,
+    };
+
+    // 6. Create a DataIterator
+    var iter = try DataIterator.init(allocator, "test", dir, schema);
+    defer iter.deinit();
+
+    // 7. Iterate over data
+    var i: usize = 0;
+    if (try iter.next()) |row| {
+        // 8. Iterate over the array
+        var array_iter = ArrayIterator.init(&row[0]); // Sub array iterator
+        while (array_iter.next()) |d| {
+            try std.testing.expectEqual(int_array[i], d.Int);
+            i += 1;
+        }
+    }
+
+    try deleteFile("test", dir);
+    try std.fs.cwd().deleteDir("array_tmp");
+}
+```
+
+# Benchmark
+
+Done on an AMD Ryzen 7 7800X3D with a Samsung SSD 980 PRO 2TB (up to 7,000/5,100 MB/s read/write speed), on one thread.
+
+| Rows | Write Time (ms) | Average Write Time (μs) | Read Time (ms) | Average Read Time (μs) | File Size (kB) |
+| --- | --- | --- | --- | --- | --- |
+| 1 | 0.01 | 13.63 | 0.025 | 25.0 | 0.04 |
+| 10 | 0.01 | 1.69 | 0.03 | 3.28 | 0.4 |
+| 100 | 0.04 | 0.49 | 0.07 | 0.67 | 4.0 |
+| 1_000 | 0.36 | 0.36 | 0.48 | 0.48 | 40 |
+| 10_000 | 3.42 | 0.34 | 4.67 | 0.47 | 400 |
+| 100_000 | 36.39 | 0.36 | 48.00 | 0.49 | 4_000 |
+| 1_000_000 | 361.41 | 0.36 | 481.00 | 0.48 | 40_000 |
+
+TODO: Update the numbers to use the Unix type. Benchmark on my laptop and maybe on some cloud VM.
+
+Data used:
+```zig
+const schema = &[_]DType{
+    .Int,
+    .Float,
+    .Int,
+    .Str,
+    .Bool,
+    .Unix,
+};
+
+const data = &[_]Data{
+    Data.initInt(1),
+    Data.initFloat(3.14159),
+    Data.initInt(-5),
+    Data.initStr("Hello world"),
+    Data.initBool(true),
+    Data.initUnix(2021),
+};
+```
+
+***Note: You can check Benchmark.md in ZipponDB to see performance using multi-threading. I was able to parse 1_000_000 users in less than 100ms.***
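+
+A minimal sketch of how such a timing can be taken with `std.time.Timer`, reusing the writer and data from above (not the exact harness used for the table):
+
+```zig
+var timer = try std.time.Timer.start();
+for (0..1_000_000) |_| try dwriter.write(&data);
+try dwriter.flush();
+const elapsed_ns = timer.read();
+std.debug.print("1M rows written in {d}ms\n", .{elapsed_ns / std.time.ns_per_ms});
+```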
+
+# Importing the package
+
+Create a `build.zig.zon` next to `build.zig` if not already done.
+
+Add this dependency in `build.zig.zon`:
+```zig
+.ZipponData = .{
+    .url = "git+https://github.com/MrBounty/ZipponData",
+    // the correct hash will be suggested by zig
+},
+```
+
+Here is the complete `build.zig.zon` for my project ZipponDB:
+```zig
+.{
+    .name = "ZipponDB",
+    .version = "0.1.4",
+    .dependencies = .{
+        .ZipponData = .{
+            .url = "git+https://github.com/MrBounty/ZipponData",
+            // the correct hash will be suggested by zig
+        },
+    },
+    .paths = .{
+        "",
+    },
+}
+```
+
+And in `build.zig` you can import the module like this:
+```zig
+const zid = b.dependency("ZipponData", .{});
+exe.root_module.addImport("ZipponData", zid.module("ZipponData"));
+```
+
+And you can now import it like std in your project:
+```zig
+const zid = @import("ZipponData");
+try zid.createFile("Hello.zid", null);
+```
+
+# What you can't do
+
+You can't update files; you will need to implement that yourself. The easiest way (and the only one I know) is to parse the entire file and write it into another one.
+
+Here is an example that evaluates all structs using a `Filter` and writes only the structs that are false. (A filter can be like `age > 20`: if the member `age` of the struct is `> 20`, it is true):
+```zig
+pub fn delete(allocator: std.mem.Allocator, file_name: []const u8, dir: std.fs.Dir, schema: []const zid.DType, filter: Filter) !void {
+    // 1. Create the iterator of the current file
+    var iter = try zid.DataIterator.init(allocator, file_name, dir, schema);
+    defer iter.deinit();
+
+    // 2. Create a new file
+    const new_path_buff = try std.fmt.allocPrint(allocator, "{s}.new", .{file_name});
+    defer allocator.free(new_path_buff);
+    try zid.createFile(new_path_buff, dir);
+
+    // 3. Create a writer of the new data
+    var new_writer = try zid.DataWriter.init(new_path_buff, dir);
+    defer new_writer.deinit();
+
+    // 4. For all structs, evaluate and write to the new file if false
+    while (try iter.next()) |row| {
+        if (!filter.evaluate(row)) {
+            try new_writer.write(row);
+        }
+    }
+
+    // 5. Flush, delete the old file and rename the new file to the previous name
+    try new_writer.flush();
+    try dir.deleteFile(file_name);
+    try dir.rename(new_path_buff, file_name);
+}
+```
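+
+The same pattern works for updates. A quick sketch of the loop, assuming the first member is an int you want to overwrite (the row slice belongs to the iterator, so duplicate it before changing it):
+
+```zig
+while (try iter.next()) |row| {
+    const updated = try allocator.dupe(zid.Data, row);
+    defer allocator.free(updated);
+    updated[0] = zid.Data.initInt(26); // e.g. overwrite the first (int) member
+    try new_writer.write(updated);
+}
+```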
+
+# Potential update
+
+I don't plan to update this, but it will depend on whether my other projects need it.
+
+- Functions to update files
+- Add a header with the data types at the beginning of the file, so there is no need to pass a schema and I can check at every write that the data is in the good format
+- More types
+- Multi threading
diff --git a/docs/cli.md b/docs/cli.md
index 4489f79..a972fbd 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -62,9 +62,9 @@ Name | Type | Description | Default
 ---- | ---- | ------------------- | ----
 TODO | TODO | TODO | TODO
 
-### db state - WIP
+### db state
 
-Return the state of the database, either `MissingDatabase` if no database selected or `MissingSchema` if no schema was initialize.
+Return the state of the database: either `Ok`, `MissingDatabase` if no database is selected, or `MissingSchema` if no schema was initialized.
 
 **Usage:**
diff --git a/mkdocs.yml b/mkdocs.yml
index 6b5187c..97c8c09 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -14,6 +14,7 @@ theme:
   palette:
     - media: "(prefers-color-scheme: light)"
       scheme: default
+      primary: lime
       toggle:
        icon: material/brightness-7
        name: Switch to dark mode
@@ -21,6 +22,7 @@ theme:
     # Palette toggle for dark mode
     - media: "(prefers-color-scheme: dark)"
       scheme: slate
+      primary: lime
       toggle:
        icon: material/brightness-4
        name: Switch to light mode
@@ -31,6 +33,7 @@ markdown_extensions:
       anchor_linenums: true
       line_spans: __span
       pygments_lang_class: true
+      use_pygments: true
   - pymdownx.inlinehilite
   - pymdownx.snippets
   - pymdownx.superfences
@@ -49,5 +52,6 @@ nav:
   - Under the Hood:
     - Benchmark: Benchmark.md
     - Technical: Technical docs.md
+    - ZipponData: ZipponData.md
   - About: about.md
   - Roadmap: Roadmap.md
diff --git a/src/entityWriter.zig b/src/entityWriter.zig
index 26e8662..7830cf6 100644
--- a/src/entityWriter.zig
+++ b/src/entityWriter.zig
@@ -77,23 +77,43 @@ pub const EntityWriter = struct {
             .IntArray => while (iter.next()) |v| writer.print("{d}, ", .{v.Int}) catch return ZipponError.WriteError,
             .FloatArray => while (iter.next()) |v| writer.print("{d}", .{v.Float}) catch return ZipponError.WriteError,
             .StrArray => while (iter.next()) |v| writer.print("\"{s}\"", .{v.Str}) catch return ZipponError.WriteError,
-            .UUIDArray => while (iter.next()) |v| writer.print("\"{s}\"", .{UUID.format_bytes(v.UUID)}) catch return ZipponError.WriteError,
+            .UUIDArray => while (iter.next()) |v| writer.print("\"{{|<{s}>|}}\"", .{UUID.format_bytes(v.UUID)}) catch return ZipponError.WriteError,
             .BoolArray => while (iter.next()) |v| writer.print("{any}", .{v.Bool}) catch return ZipponError.WriteError,
-            .UnixArray => {
-                while (iter.next()) |v| {
-                    const datetime = DateTime.initUnix(v.Unix);
-                    writer.writeByte('"') catch return ZipponError.WriteError;
-                    switch (data_type) {
-                        .date => datetime.format("YYYY/MM/DD", writer) catch return ZipponError.WriteError,
-                        .time => datetime.format("HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
-                        .datetime => datetime.format("YYYY/MM/DD-HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
-                        else => unreachable,
-                    }
-                    writer.writeAll("\", ") catch return ZipponError.WriteError;
+            .UnixArray => while (iter.next()) |v| {
+                const datetime = DateTime.initUnix(v.Unix);
+                writer.writeByte('"') catch return ZipponError.WriteError;
+                switch (data_type) {
+                    .date => datetime.format("YYYY/MM/DD", writer) catch return ZipponError.WriteError,
+                    .time => datetime.format("HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
+                    .datetime => datetime.format("YYYY/MM/DD-HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
+                    else => unreachable,
                 }
+                writer.writeAll("\", ") catch return ZipponError.WriteError;
             },
             else => unreachable,
         }
         writer.writeByte(']') catch return ZipponError.WriteError;
     }
+
+    /// Take a string in the JSON format and look for {|<UUID>|} placeholders,
+    /// then look into the map to see if it contains that UUID.
+    /// If it finds it, it will replace the {|<UUID>|} with the JSON value.
+    /// (JsonString comes from stuffs/relationMap.zig.)
+    pub fn updateWithRelation(writer: anytype, input: []const u8, to_add: *std.AutoHashMap([16]u8, JsonString)) ZipponError!void {
+        var start: usize = 0;
+        while (std.mem.indexOf(u8, input[start..], "{|<")) |pos| {
+            const pattern_start = start + pos;
+            const pattern_end = std.mem.indexOf(u8, input[pattern_start..], ">|}") orelse break;
+            const full_pattern_end = pattern_start + pattern_end + 3;
+
+            // Write the text before the pattern
+            writer.writeAll(input[start..pattern_start]) catch return ZipponError.WriteError;
+
+            // The placeholder holds the UUID as written by UUID.format_bytes;
+            // parse it back to raw bytes for the map lookup (assumption: dtype's UUID can parse its own formatted string).
+            const uuid_str = input[pattern_start + 3 .. full_pattern_end - 3];
+            const uuid = UUID.parse(uuid_str) catch {
+                writer.writeAll(input[pattern_start..full_pattern_end]) catch return ZipponError.WriteError;
+                start = full_pattern_end;
+                continue;
+            };
+            if (to_add.get(uuid.bytes)) |json_string| {
+                writer.writeAll(json_string.slice) catch return ZipponError.WriteError;
+            } else {
+                // Not in this map: keep the placeholder, another RelationMap may resolve it
+                writer.writeAll(input[pattern_start..full_pattern_end]) catch return ZipponError.WriteError;
+            }
+            start = full_pattern_end;
+        }
+
+        // Write any remaining text
+        writer.writeAll(input[start..]) catch return ZipponError.WriteError;
+    }
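+
+    // Hypothetical sanity check for updateWithRelation, assuming UUID.parse
+    // round-trips with UUID.format_bytes. Note that the quotes written around
+    // the placeholder by writeEntityJSON are preserved as-is for now.
+    test "updateWithRelation swaps placeholders for JSON" {
+        const allocator = std.testing.allocator;
+        var map = std.AutoHashMap([16]u8, JsonString).init(allocator);
+        defer map.deinit();
+        const uuid = try UUID.parse("00000000-0000-0000-0000-000000000001");
+        try map.put(uuid.bytes, .{ .slice = "{\"name\": \"Bob\"}", .init = true });
+
+        var out = std.ArrayList(u8).init(allocator);
+        defer out.deinit();
+        try updateWithRelation(out.writer(), "[\"{|<00000000-0000-0000-0000-000000000001>|}\"]", &map);
+        try std.testing.expectEqualStrings("[\"{\"name\": \"Bob\"}\"]", out.items);
+    }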
 };
diff --git a/src/fileEngine.zig b/src/fileEngine.zig
index f99a688..dffb17b 100644
--- a/src/fileEngine.zig
+++ b/src/fileEngine.zig
@@ -17,6 +17,8 @@ const DataType = dtype.DataType;
 const AdditionalData = @import("stuffs/additionalData.zig").AdditionalData;
 const Filter = @import("stuffs/filter.zig").Filter;
+const RelationMap = @import("stuffs/relationMap.zig").RelationMap;
+const JsonString = @import("stuffs/relationMap.zig").JsonString;
 const ConditionValue = @import("stuffs/filter.zig").ConditionValue;
 
 const ZipponError = @import("stuffs/errors.zig").ZipponError;
@@ -382,29 +384,29 @@ pub const FileEngine = struct {
 
     /// Take a filter, parse all file and if one struct if validate by the filter, write it in a JSON format to the writer
     /// filter can be null. This will return all of them
-    /// TODO: For relationship, if they are in additional_data and I need to return it with the other members, I will need to parse the file
-    /// This is difficult, because that mean I need to parse file while parsing files ? I dont like that because it may be the same struct
-    /// And because of multi thread, I can read the same file at the same time...
     pub fn parseEntities(
         self: *FileEngine,
         struct_name: []const u8,
         filter: ?Filter,
         additional_data: *AdditionalData,
-        writer: anytype,
-    ) ZipponError!void {
+        entry_allocator: Allocator,
+    ) ZipponError![]const u8 {
         var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer);
         fa.reset();
         const allocator = fa.allocator();
 
+        var buff = std.ArrayList(u8).init(entry_allocator);
+        defer buff.deinit();
+        const writer = buff.writer();
+
         const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
         const max_file_index = try self.maxFileIndex(sstruct.name);
         log.debug("Max file index {d}", .{max_file_index});
 
         // If there is no member to find, that mean we need to return all members, so let's populate additional data with all of them
-        if (additional_data.childrens.items.len == 0) {
+        if (additional_data.childrens.items.len == 0)
             additional_data.populateWithEverythingExceptLink(sstruct.members, sstruct.types) catch return FileEngineError.MemoryError;
-        }
 
         // Open the dir that contain all files
         const dir = try utils.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{ .access_sub_paths = false });
@@ -441,9 +443,14 @@
         }
 
         // Append all writer to each other
-        //writer.writeByte('[') catch return FileEngineError.WriteError;
+        writer.writeByte('[') catch return FileEngineError.WriteError;
         for (thread_writer_list) |list| writer.writeAll(list.items) catch return FileEngineError.WriteError;
-        //writer.writeByte(']') catch return FileEngineError.WriteError;
+        writer.writeByte(']') catch return FileEngineError.WriteError;
+
+        // Here now I need to already have a populated list of RelationMap
+        // I will then call parseEntitiesRelationMap on each
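+
+        // Sketch of the wiring I have in mind once the RelationMap list exists
+        // (relationMapArrayInit is the helper drafted in additionalData.zig):
+        //
+        // if (try additional_data.relationMapArrayInit(entry_allocator)) |relation_maps| {
+        //     for (relation_maps) |*relation_map| try self.parseEntitiesRelationMap(relation_map, &buff);
+        // }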
+
+        return buff.toOwnedSlice() catch return FileEngineError.MemoryError;
     }
 
     fn parseEntitiesOneFile(
@@ -493,6 +500,141 @@
         _ = sync_context.completeThread();
     }
 
+    // Receive a map of UUID -> empty JsonString.
+    // Will parse the files and update the values to the JSON strings of the entities that represent the keys.
+    // Will then rewrite the input, swapping in the JSON from the map where it finds {|<>|} placeholders.
+    // Once the new input is written, call parseEntitiesRelationMap again if the string still contains {|<>|} because of sub relationships.
+    // The buffer contains the string with {|<>|} and is updated in place at the end.
+    // TODO: Filter the files that need to be parsed, to prevent parsing everything all the time
+    pub fn parseEntitiesRelationMap(
+        self: *FileEngine,
+        relation_map: *RelationMap,
+        buff: *std.ArrayList(u8),
+    ) ZipponError!void {
+        var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer);
+        fa.reset();
+        const allocator = fa.allocator();
+
+        var new_buff = std.ArrayList(u8).init(allocator);
+        defer new_buff.deinit();
+        const writer = new_buff.writer();
+
+        const sstruct = try self.schema_engine.structName2SchemaStruct(relation_map.struct_name);
+        const max_file_index = try self.maxFileIndex(sstruct.name);
+
+        log.debug("Max file index {d}", .{max_file_index});
+
+        // If there is no member to find, that means we need to return all members, so let's populate additional data with all of them
+        if (relation_map.additional_data.childrens.items.len == 0) {
+            relation_map.additional_data.populateWithEverythingExceptLink(sstruct.members, sstruct.types) catch return FileEngineError.MemoryError;
+        }
+
+        // Open the dir that contains all files
+        const dir = try utils.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{ .access_sub_paths = false });
+
+        // Multi thread stuffs
+        var sync_context = ThreadSyncContext.init(
+            relation_map.additional_data.limit,
+            max_file_index + 1,
+        );
+
+        // One map per thread, otherwise they would all write to the same map at the same time
+        const thread_map_list = allocator.alloc(std.AutoHashMap([16]u8, JsonString), max_file_index + 1) catch return FileEngineError.MemoryError;
+
+        // Start parsing all files in multiple threads
+        for (0..(max_file_index + 1)) |file_index| {
+            thread_map_list[file_index] = relation_map.map.cloneWithAllocator(allocator) catch return FileEngineError.MemoryError;
+
+            self.thread_pool.spawn(parseEntitiesRelationMapOneFile, .{
+                &thread_map_list[file_index],
+                file_index,
+                dir,
+                sstruct.zid_schema,
+                &relation_map.additional_data,
+                try self.schema_engine.structName2DataType(relation_map.struct_name),
+                &sync_context,
+            }) catch return FileEngineError.ThreadError;
+        }
+
+        // Wait for all threads to either finish or return an error
+        while (!sync_context.isComplete()) {
+            std.time.sleep(10_000_000); // Check every 10ms
+        }
+
+        // Now I have a list of clones of the map with the JSON strings filled a bit everywhere.
+        // Merge them all back into the original map.
+        for (thread_map_list) |map| {
+            var map_iter = map.iterator();
+            while (map_iter.next()) |entry| {
+                if (entry.value_ptr.init) relation_map.map.put(entry.key_ptr.*, entry.value_ptr.*) catch return FileEngineError.MemoryError;
+            }
+        }
+
+        // Write the new string and update buff to the new version
+        try EntityWriter.updateWithRelation(writer, buff.items, relation_map.map);
+        buff.clearRetainingCapacity();
+        buff.writer().writeAll(new_buff.items) catch return FileEngineError.WriteError;
+
+        // Now here I need to iterate again if buff.items still contains {|<>|}
+    }
+
+    fn parseEntitiesRelationMapOneFile(
+        map: *std.AutoHashMap([16]u8, JsonString),
+        file_index: u64,
+        dir: std.fs.Dir,
+        zid_schema: []zid.DType,
+        additional_data: *AdditionalData,
+        data_types: []const DataType,
+        sync_context: *ThreadSyncContext,
+    ) void {
+        var data_buffer: [BUFFER_SIZE]u8 = undefined;
+        var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
+        defer fa.reset();
+        const allocator = fa.allocator();
+
+        const parent_alloc = map.allocator;
+        var string_list = std.ArrayList(u8).init(allocator);
+        const writer = string_list.writer();
+
+        const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| {
+            sync_context.logError("Error creating file path", err);
+            return;
+        };
+
+        var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch |err| {
+            sync_context.logError("Error initializing DataIterator", err);
+            return;
+        };
+
+        while (iter.next() catch |err| {
+            sync_context.logError("Error in iter next", err);
+            return;
+        }) |row| {
+            if (sync_context.checkStructLimit()) break;
+            if (!map.contains(row[0].UUID)) continue;
+            defer string_list.clearRetainingCapacity();
+
+            EntityWriter.writeEntityJSON(
+                writer,
+                row,
+                additional_data,
+                data_types,
+            ) catch |err| {
+                sync_context.logError("Error writing entity", err);
+                return;
+            };
+
+            // Dupe with the parent allocator so the JSON outlives this thread's buffer
+            const json_copy = parent_alloc.dupe(u8, string_list.items) catch |err| {
+                sync_context.logError("Error duplicating JSON string", err);
+                return;
+            };
+            map.put(row[0].UUID, .{ .slice = json_copy, .init = true }) catch |err| {
+                sync_context.logError("Error adding the entity to the map", err);
+                return;
+            };
+
+            if (sync_context.incrementAndCheckStructLimit()) break;
+        }
+
+        _ = sync_context.completeThread();
+    }
+
     // --------------------Change existing files--------------------
 
     // TODO: Make it in batch too
diff --git a/src/stuffs/additionalData.zig b/src/stuffs/additionalData.zig
index 254ca34..505f6b2 100644
--- a/src/stuffs/additionalData.zig
+++ b/src/stuffs/additionalData.zig
@@ -1,5 +1,6 @@
 const std = @import("std");
 const Allocator = std.mem.Allocator;
+const RelationMap = @import("relationMap.zig").RelationMap;
+const JsonString = @import("relationMap.zig").JsonString;
 
 const dtype = @import("dtype");
 const DataType = dtype.DataType;
@@ -33,6 +34,15 @@ pub const AdditionalData = struct {
         self.childrens.items[self.childrens.items.len - 1].additional_data = AdditionalData.init(self.allocator);
         return &self.childrens.items[self.childrens.items.len - 1].additional_data.?;
     }
+
+    /// Create an array of empty RelationMap based on the additionalData.
+    /// (Draft: assumes a child that carries its own additional_data is a relation.)
+    pub fn relationMapArrayInit(self: AdditionalData, allocator: Allocator) ZipponError!?[]RelationMap {
+        // So here I should have a relationship if children are relations
+        var array = std.ArrayList(RelationMap).init(allocator);
+        for (self.childrens.items) |child| {
+            const child_additional_data = child.additional_data orelse continue;
+            const map = allocator.create(std.AutoHashMap([16]u8, JsonString)) catch return ZipponError.MemoryError;
+            map.* = std.AutoHashMap([16]u8, JsonString).init(allocator);
+            array.append(.{ .struct_name = child.name, .additional_data = child_additional_data, .map = map }) catch return ZipponError.MemoryError;
+        }
+        if (array.items.len == 0) return null;
+        return array.toOwnedSlice() catch return ZipponError.MemoryError;
+    }
 };
 
 // This is name in: [name]
diff --git a/src/stuffs/relationMap.zig b/src/stuffs/relationMap.zig
new file mode 100644
index 0000000..4d7d375
--- /dev/null
+++ b/src/stuffs/relationMap.zig
@@ -0,0 +1,32 @@
+// A RelationMap is all the data needed to add relationships during parsing.
+// How it works: the first time I parse the struct files, like User, I populate a map of UUID -> empty JSON string,
+// and inside the JSON string I write a {|<UUID>|} placeholder. Then I can use this struct to parse the files again,
+// and if the UUID is in the map, I write the JSON into its value in the map.
+// It needs to be recursive, as additional data can do stuff like [name, friends [name, best_friend]].
+// I could use parseEntities in a recursive way, but that means reading the files at each loop. =/
+//
+// On the other hand, that would solve the issue of getting the UUID of the best_friend:
+// I can only populate the UUID that I want from the current depth of the additional data,
+// so I need to parse multiple times anyway. Because with GRAB User [comments [post]],
+// how do I get the UUID of the Post if I only parse User?
+//
+// Ok so I need to go recursive on parseEntities:
+// I parse one time; if additional data has relationships, I create a list of RelationMap.
+// When I parse, I populate the RelationMaps with the UUID I want.
+// Then for each RelationMap, I parse the files again, this time to update the first JSON that now has {|<>|} placeholders,
+// with a sub additionalData. If there is an additional data relation, I recurse.
+// So I need an option in parseEntities to either write the first JSON or update an existing one.
+
+const std = @import("std");
+const AdditionalData = @import("additionalData.zig").AdditionalData;
+
+pub const JsonString = struct {
+    slice: []const u8 = "",
+    init: bool = false,
+};
+
+pub const RelationMap = struct {
+    struct_name: []const u8,
+    additional_data: AdditionalData,
+    map: *std.AutoHashMap([16]u8, JsonString),
+};
diff --git a/src/ziqlParser.zig b/src/ziqlParser.zig
index 48eed18..7ed9557 100644
--- a/src/ziqlParser.zig
+++ b/src/ziqlParser.zig
@@ -184,19 +184,13 @@
                     var filter = try self.parseFilter(allocator, struct_name, false);
                     defer filter.deinit();
 
-                    var buff = std.ArrayList(u8).init(allocator);
-                    defer buff.deinit();
-
-                    try self.file_engine.parseEntities(struct_name, filter, &additional_data, &buff.writer());
-                    send("{s}", .{buff.items});
+                    const json_string = try self.file_engine.parseEntities(struct_name, filter, &additional_data, allocator);
+                    send("{s}", .{json_string});
                     state = .end;
                 },
                 .eof => {
-                    var buff = std.ArrayList(u8).init(allocator);
-                    defer buff.deinit();
-
-                    try self.file_engine.parseEntities(struct_name, null, &additional_data, &buff.writer());
-                    send("{s}", .{buff.items});
+                    const json_string = try self.file_engine.parseEntities(struct_name, null, &additional_data, allocator);
+                    send("{s}", .{json_string});
                     state = .end;
                 },
                 else => return printError(
diff --git a/test_runner.zig b/test_runner.zig
index d534d73..ee5cfee 100644
--- a/test_runner.zig
+++ b/test_runner.zig
@@ -22,8 +22,10 @@ pub fn myLog(
 ) void {
     _ = message_level;
     _ = scope;
-    _ = format;
-    _ = args;
+    if (true) { // Quick toggle to silence all logs during tests
+        std.debug.print(format, args);
+        std.debug.print("\n", .{});
+    }
 }
 
 // use in custom panic handler