Push before leaving for christmas

This commit is contained in: parent e7056efec9, commit 0127daa330

@ -1,14 +1,15 @@
- [ ] Delete the .new file if an error happened
- [ ] Create a struct that manages the schema
- [ ] Array manipulation
- [ ] Some time keywords like NOW

Relationships

- [X] Update the schema Parser and Tokenizer
- [X] Include the name of the link struct with the schema_struct
- [X] New ConditionValue that is an array of UUID
- [ ] When a relationship is found in a filter, check that the type is right and exists
- [ ] When parseFilter, get a list of UUID as value for the relationship
- [ ] Add new operations in Filter evaluate: IN and !IN
- [ ] parseNewData can use a filter like in "Add User (friends = [10] {age > 20})" to return UUID
- [X] When a relationship is found in a filter, check that the type is right and exists
- [X] When parseFilter, get a list of UUID as value for the relationship
- [X] Add new operations in Filter evaluate: IN and !IN
- [~] parseNewData can use a filter like in "Add User (friends = [10] {age > 20})" to return UUID
- [ ] parseFilter can use a sub filter. "GRAB User {friends IN {age > 20}}": at least one friend in a list of UUID
- [ ] When sending, send the entities in the link specified between []

@ -38,23 +39,22 @@ So I need a Radix tree to be able to find all file to parse.

For example, if I do "GRAB User [mom] {name = 'Bob'}", I parse the file one time to get all the UUID of User that represent mom (this covers the parsing that is already done and the parsing that still needs to be done). So say I found 3 UUID for Bob's mom.

2. Then I create a map with Bob's UUID as keys and a Str as value. The Str is the JSON string of the mom. For that I need to parse the file again and write using additional_data.

### Radix tree

Ok, so new problem. Given a list of UUID, I need a way to find all the file indexes to parse.
And even better if I can get the number of UUID per file, so I can stop parsing them early (see the sketch below).
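
A minimal sketch of that counting idea, assuming a plain hash map stands in for the radix tree and that a UUID-to-file-index lookup already exists somewhere (`uuid_to_file` and `countUuidPerFile` are names I made up for illustration):

```zig
const std = @import("std");

// Made-up sketch: given which file each UUID lives in, count how many of the
// wanted UUID are in each file, so a file can be skipped entirely or parsing
// can stop early once its count is exhausted. A radix tree over the UUID
// bytes could replace the uuid_to_file lookup.
pub fn countUuidPerFile(
    allocator: std.mem.Allocator,
    uuid_to_file: *const std.AutoHashMap([16]u8, usize),
    wanted: []const [16]u8,
) !std.AutoHashMap(usize, usize) {
    var counts = std.AutoHashMap(usize, usize).init(allocator);
    errdefer counts.deinit();
    for (wanted) |uuid| {
        const file_index = uuid_to_file.get(uuid) orelse continue;
        const entry = try counts.getOrPutValue(file_index, 0);
        entry.value_ptr.* += 1;
    }
    return counts;
}
```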

## Run in WASM for a demo

This could be fun: make a small demo where you get a wasm build that runs the database locally in the browser.

Happy to announce the v0.2 of my database. New features include:

- Relationships
- Huge performance increase with multi-threading
- Date, time and datetime types
- Compressed binary files
- Logs

All core features of the query language, except linked queries, are working. v0.3 will focus on adding things around it, including:

- Schema migration
- Dump/Bump data
- Recovery
- Better CLI

## How do I return relationships

So let's say I have a query that gets 100 comments and I return Comment.User. That means once I have parsed all Comments, I have all the UUID of User in ConditionValue in a map.
I need to get all the UUID, meaning concatenating all the UUID of all ConditionValue into one map. Then I can parse `User` and create a new map with UUID as key and the JSON string as value.
Like that I can iterate as much as I want inside (see the sketch after the steps below).

Query optimization for later:

- If a filter uses an id to find something, stop after finding it, as I know there is no other struct with the same id

That means:

- If I have a link in AdditionalData, I need to:
  - Get all the UUID that I need the data for (concatenate all maps)
  - Create a new map UUID/JSON object
  - Parse the files and populate the new maps

Which also means that I need to do all of them at the same time at the beginning. So using AdditionalData, I iterate over all Nodes, find all Links and do what I said above.
I can then save those maps into a map with the path, like `Comment.friends`, as key and the map that contains UUID/JSON as value (see the sketch below).
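
A minimal sketch of that flow, with made-up names (`by_path`, the placeholder UUID and JSON); it only illustrates the concatenate-then-populate idea, not the actual FileEngine code:

```zig
const std = @import("std");

// Made-up sketch: one map per relation path (e.g. "Comment.friends"), each mapping
// a UUID to the JSON string of the linked entity once its files have been parsed.
pub fn buildRelationMaps(allocator: std.mem.Allocator) !void {
    var by_path = std.StringHashMap(std.AutoHashMap([16]u8, []const u8)).init(allocator);
    defer {
        var maps = by_path.valueIterator();
        while (maps.next()) |m| m.deinit();
        by_path.deinit();
    }

    // 1. Concatenate every UUID found in the ConditionValue of the parsed Comments into one map.
    var users = std.AutoHashMap([16]u8, []const u8).init(allocator);
    errdefer users.deinit();
    const some_uuid = [_]u8{0} ** 16; // stand-in for a UUID found while parsing Comment
    try users.put(some_uuid, ""); // value stays empty until the User files are parsed

    // 2. Parse the User files once and fill in the JSON for every known UUID.
    var it = users.iterator();
    while (it.next()) |entry| entry.value_ptr.* = "{\"name\": \"Bob\"}"; // placeholder JSON

    // 3. Save the populated map under its path so it can be looked up while writing the result.
    try by_path.put("Comment.friends", users);
}
```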
@ -68,7 +68,6 @@ The main action is `GRAB`, this will parse files and return data.

#### Basic

Here's how to return all `User` entities without any filtering:

```python
GRAB User
```

docs/ZipponData.md (new file, 278 lines)
@ -0,0 +1,278 @@

# ZipponData

ZipponData is a library developed in the context of [ZipponDB](https://github.com/MrBounty/ZipponDB/tree/v0.1.3).

The library intends to provide a simple way to store and parse data from a file in the most efficient and fast way possible.

There are 6 data types available in ZipponData:

| Type | Zig type | Bytes in file |
| --- | --- | --- |
| int | i32 | 4 |
| float | f64 | 8 |
| bool | bool | 1 |
| str | []u8 | 4 + len |
| uuid | [16]u8 | 16 |
| unix | u64 | 8 |

For example, the str "Hello world" takes 4 + 11 = 15 bytes in the file. Each type has its array equivalent.

## Quickstart

1. Create a file with `createFile`
2. Create some `Data`
3. Create a `DataWriter`
4. Write the data
5. Create a schema
6. Create an iterator with `DataIterator`
7. Iterate over all values
8. Delete the file with `deleteFile`

Here is an example of how to use it:
``` zig
const std = @import("std");

pub fn main() !void {
    const allocator = std.testing.allocator;

    // 0. Make a temporary directory
    try std.fs.cwd().makeDir("tmp");
    const dir = try std.fs.cwd().openDir("tmp", .{});

    // 1. Create a file
    try createFile("test", dir);

    // 2. Create some Data
    const data = [_]Data{
        Data.initInt(1),
        Data.initFloat(3.14159),
        Data.initInt(-5),
        Data.initStr("Hello world"),
        Data.initBool(true),
        Data.initUnix(2021),
    };

    // 3. Create a DataWriter
    var dwriter = try DataWriter.init("test", dir);
    defer dwriter.deinit(); // This just closes the file

    // 4. Write some data
    try dwriter.write(&data);
    try dwriter.write(&data);
    try dwriter.write(&data);
    try dwriter.write(&data);
    try dwriter.write(&data);
    try dwriter.write(&data);
    try dwriter.flush(); // Don't forget to flush!

    // 5. Create a schema
    // A schema is how the iterator will parse the file.
    // If you get it wrong here, it will return wrong/random data
    // and most likely an error when iterating in the while loop.
    const schema = &[_]DType{
        .Int,
        .Float,
        .Int,
        .Str,
        .Bool,
        .Unix,
    };

    // 6. Create a DataIterator
    var iter = try DataIterator.init(allocator, "test", dir, schema);
    defer iter.deinit();

    // 7. Iterate over data
    while (try iter.next()) |row| {
        std.debug.print("Row: {any}\n", .{row});
    }

    // 8. Delete the file (optional of course)
    try deleteFile("test", dir);
    try std.fs.cwd().deleteDir("tmp");
}
```

***Note: The dir can be null and it will use cwd.***

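For instance, a minimal sketch (assuming, as the note says, that a null dir falls back to the current working directory):

```zig
// No explicit directory: the file should be created relative to the cwd.
try createFile("example.zid", null);
try deleteFile("example.zid", null);
```
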
# Array

All data types have an array equivalent. To write an array, you need to first encode it using `allocEncodArray` before writing it.
This uses an allocator, so you need to free what it returns.

When read, an array is just the raw bytes. To get the data itself, you need to create an `ArrayIterator`. Here is an example:

```zig
const std = @import("std");

pub fn main() !void {
    const allocator = std.testing.allocator;

    // 0. Make a temporary directory
    try std.fs.cwd().makeDir("array_tmp");
    const dir = try std.fs.cwd().openDir("array_tmp", .{});

    // 1. Create a file
    try createFile("test", dir);

    // 2. Create and encode some Data
    const int_array = [4]i32{ 32, 11, 15, 99 };
    const data = [_]Data{
        Data.initIntArray(try allocEncodArray.Int(allocator, &int_array)), // Encode
    };
    defer allocator.free(data[0].IntArray); // Don't forget to free it

    // 3. Create a DataWriter
    var dwriter = try DataWriter.init("test", dir);
    defer dwriter.deinit();

    // 4. Write some data
    try dwriter.write(&data);
    try dwriter.flush();

    // 5. Create a schema
    const schema = &[_]DType{
        .IntArray,
    };

    // 6. Create a DataIterator
    var iter = try DataIterator.init(allocator, "test", dir, schema);
    defer iter.deinit();

    // 7. Iterate over data
    var i: usize = 0;
    if (try iter.next()) |row| {
        // 8. Iterate over the array
        var array_iter = ArrayIterator.init(&row[0]); // Sub array iterator
        while (array_iter.next()) |d| {
            try std.testing.expectEqual(int_array[i], d.Int);
            i += 1;
        }
    }

    try deleteFile("test", dir);
    try std.fs.cwd().deleteDir("array_tmp");
}
```

# Benchmark

Done on an AMD Ryzen 7 7800X3D with a Samsung SSD 980 PRO 2TB (up to 7,000/5,100 MB/s read/write speed) on one thread.

| Rows | Write Time (ms) | Average Write Time (μs) | Read Time (ms) | Average Read Time (μs) | File Size (kB) |
| --- | --- | --- | --- | --- | --- |
| 1 | 0.01 | 13.63 | 0.025 | 25.0 | 0.04 |
| 10 | 0.01 | 1.69 | 0.03 | 3.28 | 0.4 |
| 100 | 0.04 | 0.49 | 0.07 | 0.67 | 4.0 |
| 1_000 | 0.36 | 0.36 | 0.48 | 0.48 | 40 |
| 10_000 | 3.42 | 0.34 | 4.67 | 0.47 | 400 |
| 100_000 | 36.39 | 0.36 | 48.00 | 0.49 | 4_000 |
| 1_000_000 | 361.41 | 0.36 | 481.00 | 0.48 | 40_000 |

TODO: Update the numbers to use the Unix type. Benchmark on my laptop and maybe on some cloud VM.

Data used:

```zig
const schema = &[_]DType{
    .Int,
    .Float,
    .Int,
    .Str,
    .Bool,
    .Unix,
};

const data = &[_]Data{
    Data.initInt(1),
    Data.initFloat(3.14159),
    Data.initInt(-5),
    Data.initStr("Hello world"),
    Data.initBool(true),
    Data.initUnix(2021),
};
```
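
As a sanity check on the file sizes above: one row of this schema takes 4 + 8 + 4 + (4 + 11) + 1 + 8 = 40 bytes per the type table, which matches 40 kB for 1_000 rows. A small sketch of that arithmetic (the `rowSize` helper is mine, only the byte counts come from the table):

```zig
const std = @import("std");

// Byte cost of one row of the benchmark schema, using the "Bytes in file"
// column of the type table. The str cost is 4 + len of "Hello world".
fn rowSize(str_len: usize) usize {
    return 4 + 8 + 4 + (4 + str_len) + 1 + 8;
}

test "benchmark row size matches the reported file sizes" {
    try std.testing.expectEqual(@as(usize, 40), rowSize("Hello world".len));
    // 1_000 rows -> about 40_000 bytes, i.e. the 40 kB reported above.
    try std.testing.expectEqual(@as(usize, 40_000), 1_000 * rowSize("Hello world".len));
}
```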

***Note: You can check Benchmark.md in ZipponDB to see performance using multi-threading. It was able to parse 1_000_000 users in less than 100 ms.***

# Importing the package
|
||||
|
||||
Create a `build.zig.zon` next to `build.zig` if not already done.
|
||||
|
||||
Add this dependencies in `build.zig.zon`:
|
||||
```zig
|
||||
.ZipponData = .{
|
||||
.url = "git+https://github.com/MrBounty/ZipponData",
|
||||
//the correct hash will be suggested by zig},
|
||||
```

Here is what the complete `build.zig.zon` looks like for my project ZipponDB:
```zig
.{
    .name = "ZipponDB",
    .version = "0.1.4",
    .dependencies = .{
        .ZipponData = .{
            .url = "git+https://github.com/MrBounty/ZipponData",
            // the correct hash will be suggested by zig
        },
    },
    .paths = .{
        "",
    },
}
```

And in `build.zig` you can import the module like this:
```zig
const zid = b.dependency("ZipponData", .{});
exe.root_module.addImport("ZipponData", zid.module("ZipponData"));
```

And you can now import it like std in your project:
```zig
const zid = @import("ZipponData");
try zid.createFile("Hello.zid", null);
```

# What you can't do

You can't update files. You will need to implement that yourself. The easiest way (and the only one I know) is to parse the entire file and write it into another one.

Here is an example that evaluates all structs using a `Filter` and writes only the structs that are false. (A filter can be something like `age > 20`: if the member `age` of the struct is `> 20`, it is true):

```zig
pub fn delete(allocator: std.mem.Allocator, file_name: []const u8, dir: std.fs.Dir, schema: []const zid.DType, filter: Filter) !void {
    // 1. Create the iterator of the current file
    var iter = try zid.DataIterator.init(allocator, file_name, dir, schema);
    defer iter.deinit();

    // 2. Create a new file
    const new_path_buff = try std.fmt.allocPrint(allocator, "{s}.new", .{file_name});
    defer allocator.free(new_path_buff);
    try zid.createFile(new_path_buff, dir);

    // 3. Create a writer for the new data
    var new_writer = try zid.DataWriter.init(new_path_buff, dir);
    defer new_writer.deinit();

    // 4. For every struct, evaluate the filter and write it to the new file if false
    while (try iter.next()) |row| {
        if (!filter.evaluate(row)) {
            try new_writer.write(row);
        }
    }

    // 5. Flush, delete the old file and rename the new file to the previous name
    try new_writer.flush();
    try dir.deleteFile(file_name);
    try dir.rename(new_path_buff, file_name);
}
```

# Potential updates

I don't plan to update this, but it will depend on whether my other projects need it.

- Functions to update files
- Add a header with the data types at the beginning of the file, so there is no need to pass a schema and I can check on every write that the data is in the right format
- More types
- Multi-threading

@ -62,9 +62,9 @@ Name | Type | Description | Default
---- | ---- | ------------------- | ----
TODO | TODO | TODO | TODO

### db state - WIP
### db state

Return the state of the database, either `MissingDatabase` if no database is selected or `MissingSchema` if no schema was initialized.
Return the state of the database, either `Ok`, `MissingDatabase` if no database is selected, or `MissingSchema` if no schema was initialized.

**Usage:**

@ -14,6 +14,7 @@ theme:
  palette:
    - media: "(prefers-color-scheme: light)"
      scheme: default
      primary: lime
      toggle:
        icon: material/brightness-7
        name: Switch to dark mode
@ -21,6 +22,7 @@ theme:
    # Palette toggle for dark mode
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      primary: lime
      toggle:
        icon: material/brightness-4
        name: Switch to light mode
@ -31,6 +33,7 @@ markdown_extensions:
      anchor_linenums: true
      line_spans: __span
      pygments_lang_class: true
      use_pygments: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.superfences
@ -49,5 +52,6 @@ nav:
  - Under the Hood:
    - Benchmark: Benchmark.md
    - Technical: Technical docs.md
    - ZipponData: ZipponData.md
  - About: about.md
  - Roadmap: Roadmap.md
@ -77,23 +77,43 @@ pub const EntityWriter = struct {
            .IntArray => while (iter.next()) |v| writer.print("{d}, ", .{v.Int}) catch return ZipponError.WriteError,
            .FloatArray => while (iter.next()) |v| writer.print("{d}", .{v.Float}) catch return ZipponError.WriteError,
            .StrArray => while (iter.next()) |v| writer.print("\"{s}\"", .{v.Str}) catch return ZipponError.WriteError,
            .UUIDArray => while (iter.next()) |v| writer.print("\"{s}\"", .{UUID.format_bytes(v.UUID)}) catch return ZipponError.WriteError,
            .UUIDArray => while (iter.next()) |v| writer.print("\"{{|<{s}>|}}\"", .{UUID.format_bytes(v.UUID)}) catch return ZipponError.WriteError,
            .BoolArray => while (iter.next()) |v| writer.print("{any}", .{v.Bool}) catch return ZipponError.WriteError,
            .UnixArray => {
                while (iter.next()) |v| {
                    const datetime = DateTime.initUnix(v.Unix);
                    writer.writeByte('"') catch return ZipponError.WriteError;
                    switch (data_type) {
                        .date => datetime.format("YYYY/MM/DD", writer) catch return ZipponError.WriteError,
                        .time => datetime.format("HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
                        .datetime => datetime.format("YYYY/MM/DD-HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
                        else => unreachable,
                    }
                    writer.writeAll("\", ") catch return ZipponError.WriteError;
            .UnixArray => while (iter.next()) |v| {
                const datetime = DateTime.initUnix(v.Unix);
                writer.writeByte('"') catch return ZipponError.WriteError;
                switch (data_type) {
                    .date => datetime.format("YYYY/MM/DD", writer) catch return ZipponError.WriteError,
                    .time => datetime.format("HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
                    .datetime => datetime.format("YYYY/MM/DD-HH:mm:ss.SSSS", writer) catch return ZipponError.WriteError,
                    else => unreachable,
                }
                writer.writeAll("\", ") catch return ZipponError.WriteError;
            },
            else => unreachable,
        }
        writer.writeByte(']') catch return ZipponError.WriteError;
    }

    /// TODO:
    /// Take a string in the JSON format and look for {|<[16]u8>|}, then look into the map and check if it can find this UUID.
    /// If it finds it, it will replace the {|<[16]u8>|} with the value.
    pub fn updateWithRelation(writer: anytype, input: []const u8, to_add: std.AutoHashMap([16]u8, []const u8)) ZipponError!void {
        var start: usize = 0;
        while (std.mem.indexOf(u8, input[start..], "{|<[")) |pos| {
            const pattern_start = start + pos;
            const pattern_end = std.mem.indexOf(u8, input[pattern_start..], "]>|}") orelse break;
            const full_pattern_end = pattern_start + pattern_end + 4;

            // Write the text before the pattern
            try writer.writeAll(input[start..pattern_start]);

            const uuid_bytes = input[pattern_start + 3 .. full_pattern_end - 3];
            writer.writeAll(to_add.get(uuid_bytes).?);
            start = full_pattern_end;
        }

        // Write any remaining text
        try writer.writeAll(input[start..]);
    }
};

@ -17,6 +17,8 @@ const DataType = dtype.DataType;

const AdditionalData = @import("stuffs/additionalData.zig").AdditionalData;
const Filter = @import("stuffs/filter.zig").Filter;
const RelationMap = @import("stuffs/relationMap.zig").RelationMap;
const JsonString = @import("stuffs/relationMap.zig").JsonString;
const ConditionValue = @import("stuffs/filter.zig").ConditionValue;

const ZipponError = @import("stuffs/errors.zig").ZipponError;
@ -382,29 +384,29 @@ pub const FileEngine = struct {

    /// Take a filter, parse all files and, if a struct is validated by the filter, write it in JSON format to the writer.
    /// filter can be null; this will return all of them.
    /// TODO: For relationships, if they are in additional_data and I need to return them with the other members, I will need to parse the file.
    /// This is difficult, because that means I need to parse files while parsing files? I don't like that because it may be the same struct.
    /// And because of multi threading, I can read the same file at the same time...
    pub fn parseEntities(
        self: *FileEngine,
        struct_name: []const u8,
        filter: ?Filter,
        additional_data: *AdditionalData,
        writer: anytype,
    ) ZipponError!void {
        entry_allocator: Allocator,
    ) ZipponError![]const u8 {
        var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer);
        fa.reset();
        const allocator = fa.allocator();

        var buff = std.ArrayList(u8).init(entry_allocator);
        defer buff.deinit();
        const writer = buff.writer();

        const sstruct = try self.schema_engine.structName2SchemaStruct(struct_name);
        const max_file_index = try self.maxFileIndex(sstruct.name);

        log.debug("Max file index {d}", .{max_file_index});

        // If there is no member to find, that means we need to return all members, so let's populate additional data with all of them
        if (additional_data.childrens.items.len == 0) {
        if (additional_data.childrens.items.len == 0)
            additional_data.populateWithEverythingExceptLink(sstruct.members, sstruct.types) catch return FileEngineError.MemoryError;
        }

        // Open the dir that contains all files
        const dir = try utils.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{ .access_sub_paths = false });
@ -441,9 +443,14 @@ pub const FileEngine = struct {
        }

        // Append all writer to each other
        //writer.writeByte('[') catch return FileEngineError.WriteError;
        writer.writeByte('[') catch return FileEngineError.WriteError;
        for (thread_writer_list) |list| writer.writeAll(list.items) catch return FileEngineError.WriteError;
        //writer.writeByte(']') catch return FileEngineError.WriteError;
        writer.writeByte(']') catch return FileEngineError.WriteError;

        // Here now I need to already have a populated list of RelationMap
        // I will then call parseEntitiesRelationMap on each

        return buff.toOwnedSlice();
    }

    fn parseEntitiesOneFile(
@ -493,6 +500,141 @@ pub const FileEngine = struct {

        _ = sync_context.completeThread();
    }

    // Receive a map of UUID -> null
    // Will parse the files and update the value to the JSON string of the entity that represents the key
    // Will then write the input with the JSON in the map looking for {|<>|}
    // Once the new input is received, call parseEntitiesRelationMap again if the string still contains {|<>|} because of sub relationships
    // The buffer contains the string with {|<>|} and needs to be updated at the end
    // TODO: Filter the files that need to be parsed to prevent parsing everything all the time
    pub fn parseEntitiesRelationMap(
        self: *FileEngine,
        relation_map: *RelationMap,
        buff: *std.ArrayList(u8),
    ) ZipponError!void {
        var fa = std.heap.FixedBufferAllocator.init(&parsing_buffer);
        fa.reset();
        const allocator = fa.allocator();

        var new_buff = std.ArrayList(u8).init(allocator);
        defer new_buff.deinit();
        const writer = new_buff.writer();

        const sstruct = try self.schema_engine.structName2SchemaStruct(relation_map.struct_name);
        const max_file_index = try self.maxFileIndex(sstruct.name);

        log.debug("Max file index {d}", .{max_file_index});

        // If there is no member to find, that means we need to return all members, so let's populate additional data with all of them
        if (relation_map.additional_data.childrens.items.len == 0) {
            relation_map.additional_data.populateWithEverythingExceptLink(sstruct.members, sstruct.types) catch return FileEngineError.MemoryError;
        }

        // Open the dir that contains all files
        const dir = try utils.printOpenDir("{s}/DATA/{s}", .{ self.path_to_ZipponDB_dir, sstruct.name }, .{ .access_sub_paths = false });

        // Multi thread stuffs
        var sync_context = ThreadSyncContext.init(
            relation_map.additional_data.limit,
            max_file_index + 1,
        );

        // Do one writer for each thread, otherwise it creates errors by writing at the same time
        var thread_map_list = allocator.alloc(std.AutoHashMap([16]u8, JsonString), max_file_index + 1) catch return FileEngineError.MemoryError;

        // Start parsing all files in multiple threads
        for (0..(max_file_index + 1)) |file_index| {
            thread_map_list[file_index] = relation_map.map.cloneWithAllocator(allocator);

            self.thread_pool.spawn(parseEntitiesRelationMapOneFile, .{
                &thread_map_list[file_index],
                file_index,
                dir,
                sstruct.zid_schema,
                relation_map.additional_data,
                try self.schema_engine.structName2DataType(relation_map.struct_name),
                &sync_context,
            }) catch return FileEngineError.ThreadError;
        }

        // Wait for all threads to either finish or return an error
        while (!sync_context.isComplete()) {
            std.time.sleep(10_000_000); // Check every 10ms
        }

        // Now here I should have a list of copies of the map, with the UUID spread a bit everywhere

        // Put all in the same map
        for (thread_map_list) |map| {
            var iter = map.iterator();
            while (iter.next()) |entry| {
                if (entry.value_ptr.*) |json_string| relation_map.map.put(entry.key_ptr.*, json_string);
            }
        }

        // Here I write the new string and update the buff to have the new version
        EntityWriter.updateWithRelation(writer, buff.items, relation_map.map);
        buff.clearRetainingCapacity();
        buff.writer().writeAll(new_buff.items);

        // Now here I need to iterate if buff.items still has {|<>|}
    }

    fn parseEntitiesRelationMapOneFile(
        map: *std.AutoHashMap([16]u8, []const u8),
        file_index: u64,
        dir: std.fs.Dir,
        zid_schema: []zid.DType,
        additional_data: *AdditionalData,
        data_types: []const DataType,
        sync_context: *ThreadSyncContext,
    ) void {
        var data_buffer: [BUFFER_SIZE]u8 = undefined;
        var fa = std.heap.FixedBufferAllocator.init(&data_buffer);
        defer fa.reset();
        const allocator = fa.allocator();

        const parent_alloc = map.allocator;
        var string_list = std.ArrayList(u8).init(allocator);
        const writer = string_list.writer();

        const path = std.fmt.bufPrint(&path_buffer, "{d}.zid", .{file_index}) catch |err| {
            sync_context.logError("Error creating file path", err);
            return;
        };

        var iter = zid.DataIterator.init(allocator, path, dir, zid_schema) catch |err| {
            sync_context.logError("Error initializing DataIterator", err);
            return;
        };

        while (iter.next() catch |err| {
            sync_context.logError("Error in iter next", err);
            return;
        }) |row| {
            if (sync_context.checkStructLimit()) break;
            if (!map.contains(row[0].UUID)) continue;
            defer string_list.clearRetainingCapacity();

            EntityWriter.writeEntityJSON(
                writer,
                row,
                additional_data,
                data_types,
            ) catch |err| {
                sync_context.logError("Error writing entity", err);
                return;
            };
            map.put(row[0].UUID, parent_alloc.dupe(u8, string_list.items)) catch |err| {
                sync_context.logError("Error writing entity", err);
                return;
            };

            if (sync_context.incrementAndCheckStructLimit()) break;
        }

        _ = sync_context.completeThread();
    }

    // --------------------Change existing files--------------------

    // TODO: Make it in batch too

@ -1,5 +1,6 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const RelationMap = @import("relationMap.zig").RelationMap;
const dtype = @import("dtype");
const DataType = dtype.DataType;

@ -33,6 +34,15 @@ pub const AdditionalData = struct {
        self.childrens.items[self.childrens.items.len - 1].additional_data = AdditionalData.init(self.allocator);
        return &self.childrens.items[self.childrens.items.len - 1].additional_data.?;
    }

    /// Create an array of empty RelationMap based on the additionalData
    pub fn relationMapArrayInit(self: AdditionalData, allocator: Allocator) ZipponError!?[]RelationMap {
        // So here I should have relationship if children are relations
        var array = std.ArrayList(RelationMap).init(allocator);
        for (self.childrens.items) |child| {
            child.
        }
    }
};

// This is name in: [name]

src/stuffs/relationMap.zig (new file, 32 lines)
@ -0,0 +1,32 @@

// A relation map is all the data needed to add relationships during parsing.
// How it works: the first time I parse the struct files, like User, I populate a map of UUID -> empty string,
// and in the JSON string I write {|<[16]u8>|} inside. Then I can use this struct to parse the file again
// and, if the UUID is in the map, I write the JSON in its value in the map.
// It needs to be recursive, as additional data can do stuff like [name, friends [name, best_friend]].
// I could use parseEntities in a recursive way. But that means reading the file at each loop =/
//
// No no no. But on the other hand that would solve the issue of getting the UUID of the best_friend.
// Fuck, that's true, I didn't think about that. I can only populate the UUID that I want from the current depth of the additional data.
// So I need to parse multiple times. But that is solved when using multiple parses.
// Because of GRAB User [comments [post]]: how do I get the UUID of the Post if I only parse User?
//
// Ok so I need to go recursive on parseEntities.
// So I parse one time; if additional data has a relationship, I create a list of RelationMap.
// When I parse, I populate the RelationMap with the UUID I want.
// Then for each RelationMap, I parse the files again, this time to update the first JSON that now has {|<>|},
// with a sub additionalData. If there is an additional data relation, I recurse.
// So I need an option in parseEntity to either write the first JSON or update the existing one.

const std = @import("std");
const AdditionalData = @import("stuffs/additionalData.zig").AdditionalData;

pub const JsonString = struct {
    slice: []const u8 = "",
    init: bool = false,
};

pub const RelationMap = struct {
    struct_name: []const u8,
    additional_data: AdditionalData,
    map: *std.AutoHashMap([16]u8, JsonString),
};
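
A minimal sketch of how I picture this map being filled, written as a standalone test (the placeholder UUID and JSON are made up for illustration; the real population happens in the FileEngine):

```zig
const std = @import("std");

// JsonString here mirrors the struct defined above, so the sketch stands alone.
const JsonString = struct {
    slice: []const u8 = "",
    init: bool = false,
};

test "RelationMap population idea" {
    const allocator = std.testing.allocator;

    // First pass over e.g. Comment: register every related UUID with an empty JsonString,
    // while the main JSON gets a {|<uuid bytes>|} placeholder written in its place.
    var map = std.AutoHashMap([16]u8, JsonString).init(allocator);
    defer map.deinit();

    const uuid = [_]u8{0} ** 16; // stand-in for a UUID found during the first parse
    try map.put(uuid, .{});

    // Second pass over User: the JSON of the related entity is now known, so fill it in.
    try map.put(uuid, .{ .slice = "{\"name\": \"Bob\"}", .init = true });
    try std.testing.expect(map.get(uuid).?.init);
}
```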
@ -184,19 +184,13 @@ pub const Parser = struct {

                var filter = try self.parseFilter(allocator, struct_name, false);
                defer filter.deinit();

                var buff = std.ArrayList(u8).init(allocator);
                defer buff.deinit();

                try self.file_engine.parseEntities(struct_name, filter, &additional_data, &buff.writer());
                send("{s}", .{buff.items});
                const json_string = try self.file_engine.parseEntities(struct_name, filter, &additional_data, allocator);
                send("{s}", .{json_string});
                state = .end;
            },
            .eof => {
                var buff = std.ArrayList(u8).init(allocator);
                defer buff.deinit();

                try self.file_engine.parseEntities(struct_name, null, &additional_data, &buff.writer());
                send("{s}", .{buff.items});
                const json_string = try self.file_engine.parseEntities(struct_name, null, &additional_data, allocator);
                send("{s}", .{json_string});
                state = .end;
            },
            else => return printError(
@ -22,8 +22,10 @@ pub fn myLog(
) void {
    _ = message_level;
    _ = scope;
    _ = format;
    _ = args;
    if (true) {
        std.debug.print(format, args);
        std.debug.print("\n", .{});
    }
}

// use in custom panic handler