From 7844aacfceddaba86a9a1e0910a7c1b038fced1c Mon Sep 17 00:00:00 2001
From: adrien
Date: Mon, 4 May 2026 19:10:06 +0200
Subject: [PATCH] Added a UnitParser to get Dimensions and Scales from a str

---
 src/Scales.zig     |   3 +-
 src/UnitParser.zig | 161 +++++++++++++++++++++++++++++++++++++++++++++
 src/main.zig       |   2 +
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 src/UnitParser.zig

diff --git a/src/Scales.zig b/src/Scales.zig
index e7877e1..d65eeeb 100644
--- a/src/Scales.zig
+++ b/src/Scales.zig
@@ -59,7 +59,8 @@ pub const UnitScale = enum(isize) {
         var buf: [16]u8 = undefined;
         return switch (self) {
             .none => "",
-            .P, .T, .G, .M, .k, .h, .da, .d, .c, .m, .u, .n, .p, .f, .min, .hour, .year, .inch, .ft, .yd, .mi, .oz, .lb, .st => @tagName(self),
+            .P, .T, .G, .M, .k, .h, .da, .d, .c, .m, .u, .n, .p, .f, .min, .year, .inch, .ft, .yd, .mi, .oz, .lb, .st => @tagName(self),
+            .hour => "h",
             else => std.fmt.bufPrint(&buf, "[{d}]", .{@intFromEnum(self)}) catch "[]", // This cannot be inline because of non exhaustive enum, but that's ok, it is just str, not calculation
         };
     }
diff --git a/src/UnitParser.zig b/src/UnitParser.zig
new file mode 100644
index 0000000..f966e7b
--- /dev/null
+++ b/src/UnitParser.zig
@@ -0,0 +1,161 @@
+const std = @import("std");
+const Dimensions = @import("Dimensions.zig");
+const Scales = @import("Scales.zig");
+
+/// A container returning the separated arguments needed to construct a Tensor.
+pub const ParsedUnit = struct {
+    dims: Dimensions.ArgOpts = .{},
+    scales: Scales.ArgOpts = .{},
+};
+
+/// Error set used by the unit parser.
+pub const UnitParseError = error{
+    UnknownBaseUnit,
+    UnknownPrefix,
+    InvalidExponent,
+    EmptyStr,
+};
+
+/// Parses strings like "km/s^2", "m", "kg*m/s^2", "1/min".
+/// Evaluates entirely at comptime.
+///
+/// Segments are separated by '/', '.' or '*'. Every segment that follows the
+/// first '/' has its exponent negated. A bare "1" segment is a dimensionless
+/// placeholder (as in "1/min") and contributes nothing.
+///
+/// Returns `EmptyStr` for an empty string, `UnknownBaseUnit` when a segment
+/// matches no unit and `InvalidExponent` when an exponent is not an integer.
+pub fn parseUnit(comptime str: []const u8) !ParsedUnit {
+    if (str.len == 0) return UnitParseError.EmptyStr;
+
+    var parsed: ParsedUnit = .{ .dims = .{}, .scales = .{} };
+
+    // We need to track if we are after a '/' to flip exponents to negative
+    var is_denominator = false;
+
+    // Manual iteration to handle '/' properly
+    var cursor: usize = 0;
+    while (cursor < str.len) {
+        // Find the next segment
+        const segment_start = cursor;
+        while (cursor < str.len and str[cursor] != '/' and str[cursor] != '.' and str[cursor] != '*') : (cursor += 1) {}
+        const segment = str[segment_start..cursor];
+
+        // Empty segments (doubled separators) and the "1" placeholder carry no unit
+        if (segment.len > 0 and !std.mem.eql(u8, segment, "1")) {
+            try parseSegment(segment, &parsed, is_denominator);
+        }
+
+        if (cursor < str.len) {
+            if (str[cursor] == '/') {
+                is_denominator = true;
+            }
+            cursor += 1; // skip the separator
+        }
+    }
+
+    return parsed;
+}
+
+/// Parses one segment (an optional scale, a unit and an optional exponent such
+/// as "km", "s^-2" or "h") and accumulates the result into `parsed`.
+/// The exponent is negated when `is_denominator` is true.
+fn parseSegment(comptime segment: []const u8, parsed: *ParsedUnit, is_denominator: bool) !void {
+    var scale: Scales.UnitScale = .none;
+    var found_scale = false;
+    var active_dim: ?Dimensions.Dimension = null;
+
+    // 1. Try to find a Scale + Dimension pair (e.g., "mm", "km")
+    inline for (std.enums.values(Scales.UnitScale)) |sca| {
+        const s_str = sca.str();
+        if (s_str.len > 0 and std.mem.startsWith(u8, segment, s_str)) {
+            // Check if it's a "Unit-as-Scale" (hour, min) or a prefix (k, m, c)
+            switch (sca) {
+                .hour, .min, .year => {
+                    // These are dimensions themselves (Time)
+                    if (segment.len == s_str.len or (segment.len > s_str.len and (segment[s_str.len] == '^' or (segment[s_str.len] >= '0' and segment[s_str.len] <= '9')))) {
+                        scale = sca;
+                        active_dim = .T;
+                        found_scale = true;
+                    }
+                },
+                else => {
+                    // Standard prefixes: Must be followed by a valid dimension unit
+                    inline for (std.enums.values(Dimensions.Dimension)) |dim| {
+                        if (std.mem.startsWith(u8, segment[s_str.len..], dim.unit())) {
+                            scale = sca;
+                            active_dim = dim;
+                            found_scale = true;
+                            break;
+                        }
+                    }
+                },
+            }
+        }
+        if (found_scale) break;
+    }
+
+    // 2. If no scale prefix was found, try identifying as a pure Dimension (e.g., "m", "s")
+    if (!found_scale) {
+        inline for (std.enums.values(Dimensions.Dimension)) |dim| {
+            if (std.mem.startsWith(u8, segment, dim.unit())) {
+                active_dim = dim;
+                break;
+            }
+        }
+    }
+
+    const dimen = active_dim orelse return UnitParseError.UnknownBaseUnit;
+
+    // 3. Determine where the exponent starts
+    // If it was a Time Scale (like 'h'), the exponent starts after 'h'
+    // If it was a Prefix + Dim (like 'km'), it starts after 'km'
+    const unit_part_len = if (found_scale)
+        (if (scale == .hour or scale == .min or scale == .year) scale.str().len else scale.str().len + dimen.unit().len)
+    else
+        dimen.unit().len;
+
+    const expo_str = segment[unit_part_len..];
+
+    // 4. Parse Exponent
+    var expo: i32 = 1;
+    if (expo_str.len > 0) {
+        const cleaned_expo = if (expo_str[0] == '^') expo_str[1..] else expo_str;
+        expo = std.fmt.parseInt(i32, cleaned_expo, 10) catch return UnitParseError.InvalidExponent;
+    }
+
+    if (is_denominator) expo *= -1;
+
+    // 5. Assign to struct
+    inline for (std.meta.fields(Dimensions.ArgOpts)) |f| {
+        if (std.mem.eql(u8, f.name, @tagName(dimen))) {
+            @field(parsed.dims, f.name) += expo;
+            // Plain assignment: a later segment of the same dimension overwrites the scale
+            @field(parsed.scales, f.name) = scale;
+        }
+    }
+}
+
+/// Test helper: parses `str` at comptime and fails with `WrongDims` or
+/// `WrongScales` when the result differs from the expected arguments.
+inline fn testParser(
+    comptime str: []const u8,
+    comptime expected_dims: Dimensions.ArgOpts,
+    comptime expected_scales: Scales.ArgOpts,
+) !void {
+    const unit = comptime try parseUnit(str);
+    if (comptime !Dimensions.init(expected_dims).eql(Dimensions.init(unit.dims))) return error.WrongDims;
+    if (comptime !Scales.init(expected_scales).eql(Scales.init(unit.scales))) return error.WrongScales;
+}
+
+test "parseUnit" {
+    @setEvalBranchQuota(20000);
+    try testParser("m", .{ .L = 1 }, .{});
+    try testParser("s", .{ .T = 1 }, .{});
+    try testParser("mm", .{ .L = 1 }, .{ .L = .m });
+    try testParser("m/s", .{ .L = 1, .T = -1 }, .{});
+    try testParser("m1/s2/kg", .{ .L = 1, .T = -2, .M = -1 }, .{ .M = .k });
+    try testParser("km/h", .{ .L = 1, .T = -1 }, .{ .L = .k, .T = .hour });
+    try testParser("m.s^-1", .{ .L = 1, .T = -1 }, .{});
+    try testParser("1/min", .{ .T = -1 }, .{ .T = .min });
+}
diff --git a/src/main.zig b/src/main.zig
index 33ba626..3f760ec 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -4,10 +4,12 @@ pub const Tensor = @import("Tensor.zig").Tensor;
 pub const Dimensions = @import("Dimensions.zig");
 pub const Scales = @import("Scales.zig");
 pub const Base = @import("Base.zig");
+pub const UnitParser = @import("UnitParser.zig");
 
 test {
     _ = @import("Tensor.zig");
     _ = @import("Dimensions.zig");
     _ = @import("Scales.zig");
     _ = @import("Base.zig");
+    _ = @import("UnitParser.zig");
 }