From ac07ddadeb36019c262b4f28a4fa3884f6f50b32 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 5 May 2023 13:26:58 -0700 Subject: [PATCH] InternPool: enhance integer values The Key struct now has a Storage tagged union which can store a u64, i64, or big int. This is needed so that indexToKey can be implemented for integers stored compactly in the data structure. --- src/InternPool.zig | 244 ++++++++++++++++++++++++++++++--------------- src/Sema.zig | 6 +- src/value.zig | 142 ++++++++++++++++---------- 3 files changed, 257 insertions(+), 135 deletions(-) diff --git a/src/InternPool.zig b/src/InternPool.zig index 7328c74b4f..1da0572bd4 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -11,6 +11,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const BigIntConst = std.math.big.int.Const; +const BigIntMutable = std.math.big.int.Mutable; const InternPool = @This(); const DeclIndex = enum(u32) { _ }; @@ -50,10 +51,7 @@ pub const Key = union(enum) { /// Index into the string table bytes. lib_name: u32, }, - int: struct { - ty: Index, - big_int: BigIntConst, - }, + int: Key.Int, enum_tag: struct { ty: Index, tag: BigIntConst, @@ -110,6 +108,32 @@ pub const Key = union(enum) { child: Index, }; + pub const Int = struct { + ty: Index, + storage: Storage, + + pub const Storage = union(enum) { + u64: u64, + i64: i64, + big_int: BigIntConst, + + /// Big enough to fit any non-BigInt value + pub const BigIntSpace = struct { + /// The +1 is headroom so that operations such as incrementing once + /// or decrementing once are possible without using an allocator. + limbs: [(@sizeOf(u64) / @sizeOf(std.math.big.Limb)) + 1]std.math.big.Limb, + }; + + pub fn toBigInt(storage: Storage, space: *BigIntSpace) BigIntConst { + return switch (storage) { + .big_int => |x| x, + .u64 => |x| BigIntMutable.init(&space.limbs, x).toConst(), + .i64 => |x| BigIntMutable.init(&space.limbs, x).toConst(), + }; + } + }; + }; + pub fn hash32(key: Key) u32 { return @truncate(u32, key.hash64()); } @@ -137,9 +161,13 @@ pub const Key = union(enum) { => |info| std.hash.autoHash(hasher, info), .int => |int| { + // Canonicalize all integers by converting them to BigIntConst. + var buffer: Key.Int.Storage.BigIntSpace = undefined; + const big_int = int.storage.toBigInt(&buffer); + std.hash.autoHash(hasher, int.ty); - std.hash.autoHash(hasher, int.big_int.positive); - for (int.big_int.limbs) |limb| std.hash.autoHash(hasher, limb); + std.hash.autoHash(hasher, big_int.positive); + for (big_int.limbs) |limb| std.hash.autoHash(hasher, limb); }, .enum_tag => |enum_tag| { @@ -573,42 +601,27 @@ pub const static_keys = [_]Key{ .{ .int = .{ .ty = .comptime_int_type, - .big_int = .{ - .limbs = &.{0}, - .positive = true, - }, + .storage = .{ .u64 = 0 }, } }, .{ .int = .{ .ty = .usize_type, - .big_int = .{ - .limbs = &.{0}, - .positive = true, - }, + .storage = .{ .u64 = 0 }, } }, .{ .int = .{ .ty = .u8_type, - .big_int = .{ - .limbs = &.{0}, - .positive = true, - }, + .storage = .{ .u64 = 0 }, } }, .{ .int = .{ .ty = .comptime_int_type, - .big_int = .{ - .limbs = &.{1}, - .positive = true, - }, + .storage = .{ .u64 = 1 }, } }, .{ .int = .{ .ty = .usize_type, - .big_int = .{ - .limbs = &.{1}, - .positive = true, - }, + .storage = .{ .u64 = 1 }, } }, .{ .enum_tag = .{ @@ -680,19 +693,19 @@ pub const Tag = enum(u8) { simple_internal, /// Type: u32 /// data is integer value - int_small_u32, + int_u32, /// Type: i32 /// data is integer value bitcasted to u32. - int_small_i32, + int_i32, /// A usize that fits in 32 bits. /// data is integer value. - int_small_usize, + int_usize, /// A comptime_int that fits in a u32. /// data is integer value. - int_small_comptime_unsigned, + int_comptime_int_u32, /// A comptime_int that fits in an i32. /// data is integer value bitcasted to u32. - int_small_comptime_signed, + int_comptime_int_i32, /// A positive integer value. /// data is a limbs index to Int. int_positive, @@ -932,11 +945,26 @@ pub fn indexToKey(ip: InternPool, index: Index) Key { .type_error_union => @panic("TODO"), .type_enum_simple => @panic("TODO"), .simple_internal => @panic("TODO"), - .int_small_u32 => @panic("TODO"), - .int_small_i32 => @panic("TODO"), - .int_small_usize => @panic("TODO"), - .int_small_comptime_unsigned => @panic("TODO"), - .int_small_comptime_signed => @panic("TODO"), + .int_u32 => return .{ .int = .{ + .ty = .u32_type, + .storage = .{ .u64 = data }, + } }, + .int_i32 => return .{ .int = .{ + .ty = .i32_type, + .storage = .{ .i64 = @bitCast(i32, data) }, + } }, + .int_usize => return .{ .int = .{ + .ty = .usize_type, + .storage = .{ .u64 = data }, + } }, + .int_comptime_int_u32 => return .{ .int = .{ + .ty = .comptime_int_type, + .storage = .{ .u64 = data }, + } }, + .int_comptime_int_i32 => return .{ .int = .{ + .ty = .comptime_int_type, + .storage = .{ .i64 = @bitCast(i32, data) }, + } }, .int_positive => @panic("TODO"), .int_negative => @panic("TODO"), .enum_tag_positive => @panic("TODO"), @@ -1041,54 +1069,114 @@ pub fn get(ip: *InternPool, gpa: Allocator, key: Key) Allocator.Error!Index { .int => |int| b: { switch (int.ty) { - .u32_type => { - if (int.big_int.fits(u32)) { - ip.items.appendAssumeCapacity(.{ - .tag = .int_small_u32, - .data = int.big_int.to(u32) catch unreachable, - }); - break :b; - } + .u32_type => switch (int.storage) { + .big_int => |big_int| { + if (big_int.to(u32)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_u32, + .data = casted, + }); + break :b; + } else |_| {} + }, + inline .u64, .i64 => |x| { + if (std.math.cast(u32, x)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_u32, + .data = casted, + }); + break :b; + } + }, }, - .i32_type => { - if (int.big_int.fits(i32)) { - ip.items.appendAssumeCapacity(.{ - .tag = .int_small_i32, - .data = @bitCast(u32, int.big_int.to(i32) catch unreachable), - }); - break :b; - } + .i32_type => switch (int.storage) { + .big_int => |big_int| { + if (big_int.to(i32)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_i32, + .data = @bitCast(u32, casted), + }); + break :b; + } else |_| {} + }, + inline .u64, .i64 => |x| { + if (std.math.cast(i32, x)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_u32, + .data = @bitCast(u32, casted), + }); + break :b; + } + }, }, - .usize_type => { - if (int.big_int.fits(u32)) { - ip.items.appendAssumeCapacity(.{ - .tag = .int_small_usize, - .data = int.big_int.to(u32) catch unreachable, - }); - break :b; - } + .usize_type => switch (int.storage) { + .big_int => |big_int| { + if (big_int.to(u32)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_usize, + .data = casted, + }); + break :b; + } else |_| {} + }, + inline .u64, .i64 => |x| { + if (std.math.cast(u32, x)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_usize, + .data = casted, + }); + break :b; + } + }, }, - .comptime_int_type => { - if (int.big_int.fits(u32)) { - ip.items.appendAssumeCapacity(.{ - .tag = .int_small_comptime_unsigned, - .data = int.big_int.to(u32) catch unreachable, - }); - break :b; - } - if (int.big_int.fits(i32)) { - ip.items.appendAssumeCapacity(.{ - .tag = .int_small_comptime_signed, - .data = @bitCast(u32, int.big_int.to(i32) catch unreachable), - }); - break :b; - } + .comptime_int_type => switch (int.storage) { + .big_int => |big_int| { + if (big_int.to(u32)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_comptime_int_u32, + .data = casted, + }); + break :b; + } else |_| {} + if (big_int.to(i32)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_comptime_int_i32, + .data = @bitCast(u32, casted), + }); + break :b; + } else |_| {} + }, + inline .u64, .i64 => |x| { + if (std.math.cast(u32, x)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_comptime_int_u32, + .data = casted, + }); + break :b; + } + if (std.math.cast(i32, x)) |casted| { + ip.items.appendAssumeCapacity(.{ + .tag = .int_comptime_int_i32, + .data = @bitCast(u32, casted), + }); + break :b; + } + }, }, else => {}, } - - const tag: Tag = if (int.big_int.positive) .int_positive else .int_negative; - try addInt(ip, gpa, int.ty, tag, int.big_int.limbs); + switch (int.storage) { + .big_int => |big_int| { + const tag: Tag = if (big_int.positive) .int_positive else .int_negative; + try addInt(ip, gpa, int.ty, tag, big_int.limbs); + }, + inline .i64, .u64 => |x| { + var buf: [2]usize = undefined; + const big_int = BigIntMutable.init(&buf, x).toConst(); + const tag: Tag = if (big_int.positive) .int_positive else .int_negative; + try addInt(ip, gpa, int.ty, tag, big_int.limbs); + }, + } }, .enum_tag => |enum_tag| { diff --git a/src/Sema.zig b/src/Sema.zig index 3406d0d80f..37d6f3e902 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1981,7 +1981,7 @@ fn resolveMaybeUndefValAllowVariablesMaybeRuntime( if (air_tags[i] == .constant) { const ty_pl = sema.air_instructions.items(.data)[i].ty_pl; const val = sema.air_values.items[ty_pl.payload]; - if (val.tag() == .variable) return val; + if (val.tagIsVariable()) return val; } return opv; } @@ -5033,7 +5033,7 @@ fn storeToInferredAllocComptime( // There will be only one store_to_inferred_ptr because we are running at comptime. // The alloc will turn into a Decl. if (try sema.resolveMaybeUndefValAllowVariables(operand)) |operand_val| store: { - if (operand_val.tag() == .variable) break :store; + if (operand_val.tagIsVariable()) break :store; var anon_decl = try block.startAnonDecl(); defer anon_decl.deinit(); iac.data.decl_index = try anon_decl.finish( @@ -28125,7 +28125,7 @@ fn beginComptimePtrLoad( const is_mutable = ptr_val.tag() == .decl_ref_mut; const decl = sema.mod.declPtr(decl_index); const decl_tv = try decl.typedValue(); - if (decl_tv.val.tag() == .variable) return error.RuntimeLoad; + if (decl_tv.val.tagIsVariable()) return error.RuntimeLoad; const layout_defined = decl.ty.hasWellDefinedLayout(mod); break :blk ComptimePtrLoadKit{ diff --git a/src/value.zig b/src/value.zig index c2c37ba68b..588218f670 100644 --- a/src/value.zig +++ b/src/value.zig @@ -796,17 +796,17 @@ pub const Value = struct { mod: *const Module, opt_sema: ?*Sema, ) Module.CompileError!BigIntConst { - switch (val.ip_index) { - .bool_false => return BigIntMutable.init(&space.limbs, 0).toConst(), - .bool_true => return BigIntMutable.init(&space.limbs, 1).toConst(), + return switch (val.ip_index) { + .bool_false => BigIntMutable.init(&space.limbs, 0).toConst(), + .bool_true => BigIntMutable.init(&space.limbs, 1).toConst(), .undef => unreachable, - .null_value => return BigIntMutable.init(&space.limbs, 0).toConst(), + .null_value => BigIntMutable.init(&space.limbs, 0).toConst(), .none => switch (val.tag()) { .zero, .the_only_possible_value, // i0, u0 - => return BigIntMutable.init(&space.limbs, 0).toConst(), + => BigIntMutable.init(&space.limbs, 0).toConst(), - .one => return BigIntMutable.init(&space.limbs, 1).toConst(), + .one => BigIntMutable.init(&space.limbs, 1).toConst(), .enum_field_index => { const index = val.castTag(.enum_field_index).?.data; @@ -816,10 +816,10 @@ pub const Value = struct { const sub_val = val.castTag(.runtime_value).?.data; return sub_val.toBigIntAdvanced(space, mod, opt_sema); }, - .int_u64 => return BigIntMutable.init(&space.limbs, val.castTag(.int_u64).?.data).toConst(), - .int_i64 => return BigIntMutable.init(&space.limbs, val.castTag(.int_i64).?.data).toConst(), - .int_big_positive => return val.castTag(.int_big_positive).?.asBigInt(), - .int_big_negative => return val.castTag(.int_big_negative).?.asBigInt(), + .int_u64 => BigIntMutable.init(&space.limbs, val.castTag(.int_u64).?.data).toConst(), + .int_i64 => BigIntMutable.init(&space.limbs, val.castTag(.int_i64).?.data).toConst(), + .int_big_positive => val.castTag(.int_big_positive).?.asBigInt(), + .int_big_negative => val.castTag(.int_big_negative).?.asBigInt(), .lazy_align => { const ty = val.castTag(.lazy_align).?.data; @@ -849,10 +849,10 @@ pub const Value = struct { else => unreachable, }, else => switch (mod.intern_pool.indexToKey(val.ip_index)) { - .int => |int| return int.big_int, + .int => |int| int.storage.toBigInt(space), else => unreachable, }, - } + }; } /// If the value fits in a u64, return it, otherwise null. @@ -900,7 +900,11 @@ pub const Value = struct { else => return null, }, else => return switch (mod.intern_pool.indexToKey(val.ip_index)) { - .int => |int| int.big_int.to(u64) catch null, + .int => |int| switch (int.storage) { + .big_int => |big_int| big_int.to(u64) catch null, + .u64 => |x| x, + .i64 => |x| std.math.cast(u64, x), + }, else => null, }, } @@ -940,18 +944,22 @@ pub const Value = struct { else => unreachable, }, - else => switch (mod.intern_pool.indexToKey(val.ip_index)) { - .int => |int| return int.big_int.to(i64) catch unreachable, + else => return switch (mod.intern_pool.indexToKey(val.ip_index)) { + .int => |int| switch (int.storage) { + .big_int => |big_int| big_int.to(i64) catch unreachable, + .i64 => |x| x, + .u64 => |x| @intCast(i64, x), + }, else => unreachable, }, } } pub fn toBool(val: Value, mod: *const Module) bool { - switch (val.ip_index) { - .bool_true => return true, - .bool_false => return false, - .none => return switch (val.tag()) { + return switch (val.ip_index) { + .bool_true => true, + .bool_false => false, + .none => switch (val.tag()) { .one => true, .zero => false, @@ -968,10 +976,13 @@ pub const Value = struct { else => unreachable, }, else => switch (mod.intern_pool.indexToKey(val.ip_index)) { - .int => |int| return !int.big_int.eqZero(), + .int => |int| switch (int.storage) { + .big_int => |big_int| !big_int.eqZero(), + inline .u64, .i64 => |x| x != 0, + }, else => unreachable, }, - } + }; } fn isDeclRef(val: Value) bool { @@ -1483,12 +1494,12 @@ pub const Value = struct { pub fn clz(val: Value, ty: Type, mod: *const Module) u64 { const ty_bits = ty.intInfo(mod).bits; - switch (val.ip_index) { - .bool_false => return ty_bits, - .bool_true => return ty_bits - 1, + return switch (val.ip_index) { + .bool_false => ty_bits, + .bool_true => ty_bits - 1, .none => switch (val.tag()) { - .zero => return ty_bits, - .one => return ty_bits - 1, + .zero => ty_bits, + .one => ty_bits - 1, .int_u64 => { const big = @clz(val.castTag(.int_u64).?.data); @@ -1519,20 +1530,24 @@ pub const Value = struct { else => unreachable, }, else => switch (mod.intern_pool.indexToKey(val.ip_index)) { - .int => |int| return int.big_int.clz(ty_bits), + .int => |int| switch (int.storage) { + .big_int => |big_int| big_int.clz(ty_bits), + .u64 => |x| @clz(x) + ty_bits - 64, + .i64 => @panic("TODO implement i64 Value clz"), + }, else => unreachable, }, - } + }; } pub fn ctz(val: Value, ty: Type, mod: *const Module) u64 { const ty_bits = ty.intInfo(mod).bits; - switch (val.ip_index) { - .bool_false => return ty_bits, - .bool_true => return 0, + return switch (val.ip_index) { + .bool_false => ty_bits, + .bool_true => 0, .none => switch (val.tag()) { - .zero => return ty_bits, - .one => return 0, + .zero => ty_bits, + .one => 0, .int_u64 => { const big = @ctz(val.castTag(.int_u64).?.data); @@ -1563,10 +1578,17 @@ pub const Value = struct { else => unreachable, }, else => switch (mod.intern_pool.indexToKey(val.ip_index)) { - .int => |int| return int.big_int.ctz(), + .int => |int| switch (int.storage) { + .big_int => |big_int| big_int.ctz(), + .u64 => |x| { + const big = @ctz(x); + return if (big == 64) ty_bits else big; + }, + .i64 => @panic("TODO implement i64 Value ctz"), + }, else => unreachable, }, - } + }; } pub fn popCount(val: Value, ty: Type, mod: *const Module) u64 { @@ -1591,7 +1613,9 @@ pub const Value = struct { else => switch (mod.intern_pool.indexToKey(val.ip_index)) { .int => |int| { const info = ty.intInfo(mod); - return int.big_int.popCount(info.bits); + var buffer: Value.BigIntSpace = undefined; + const big_int = int.storage.toBigInt(&buffer); + return @intCast(u64, big_int.popCount(info.bits)); }, else => unreachable, }, @@ -1641,23 +1665,23 @@ pub const Value = struct { /// Returns the number of bits the value requires to represent stored in twos complement form. pub fn intBitCountTwosComp(self: Value, mod: *const Module) usize { const target = mod.getTarget(); - switch (self.ip_index) { - .bool_false => return 0, - .bool_true => return 1, + return switch (self.ip_index) { + .bool_false => 0, + .bool_true => 1, .none => switch (self.tag()) { .zero, .the_only_possible_value, - => return 0, + => 0, - .one => return 1, + .one => 1, .int_u64 => { const x = self.castTag(.int_u64).?.data; if (x == 0) return 0; return @intCast(usize, std.math.log2(x) + 1); }, - .int_big_positive => return self.castTag(.int_big_positive).?.asBigInt().bitCountTwosComp(), - .int_big_negative => return self.castTag(.int_big_negative).?.asBigInt().bitCountTwosComp(), + .int_big_positive => self.castTag(.int_big_positive).?.asBigInt().bitCountTwosComp(), + .int_big_negative => self.castTag(.int_big_negative).?.asBigInt().bitCountTwosComp(), .decl_ref_mut, .comptime_field_ptr, @@ -1667,7 +1691,7 @@ pub const Value = struct { .variable, .eu_payload_ptr, .opt_payload_ptr, - => return target.ptrBitWidth(), + => target.ptrBitWidth(), else => { var buffer: BigIntSpace = undefined; @@ -1675,10 +1699,18 @@ pub const Value = struct { }, }, else => switch (mod.intern_pool.indexToKey(self.ip_index)) { - .int => |int| return int.big_int.bitCountTwosComp(), + .int => |int| switch (int.storage) { + .big_int => |big_int| big_int.bitCountTwosComp(), + .u64 => |x| if (x == 0) 0 else @intCast(usize, std.math.log2(x) + 1), + .i64 => { + var buffer: Value.BigIntSpace = undefined; + const big_int = int.storage.toBigInt(&buffer); + return big_int.bitCountTwosComp(); + }, + }, else => unreachable, }, - } + }; } /// Converts an integer or a float to a float. May result in a loss of information. @@ -1798,8 +1830,11 @@ pub const Value = struct { else => unreachable, }, - else => switch (mod.intern_pool.indexToKey(lhs.ip_index)) { - .int => |int| return int.big_int.orderAgainstScalar(0), + else => return switch (mod.intern_pool.indexToKey(lhs.ip_index)) { + .int => |int| switch (int.storage) { + .big_int => |big_int| big_int.orderAgainstScalar(0), + inline .u64, .i64 => |x| std.math.order(x, 0), + }, else => unreachable, }, } @@ -2777,6 +2812,10 @@ pub const Value = struct { } } + pub fn tagIsVariable(val: Value) bool { + return val.ip_index == .none and val.tag() == .variable; + } + /// Returns true if a Value is backed by a variable pub fn isVariable(val: Value, mod: *Module) bool { return switch (val.ip_index) { @@ -5399,12 +5438,7 @@ pub const Value = struct { }; }; - /// Big enough to fit any non-BigInt value - pub const BigIntSpace = struct { - /// The +1 is headroom so that operations such as incrementing once or decrementing once - /// are possible without using an allocator. - limbs: [(@sizeOf(u64) / @sizeOf(std.math.big.Limb)) + 1]std.math.big.Limb, - }; + pub const BigIntSpace = InternPool.Key.Int.Storage.BigIntSpace; pub const zero = initTag(.zero); pub const one = initTag(.one);