From 41e1cd185b82a518c58c92544c45f0348c03ef74 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 7 May 2018 01:04:43 -0400 Subject: [PATCH] std.SegmentedList implementation --- CMakeLists.txt | 57 ++++----- std/index.zig | 2 + std/math/index.zig | 26 ++++ std/math/log2.zig | 7 +- std/segmented_list.zig | 272 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 330 insertions(+), 34 deletions(-) create mode 100644 std/segmented_list.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index 36f62725da..d435092723 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -416,8 +416,8 @@ set(ZIG_CPP_SOURCES set(ZIG_STD_FILES "array_list.zig" "atomic/index.zig" - "atomic/stack.zig" "atomic/queue.zig" + "atomic/stack.zig" "base64.zig" "buf_map.zig" "buf_set.zig" @@ -427,13 +427,13 @@ set(ZIG_STD_FILES "c/index.zig" "c/linux.zig" "c/windows.zig" + "crypto/blake2.zig" + "crypto/hmac.zig" "crypto/index.zig" "crypto/md5.zig" "crypto/sha1.zig" "crypto/sha2.zig" "crypto/sha3.zig" - "crypto/blake2.zig" - "crypto/hmac.zig" "cstr.zig" "debug/failing_allocator.zig" "debug/index.zig" @@ -445,12 +445,12 @@ set(ZIG_STD_FILES "fmt/errol/index.zig" "fmt/errol/lookup.zig" "fmt/index.zig" - "hash_map.zig" - "hash/index.zig" "hash/adler.zig" "hash/crc.zig" "hash/fnv.zig" + "hash/index.zig" "hash/siphash.zig" + "hash_map.zig" "heap.zig" "index.zig" "io.zig" @@ -466,6 +466,28 @@ set(ZIG_STD_FILES "math/atanh.zig" "math/cbrt.zig" "math/ceil.zig" + "math/complex/abs.zig" + "math/complex/acos.zig" + "math/complex/acosh.zig" + "math/complex/arg.zig" + "math/complex/asin.zig" + "math/complex/asinh.zig" + "math/complex/atan.zig" + "math/complex/atanh.zig" + "math/complex/conj.zig" + "math/complex/cos.zig" + "math/complex/cosh.zig" + "math/complex/exp.zig" + "math/complex/index.zig" + "math/complex/ldexp.zig" + "math/complex/log.zig" + "math/complex/pow.zig" + "math/complex/proj.zig" + "math/complex/sin.zig" + "math/complex/sinh.zig" + "math/complex/sqrt.zig" + "math/complex/tan.zig" + 
"math/complex/tanh.zig" "math/copysign.zig" "math/cos.zig" "math/cosh.zig" @@ -502,33 +524,12 @@ set(ZIG_STD_FILES "math/tan.zig" "math/tanh.zig" "math/trunc.zig" - "math/complex/abs.zig" - "math/complex/acosh.zig" - "math/complex/acos.zig" - "math/complex/arg.zig" - "math/complex/asinh.zig" - "math/complex/asin.zig" - "math/complex/atanh.zig" - "math/complex/atan.zig" - "math/complex/conj.zig" - "math/complex/cosh.zig" - "math/complex/cos.zig" - "math/complex/exp.zig" - "math/complex/index.zig" - "math/complex/ldexp.zig" - "math/complex/log.zig" - "math/complex/pow.zig" - "math/complex/proj.zig" - "math/complex/sinh.zig" - "math/complex/sin.zig" - "math/complex/sqrt.zig" - "math/complex/tanh.zig" - "math/complex/tan.zig" "mem.zig" "net.zig" "os/child_process.zig" "os/darwin.zig" "os/darwin_errno.zig" + "os/epoch.zig" "os/file.zig" "os/get_user_id.zig" "os/index.zig" @@ -538,13 +539,13 @@ set(ZIG_STD_FILES "os/linux/x86_64.zig" "os/path.zig" "os/time.zig" - "os/epoch.zig" "os/windows/error.zig" "os/windows/index.zig" "os/windows/util.zig" "os/zen.zig" "rand/index.zig" "rand/ziggurat.zig" + "segmented_list.zig" "sort.zig" "special/bootstrap.zig" "special/bootstrap_lib.zig" diff --git a/std/index.zig b/std/index.zig index 272f2bbc6a..8abfa3db88 100644 --- a/std/index.zig +++ b/std/index.zig @@ -7,6 +7,7 @@ pub const BufferOutStream = @import("buffer.zig").BufferOutStream; pub const HashMap = @import("hash_map.zig").HashMap; pub const LinkedList = @import("linked_list.zig").LinkedList; pub const IntrusiveLinkedList = @import("linked_list.zig").IntrusiveLinkedList; +pub const SegmentedList = @import("segmented_list.zig").SegmentedList; pub const atomic = @import("atomic/index.zig"); pub const base64 = @import("base64.zig"); @@ -43,6 +44,7 @@ test "std" { _ = @import("buffer.zig"); _ = @import("hash_map.zig"); _ = @import("linked_list.zig"); + _ = @import("segmented_list.zig"); _ = @import("base64.zig"); _ = @import("build.zig"); diff --git a/std/math/index.zig 
b/std/math/index.zig
index 83ba055329..a549a6bb61 100644
--- a/std/math/index.zig
+++ b/std/math/index.zig
@@ -558,6 +558,39 @@ test "math.floorPowerOfTwo" {
     comptime testFloorPowerOfTwo();
 }
 
+/// Returns floor(log2(x)). Asserts that `x` is nonzero.
+pub fn log2_int(comptime T: type, x: T) Log2Int(T) {
+    assert(x != 0);
+    // Index of the highest set bit: the bit width minus one, minus the leading zero count.
+    return Log2Int(T)(T.bit_count - 1 - @clz(x));
+}
+
+/// Returns ceil(log2(x)). Asserts that `x` is nonzero.
+/// NOTE(review): the result must fit in `Log2Int(T)`, whose definition is outside this
+/// hunk; if it cannot represent `T.bit_count`, inputs above `1 << (T.bit_count - 1)`
+/// overflow in the final increment.
+pub fn log2_int_ceil(comptime T: type, x: T) Log2Int(T) {
+    assert(x != 0);
+    const log2_val = log2_int(T, x);
+    // An exact power of two needs no rounding up.
+    if (T(1) << log2_val == x)
+        return log2_val;
+    return log2_val + 1;
+}
+
+test "std.math.log2_int_ceil" {
+    assert(log2_int_ceil(u32, 1) == 0);
+    assert(log2_int_ceil(u32, 2) == 1);
+    assert(log2_int_ceil(u32, 3) == 2);
+    assert(log2_int_ceil(u32, 4) == 2);
+    assert(log2_int_ceil(u32, 5) == 3);
+    assert(log2_int_ceil(u32, 6) == 3);
+    assert(log2_int_ceil(u32, 7) == 3);
+    assert(log2_int_ceil(u32, 8) == 3);
+    assert(log2_int_ceil(u32, 9) == 4);
+    assert(log2_int_ceil(u32, 10) == 4);
+}
+
 fn testFloorPowerOfTwo() void {
     assert(floorPowerOfTwo(u32, 63) == 32);
     assert(floorPowerOfTwo(u32, 64) == 64);
diff --git a/std/math/log2.zig b/std/math/log2.zig
index 998d6d6c5e..d5bbe385c2 100644
--- a/std/math/log2.zig
+++ b/std/math/log2.zig
@@ -31,17 +31,12 @@ pub fn log2(x: var) @typeOf(x) {
             return result;
         },
         TypeId.Int => {
-            return log2_int(T, x);
+            return math.log2_int(T, x);
         },
         else => @compileError("log2 not implemented for " ++ @typeName(T)),
     }
 }
 
-pub fn log2_int(comptime T: type, x: T) T {
-    assert(x != 0);
-    return T.bit_count - 1 - T(@clz(x));
-}
-
 pub fn log2_32(x_: f32) f32 {
     const ivln2hi: f32 = 1.4428710938e+00;
     const ivln2lo: f32 = -1.7605285393e-04;
diff --git a/std/segmented_list.zig b/std/segmented_list.zig
new file mode 100644
index 0000000000..c9acd53464
--- /dev/null
+++ b/std/segmented_list.zig
@@ -0,0 +1,327 @@
+const std = @import("index.zig");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+
+// Imagine that `fn at(self: &Self, index: usize) &T` is a customer asking for a box
+// from a warehouse, based on a flat array, boxes ordered from 0 to N - 1.
+// But the warehouse actually stores boxes in shelves of increasing powers of 2 sizes.
+// So when the customer requests a box index, we have to translate it to shelf index
+// and box index within that shelf. Illustration:
+//
+// customer indexes:
+// shelf 0: 0
+// shelf 1: 1 2
+// shelf 2: 3 4 5 6
+// shelf 3: 7 8 9 10 11 12 13 14
+// shelf 4: 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
+// shelf 5: 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
+// ...
+//
+// warehouse indexes:
+// shelf 0: 0
+// shelf 1: 0 1
+// shelf 2: 0 1 2 3
+// shelf 3: 0 1 2 3 4 5 6 7
+// shelf 4: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+// shelf 5: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+// ...
+//
+// With this arrangement, here are the equations to get the shelf index and
+// box index based on customer box index:
+//
+// shelf_index = floor(log2(customer_index + 1))
+// shelf_count = ceil(log2(box_count + 1))
+// box_index = customer_index + 1 - 2 ** shelf_index
+// shelf_size = 2 ** shelf_index
+//
+// Now we complicate it a little bit further by adding a preallocated shelf, whose size must be
+// a power of 2:
+// prealloc=4
+//
+// customer indexes:
+// prealloc: 0 1 2 3
+// shelf 0: 4 5 6 7 8 9 10 11
+// shelf 1: 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
+// shelf 2: 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
+// ...
+//
+// warehouse indexes:
+// prealloc: 0 1 2 3
+// shelf 0: 0 1 2 3 4 5 6 7
+// shelf 1: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+// shelf 2: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+// ...
+//
+// Now the equations are:
+//
+// shelf_index = floor(log2(customer_index + prealloc)) - log2(prealloc) - 1
+// shelf_count = ceil(log2(box_count + prealloc)) - log2(prealloc) - 1
+// box_index = customer_index + prealloc - 2 ** (log2(prealloc) + 1 + shelf_index)
+// shelf_size = prealloc * 2 ** (shelf_index + 1)
+
+/// This is a stack data structure where pointers to elements have the same lifetime as the data structure
+/// itself, unlike ArrayList where push() invalidates all existing element pointers.
+/// The tradeoff is that elements are not guaranteed to be contiguous. For that, use ArrayList.
+/// Note however that most elements are contiguous, making this data structure cache-friendly.
+///
+/// Because it never has to copy elements from an old location to a new location, it does not require
+/// its elements to be copyable, and it avoids wasting memory when backed by an ArenaAllocator.
+///
+/// This data structure has O(1) push and O(1) pop.
+///
+/// It supports preallocated elements, making it especially well suited when the expected maximum
+/// size is small. `prealloc_item_count` must be 0, or a power of 2.
+///
+/// Element pointers stop being valid once the shelf holding them is released, which only
+/// happens through `setCapacity`, `shrinkCapacity` or `deinit`.
+pub fn SegmentedList(comptime T: type, comptime prealloc_item_count: usize) type {
+    return struct {
+        const Self = this;
+
+        // log2(prealloc_item_count). It is only referenced on the code paths taken when
+        // prealloc_item_count != 0; the helpers below return early in the 0 case.
+        const prealloc_base = blk: {
+            assert(prealloc_item_count != 0);
+            const value = std.math.log2_int(usize, prealloc_item_count);
+            assert((1 << value) == prealloc_item_count); // prealloc_item_count must be a power of 2
+            break :blk @typeOf(1)(value);
+        };
+        // Integer type used for shelf indexes and shelf counts.
+        const ShelfIndex = std.math.Log2Int(usize);
+
+        allocator: &Allocator,
+        // Number of elements currently in the list.
+        len: usize,
+        // Storage for the first `prealloc_item_count` elements, embedded in the struct.
+        prealloc_segment: [prealloc_item_count]T,
+        // One pointer per allocated shelf; shelf `i` holds `shelfSize(i)` elements.
+        dynamic_segments: []&T,
+
+        /// Deinitialize with `deinit`
+        pub fn init(allocator: &Allocator) Self {
+            return Self {
+                .allocator = allocator,
+                .len = 0,
+                .prealloc_segment = undefined,
+                .dynamic_segments = []&T{},
+            };
+        }
+
+        /// Frees every shelf and the shelf table, then invalidates the list.
+        pub fn deinit(self: &Self) void {
+            self.freeShelves(ShelfIndex(self.dynamic_segments.len), 0);
+            self.allocator.free(self.dynamic_segments);
+            *self = undefined;
+        }
+
+        /// Returns a pointer to the element at index `i`. Asserts that `i` is in bounds.
+        pub fn at(self: &Self, i: usize) &T {
+            assert(i < self.len);
+            return self.uncheckedAt(i);
+        }
+
+        /// Returns the number of elements in the list.
+        pub fn count(self: &const Self) usize {
+            return self.len;
+        }
+
+        /// Appends a copy of `item`. Fails if the required storage cannot be allocated.
+        pub fn push(self: &Self, item: &const T) !void {
+            const new_item_ptr = try self.addOne();
+            *new_item_ptr = *item;
+        }
+
+        /// Appends every element of `items` in order. If an allocation fails part way
+        /// through, the elements pushed so far remain in the list.
+        pub fn pushMany(self: &Self, items: []const T) !void {
+            for (items) |item| {
+                try self.push(item);
+            }
+        }
+
+        /// Removes the last element and returns it, or returns null when the list is empty.
+        /// The capacity is left untouched; use `shrinkCapacity` to release it.
+        pub fn pop(self: &Self) ?T {
+            if (self.len == 0)
+                return null;
+
+            const index = self.len - 1;
+            const result = *self.uncheckedAt(index);
+            self.len = index;
+            return result;
+        }
+
+        /// Appends one uninitialized element and returns a pointer to it.
+        /// This only ever grows the capacity: shelves that already exist, for example
+        /// because they were reserved with `setCapacity`, are kept.
+        pub fn addOne(self: &Self) !&T {
+            const new_length = self.len + 1;
+            // `setCapacity` would also release shelves beyond `new_length`, so it is
+            // called only when the existing shelves cannot hold the new element.
+            if (new_length > prealloc_item_count and shelfCount(new_length) > self.dynamic_segments.len) {
+                try self.setCapacity(new_length);
+            }
+            const result = self.uncheckedAt(self.len);
+            self.len = new_length;
+            return result;
+        }
+
+        /// Grows or shrinks the set of allocated shelves so that it matches `new_capacity`
+        /// elements. On allocation failure the previous shelves are left as they were.
+        /// `new_capacity` is not checked against `len`: a smaller value releases shelves
+        /// that still hold elements.
+        pub fn setCapacity(self: &Self, new_capacity: usize) !void {
+            if (new_capacity <= prealloc_item_count) {
+                // Everything fits in the preallocated segment; release all shelves.
+                const len = ShelfIndex(self.dynamic_segments.len);
+                if (len == 0) return;
+                self.freeShelves(len, 0);
+                self.allocator.free(self.dynamic_segments);
+                self.dynamic_segments = []&T{};
+                return;
+            }
+
+            const new_cap_shelf_count = shelfCount(new_capacity);
+            const old_shelf_count = ShelfIndex(self.dynamic_segments.len);
+            if (new_cap_shelf_count > old_shelf_count) {
+                self.dynamic_segments = try self.allocator.realloc(&T, self.dynamic_segments, new_cap_shelf_count);
+                var i = old_shelf_count;
+                // If a shelf allocation fails, undo the shelves added by this call.
+                errdefer {
+                    self.freeShelves(i, old_shelf_count);
+                    self.dynamic_segments = self.allocator.shrink(&T, self.dynamic_segments, old_shelf_count);
+                }
+                while (i < new_cap_shelf_count) : (i += 1) {
+                    self.dynamic_segments[i] = (try self.allocator.alloc(T, shelfSize(i))).ptr;
+                }
+                return;
+            }
+            if (new_cap_shelf_count == old_shelf_count) {
+                return;
+            }
+            self.freeShelves(old_shelf_count, new_cap_shelf_count);
+            self.dynamic_segments = self.allocator.shrink(&T, self.dynamic_segments, new_cap_shelf_count);
+        }
+
+        /// Releases the shelves that are not needed for `new_capacity` elements.
+        /// Asserts that this does not require more shelves than are currently allocated.
+        pub fn shrinkCapacity(self: &Self, new_capacity: usize) void {
+            assert(new_capacity <= prealloc_item_count or shelfCount(new_capacity) <= self.dynamic_segments.len);
+            self.setCapacity(new_capacity) catch unreachable;
+        }
+
+        /// Returns a pointer to the slot for `index` without comparing `index` to `len`.
+        pub fn uncheckedAt(self: &Self, index: usize) &T {
+            if (index < prealloc_item_count) {
+                return &self.prealloc_segment[index];
+            }
+            const shelf_index = shelfIndex(index);
+            const box_index = boxIndex(index, shelf_index);
+            return &self.dynamic_segments[shelf_index][box_index];
+        }
+
+        // Number of shelves needed to hold `box_count` elements.
+        // Callers only use it for counts above `prealloc_item_count`.
+        fn shelfCount(box_count: usize) ShelfIndex {
+            if (prealloc_item_count == 0) {
+                return std.math.log2_int_ceil(usize, box_count + 1);
+            }
+            return std.math.log2_int_ceil(usize, box_count + prealloc_item_count) - prealloc_base - 1;
+        }
+
+        // Number of elements stored in the shelf at `shelf_index`.
+        fn shelfSize(shelf_index: ShelfIndex) usize {
+            if (prealloc_item_count == 0) {
+                return usize(1) << shelf_index;
+            }
+            return usize(1) << (shelf_index + (prealloc_base + 1));
+        }
+
+        // Index of the shelf that contains the element at `list_index`.
+        // With a nonzero `prealloc_item_count`, `list_index` must be at least that count.
+        fn shelfIndex(list_index: usize) ShelfIndex {
+            if (prealloc_item_count == 0) {
+                return std.math.log2_int(usize, list_index + 1);
+            }
+            return std.math.log2_int(usize, list_index + prealloc_item_count) - prealloc_base - 1;
+        }
+
+        // Position of the element at `list_index` inside the shelf at `shelf_index`.
+        fn boxIndex(list_index: usize, shelf_index: ShelfIndex) usize {
+            if (prealloc_item_count == 0) {
+                return (list_index + 1) - (usize(1) << shelf_index);
+            }
+            return list_index + prealloc_item_count - (usize(1) << ((prealloc_base + 1) + shelf_index));
+        }
+
+        // Frees the shelves with indexes from `from_count - 1` down to `to_count`.
+        // Requires `from_count >= to_count`.
+        fn freeShelves(self: &Self, from_count: ShelfIndex, to_count: ShelfIndex) void {
+            var i = from_count;
+            while (i != to_count) {
+                i -= 1;
+                self.allocator.free(self.dynamic_segments[i][0..shelfSize(i)]);
+            }
+        }
+    };
+}
+
+test "std.SegmentedList" {
+    var da = std.heap.DirectAllocator.init();
+    defer da.deinit();
+    var a = &da.allocator;
+
+    try testSegmentedList(0, a);
+    try testSegmentedList(1, a);
+    try testSegmentedList(2, a);
+    try testSegmentedList(4, a);
+    try testSegmentedList(8, a);
+    try testSegmentedList(16, a);
+}
+
+fn testSegmentedList(comptime prealloc: usize, allocator: &Allocator) !void {
+    var list = SegmentedList(i32, prealloc).init(allocator);
+    defer list.deinit();
+
+    {
+        var i: usize = 0;
+        while (i < 100) : (i += 1) {
+            try list.push(i32(i + 1));
+            assert(list.len == i + 1);
+        }
+    }
+
+    {
+        var i: usize = 0;
+        while (i < 100) : (i += 1) {
+            assert(*list.at(i) == i32(i + 1));
+        }
+    }
+
+    assert(??list.pop() == 100);
+    assert(list.len == 99);
+
+    try list.pushMany([]i32 { 1, 2, 3 });
+    assert(list.len == 102);
+    assert(??list.pop() == 3);
+    assert(??list.pop() == 2);
+    assert(??list.pop() == 1);
+    assert(list.len == 99);
+
+    try list.pushMany([]const i32 {});
+    assert(list.len == 99);
+
+    var i: i32 = 99;
+    while (list.pop()) |item| : (i -= 1) {
+        assert(item == i);
+        list.shrinkCapacity(list.len);
+    }
+
+    // Capacity reserved ahead of time must survive a push.
+    try list.setCapacity(1000);
+    const reserved_shelf_count = list.dynamic_segments.len;
+    assert(reserved_shelf_count != 0);
+    try list.push(i32(1));
+    assert(list.dynamic_segments.len == reserved_shelf_count);
+    assert(*list.at(0) == 1);
+}