mirror of
https://github.com/ziglang/zig.git
synced 2026-01-21 06:45:24 +00:00
remove extra storage from EnvMap on windows
This commit is contained in:
parent
e70cb04f89
commit
69f0a5587d
@ -1245,7 +1245,7 @@ pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) !
|
||||
while (it.next()) |pair| {
|
||||
// +1 for '='
|
||||
// +1 for null byte
|
||||
max_chars_needed += pair.name.len + pair.value.len + 2;
|
||||
max_chars_needed += pair.key_ptr.len + pair.value_ptr.len + 2;
|
||||
}
|
||||
break :x max_chars_needed;
|
||||
};
|
||||
@ -1255,10 +1255,10 @@ pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) !
|
||||
var it = env_map.iterator();
|
||||
var i: usize = 0;
|
||||
while (it.next()) |pair| {
|
||||
i += try unicode.utf8ToUtf16Le(result[i..], pair.name);
|
||||
i += try unicode.utf8ToUtf16Le(result[i..], pair.key_ptr.*);
|
||||
result[i] = '=';
|
||||
i += 1;
|
||||
i += try unicode.utf8ToUtf16Le(result[i..], pair.value);
|
||||
i += try unicode.utf8ToUtf16Le(result[i..], pair.value_ptr.*);
|
||||
result[i] = 0;
|
||||
i += 1;
|
||||
}
|
||||
@ -1280,10 +1280,10 @@ pub fn createNullDelimitedEnvMap(arena: mem.Allocator, env_map: *const EnvMap) !
|
||||
var it = env_map.iterator();
|
||||
var i: usize = 0;
|
||||
while (it.next()) |pair| : (i += 1) {
|
||||
const env_buf = try arena.allocSentinel(u8, pair.name.len + pair.value.len + 1, 0);
|
||||
mem.copy(u8, env_buf, pair.name);
|
||||
env_buf[pair.name.len] = '=';
|
||||
mem.copy(u8, env_buf[pair.name.len + 1 ..], pair.value);
|
||||
const env_buf = try arena.allocSentinel(u8, pair.key_ptr.len + pair.value_ptr.len + 1, 0);
|
||||
mem.copy(u8, env_buf, pair.key_ptr.*);
|
||||
env_buf[pair.key_ptr.len] = '=';
|
||||
mem.copy(u8, env_buf[pair.key_ptr.len + 1 ..], pair.value_ptr.*);
|
||||
envp_buf[i] = env_buf.ptr;
|
||||
}
|
||||
assert(i == envp_count);
|
||||
|
||||
@ -52,341 +52,128 @@ test "getCwdAlloc" {
|
||||
testing.allocator.free(cwd);
|
||||
}
|
||||
|
||||
/// EnvMap for Windows that handles Unicode-aware case insensitivity for lookups, while also
|
||||
/// providing the canonical environment variable names when iterating.
|
||||
///
|
||||
/// Allows for zero-allocation lookups (even though it needs to do UTF-8 -> UTF-16 -> uppercase
|
||||
/// conversions) by allocating a buffer large enough to fit the largest environment variable
|
||||
/// name, and using that when doing lookups (i.e. anything that overflows the buffer can be treated
|
||||
/// as the environment variable not being found).
|
||||
pub const EnvMapWindows = struct {
|
||||
allocator: Allocator,
|
||||
/// Keys are UTF-16le stored as []const u8
|
||||
uppercased_map: std.StringHashMapUnmanaged(EnvValue),
|
||||
/// Buffer for converting to uppercased UTF-16 on key lookups
|
||||
/// Must call `reallocUppercaseBuf` before doing any lookups after a `put` call.
|
||||
uppercase_buf_utf16: []u16 = &[_]u16{},
|
||||
max_name_utf16_length: usize = 0,
|
||||
|
||||
pub const EnvValue = struct {
|
||||
value: []const u8,
|
||||
canonical_name: []const u8,
|
||||
};
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Deinitialize with `deinit`.
|
||||
pub fn init(allocator: Allocator) Self {
|
||||
return .{
|
||||
.allocator = allocator,
|
||||
.uppercased_map = std.StringHashMapUnmanaged(EnvValue){},
|
||||
};
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
var it = self.uppercased_map.iterator();
|
||||
while (it.next()) |entry| {
|
||||
self.allocator.free(entry.key_ptr.*);
|
||||
self.allocator.free(entry.value_ptr.value);
|
||||
self.allocator.free(entry.value_ptr.canonical_name);
|
||||
}
|
||||
self.uppercased_map.deinit(self.allocator);
|
||||
self.allocator.free(self.uppercase_buf_utf16);
|
||||
}
|
||||
|
||||
/// Increases the size of the uppercase buffer if the maximum name size has increased.
|
||||
/// Must be called before any `get` calls after any number of `put` calls.
|
||||
pub fn reallocUppercaseBuf(self: *Self) !void {
|
||||
if (self.max_name_utf16_length > self.uppercase_buf_utf16.len) {
|
||||
self.uppercase_buf_utf16 = try self.allocator.realloc(self.uppercase_buf_utf16, self.max_name_utf16_length);
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts `src` to uppercase using `RtlUpcaseUnicodeString` and puts the result in `dest`.
|
||||
/// Returns the length of the converted UTF-16 string. `dest.len` must be >= `src.len`.
|
||||
///
|
||||
/// Note: As of now, RtlUpcaseUnicodeString does not seem to handle codepoints above 0x10000
|
||||
/// (i.e. those that require a surrogate pair), so this function will always return a length
|
||||
/// equal to `src.len`. However, if RtlUpcaseUnicodeString is updated to handle codepoints above
|
||||
/// 0x10000, this property would still hold unless there are lowercase <-> uppercase conversions
|
||||
/// that cross over the boundary between codepoints >= 0x10000 and < 0x10000.
|
||||
/// TODO: Is it feasible that Unicode lowercase <-> uppercase conversions could cross that boundary?
|
||||
fn uppercaseName(dest: []u16, src: []const u16) u16 {
|
||||
assert(dest.len >= src.len);
|
||||
|
||||
const dest_bytes = @intCast(u16, dest.len * 2);
|
||||
var dest_string = os.windows.UNICODE_STRING{
|
||||
.Length = dest_bytes,
|
||||
.MaximumLength = dest_bytes,
|
||||
.Buffer = @intToPtr([*]u16, @ptrToInt(dest.ptr)),
|
||||
};
|
||||
const src_bytes = @intCast(u16, src.len * 2);
|
||||
const src_string = os.windows.UNICODE_STRING{
|
||||
.Length = src_bytes,
|
||||
.MaximumLength = src_bytes,
|
||||
.Buffer = @intToPtr([*]u16, @ptrToInt(src.ptr)),
|
||||
};
|
||||
const rc = os.windows.ntdll.RtlUpcaseUnicodeString(&dest_string, &src_string, os.windows.FALSE);
|
||||
switch (rc) {
|
||||
.SUCCESS => return dest_string.Length / 2,
|
||||
else => unreachable, // we are not allocating, so no errors should be possible
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: Does not realloc the uppercase buf to allow for calling put for many variables and
|
||||
/// only allocating the uppercase buf afterwards.
|
||||
pub fn putUtf8(self: *Self, name: []const u8, value: []const u8) !void {
|
||||
const uppercased_len = len: {
|
||||
const name_uppercased_utf16 = uppercased: {
|
||||
var name_utf16_buf = try std.ArrayListAligned(u8, @alignOf(u16)).initCapacity(self.allocator, name.len);
|
||||
errdefer name_utf16_buf.deinit();
|
||||
|
||||
const bytes_written = try std.unicode.utf8ToUtf16LeWriter(name_utf16_buf.writer(), name);
|
||||
var name_utf16 = name_utf16_buf.items[0..bytes_written];
|
||||
|
||||
// uppercase in place
|
||||
var name_uppercased_utf16 = std.mem.bytesAsSlice(u16, name_utf16);
|
||||
const uppercased_len = uppercaseName(name_uppercased_utf16, name_uppercased_utf16);
|
||||
assert(uppercased_len == name_uppercased_utf16.len);
|
||||
|
||||
break :uppercased name_utf16_buf.toOwnedSlice();
|
||||
};
|
||||
errdefer self.allocator.free(name_uppercased_utf16);
|
||||
|
||||
const name_canonical = try self.allocator.dupe(u8, name);
|
||||
errdefer self.allocator.free(name_canonical);
|
||||
|
||||
const value_dupe = try self.allocator.dupe(u8, value);
|
||||
errdefer self.allocator.free(value_dupe);
|
||||
|
||||
const get_or_put = try self.uppercased_map.getOrPut(self.allocator, name_uppercased_utf16);
|
||||
if (get_or_put.found_existing) {
|
||||
// note: this is only safe from UAF because the errdefer that frees this value above
|
||||
// no longer has a possibility of being triggered after this point
|
||||
self.allocator.free(name_uppercased_utf16);
|
||||
self.allocator.free(get_or_put.value_ptr.value);
|
||||
self.allocator.free(get_or_put.value_ptr.canonical_name);
|
||||
} else {
|
||||
get_or_put.key_ptr.* = name_uppercased_utf16;
|
||||
}
|
||||
get_or_put.value_ptr.value = value_dupe;
|
||||
get_or_put.value_ptr.canonical_name = name_canonical;
|
||||
|
||||
break :len name_uppercased_utf16.len;
|
||||
};
|
||||
|
||||
// The buffer for case conversion for key lookups will need to be as big as the largest
|
||||
// key stored in the hash map.
|
||||
self.max_name_utf16_length = @maximum(self.max_name_utf16_length, uppercased_len);
|
||||
}
|
||||
|
||||
/// Asserts that the name does not already exist in the map.
|
||||
/// Note: Does not realloc the uppercase buf to allow for calling put for many variables and
|
||||
/// only allocating the uppercase buf afterwards.
|
||||
pub fn putUtf16NoClobber(self: *Self, name_utf16: []const u16, value_utf16: []const u16) !void {
|
||||
const uppercased_len = len: {
|
||||
const name_canonical = try std.unicode.utf16leToUtf8Alloc(self.allocator, name_utf16);
|
||||
errdefer self.allocator.free(name_canonical);
|
||||
|
||||
const value = try std.unicode.utf16leToUtf8Alloc(self.allocator, value_utf16);
|
||||
errdefer self.allocator.free(value);
|
||||
|
||||
const name_uppercased_utf16 = try self.allocator.alloc(u16, name_utf16.len);
|
||||
errdefer self.allocator.free(name_uppercased_utf16);
|
||||
|
||||
const uppercased_len = uppercaseName(name_uppercased_utf16, name_utf16);
|
||||
assert(uppercased_len == name_uppercased_utf16.len);
|
||||
|
||||
try self.uppercased_map.putNoClobber(self.allocator, std.mem.sliceAsBytes(name_uppercased_utf16), EnvValue{
|
||||
.value = value,
|
||||
.canonical_name = name_canonical,
|
||||
});
|
||||
break :len name_uppercased_utf16.len;
|
||||
};
|
||||
|
||||
// The buffer for case conversion for key lookups will need to be as big as the largest
|
||||
// key stored in the hash map.
|
||||
self.max_name_utf16_length = @maximum(self.max_name_utf16_length, uppercased_len);
|
||||
}
|
||||
|
||||
/// Attempts to convert a UTF-8 name into a uppercased UTF-16le name for a lookup. If the
|
||||
/// name cannot be converted, this function will return `null`.
|
||||
fn utf8ToUppercasedUtf16(self: Self, name: []const u8) ?[]u16 {
|
||||
const name_utf16: []u16 = to_utf16: {
|
||||
var utf16_buf_stream = std.io.fixedBufferStream(std.mem.sliceAsBytes(self.uppercase_buf_utf16));
|
||||
_ = std.unicode.utf8ToUtf16LeWriter(utf16_buf_stream.writer(), name) catch |err| switch (err) {
|
||||
// If the buffer isn't large enough, we can treat that as 'env var not found', as we
|
||||
// know anything too large for the buffer can't be found in the map.
|
||||
error.NoSpaceLeft => return null,
|
||||
// Anything with invalid UTF-8 will also not be found in the map, so treat that as
|
||||
// 'env var not found' too
|
||||
error.InvalidUtf8 => return null,
|
||||
};
|
||||
break :to_utf16 std.mem.bytesAsSlice(u16, utf16_buf_stream.getWritten());
|
||||
};
|
||||
|
||||
// uppercase in place
|
||||
const uppercased_len = uppercaseName(name_utf16, name_utf16);
|
||||
assert(uppercased_len == name_utf16.len);
|
||||
|
||||
return name_utf16;
|
||||
}
|
||||
|
||||
/// Returns true if an entry was found and deleted, false otherwise.
|
||||
pub fn remove(self: *Self, name: []const u8) bool {
|
||||
const name_utf16 = self.utf8ToUppercasedUtf16(name) orelse return false;
|
||||
const kv = self.uppercased_map.fetchRemove(std.mem.sliceAsBytes(name_utf16)) orelse return false;
|
||||
self.allocator.free(kv.key);
|
||||
self.allocator.free(kv.value.value);
|
||||
self.allocator.free(kv.value.canonical_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
pub fn get(self: Self, name: []const u8) ?EnvValue {
|
||||
const name_utf16 = self.utf8ToUppercasedUtf16(name) orelse return null;
|
||||
return self.uppercased_map.get(std.mem.sliceAsBytes(name_utf16));
|
||||
}
|
||||
|
||||
pub fn count(self: Self) EnvMap.Size {
|
||||
return self.uppercased_map.count();
|
||||
}
|
||||
|
||||
pub fn iterator(self: *const Self) Iterator {
|
||||
return .{
|
||||
.env_map = self,
|
||||
.uppercased_map_iterator = self.uppercased_map.iterator(),
|
||||
};
|
||||
}
|
||||
|
||||
pub const Iterator = struct {
|
||||
env_map: *const Self,
|
||||
uppercased_map_iterator: std.StringHashMapUnmanaged(EnvValue).Iterator,
|
||||
|
||||
pub fn next(it: *Iterator) ?EnvMap.Entry {
|
||||
if (it.uppercased_map_iterator.next()) |uppercased_entry| {
|
||||
return EnvMap.Entry{
|
||||
.name = uppercased_entry.value_ptr.canonical_name,
|
||||
.value = uppercased_entry.value_ptr.value,
|
||||
};
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
test "EnvMapWindows" {
|
||||
if (builtin.os.tag != .windows) return error.SkipZigTest;
|
||||
|
||||
var env_map = EnvMapWindows.init(testing.allocator);
|
||||
defer env_map.deinit();
|
||||
|
||||
// both put methods
|
||||
try env_map.putUtf16NoClobber(std.unicode.utf8ToUtf16LeStringLiteral("Path"), std.unicode.utf8ToUtf16LeStringLiteral("something"));
|
||||
try env_map.putUtf8("КИРиллИЦА", "something else");
|
||||
try env_map.reallocUppercaseBuf();
|
||||
|
||||
try testing.expectEqual(@as(EnvMap.Size, 2), env_map.count());
|
||||
|
||||
// unicode-aware case-insensitive lookups
|
||||
try testing.expectEqualStrings("something", env_map.get("PATH").?.value);
|
||||
try testing.expectEqualStrings("something else", env_map.get("кириллица").?.value);
|
||||
try testing.expect(env_map.get("missing") == null);
|
||||
|
||||
// canonical names when iterating
|
||||
var it = env_map.iterator();
|
||||
var count: EnvMap.Size = 0;
|
||||
while (it.next()) |entry| {
|
||||
const is_an_expected_name = std.mem.eql(u8, "Path", entry.name) or std.mem.eql(u8, "КИРиллИЦА", entry.name);
|
||||
try testing.expect(is_an_expected_name);
|
||||
count += 1;
|
||||
}
|
||||
try testing.expectEqual(@as(EnvMap.Size, 2), count);
|
||||
}
|
||||
|
||||
pub const EnvMap = struct {
|
||||
storage: StorageType,
|
||||
hash_map: HashMap,
|
||||
|
||||
pub const StorageType = switch (builtin.os.tag) {
|
||||
.windows => EnvMapWindows,
|
||||
else => std.BufMap,
|
||||
};
|
||||
const HashMap = std.HashMap(
|
||||
[]const u8,
|
||||
[]const u8,
|
||||
EnvNameHashContext,
|
||||
std.hash_map.default_max_load_percentage,
|
||||
);
|
||||
|
||||
pub const Size = std.BufMap.BufMapHashMap.Size;
|
||||
|
||||
const Self = @This();
|
||||
|
||||
/// Deinitialize with `deinit`.
|
||||
pub fn init(allocator: Allocator) Self {
|
||||
return Self{ .storage = StorageType.init(allocator) };
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
self.storage.deinit();
|
||||
}
|
||||
|
||||
pub fn get(self: Self, name: []const u8) ?[]const u8 {
|
||||
switch (builtin.os.tag) {
|
||||
.windows => {
|
||||
if (self.storage.get(name)) |entry| {
|
||||
return entry.value;
|
||||
} else {
|
||||
return null;
|
||||
pub const EnvNameHashContext = struct {
|
||||
pub fn hash(self: @This(), s: []const u8) u64 {
|
||||
_ = self;
|
||||
if (builtin.os.tag == .windows) {
|
||||
const h = std.hash.Wyhash.init(0);
|
||||
// TODO: improve this, instead of iterating over ascii,
|
||||
// iterate over with unicode
|
||||
for (s) |c| {
|
||||
var s_upper = [_]u8 { std.ascii.toLower(c) };
|
||||
h.update(s_upper);
|
||||
}
|
||||
},
|
||||
else => return self.storage.get(name),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn count(self: Self) Size {
|
||||
return self.storage.count();
|
||||
}
|
||||
|
||||
pub fn iterator(self: *const Self) Iterator {
|
||||
return .{ .storage_iterator = self.storage.iterator() };
|
||||
}
|
||||
|
||||
pub fn put(self: *Self, name: []const u8, value: []const u8) !void {
|
||||
switch (builtin.os.tag) {
|
||||
.windows => {
|
||||
try self.storage.putUtf8(name, value);
|
||||
try self.storage.reallocUppercaseBuf();
|
||||
},
|
||||
else => return self.storage.put(name, value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove(self: *Self, name: []const u8) void {
|
||||
_ = self.storage.remove(name);
|
||||
}
|
||||
|
||||
pub const Entry = struct {
|
||||
name: []const u8,
|
||||
value: []const u8,
|
||||
};
|
||||
|
||||
pub const Iterator = struct {
|
||||
storage_iterator: switch (builtin.os.tag) {
|
||||
.windows => EnvMapWindows.Iterator,
|
||||
else => std.BufMap.BufMapHashMap.Iterator,
|
||||
},
|
||||
|
||||
pub fn next(it: *Iterator) ?Entry {
|
||||
switch (builtin.os.tag) {
|
||||
.windows => return it.storage_iterator.next(),
|
||||
else => {
|
||||
if (it.storage_iterator.next()) |entry| {
|
||||
return Entry{
|
||||
.name = entry.key_ptr.*,
|
||||
.value = entry.value_ptr.*,
|
||||
};
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
},
|
||||
return h.final();
|
||||
}
|
||||
return std.hash_map.hashString(s);
|
||||
}
|
||||
pub fn eql(self: @This(), a: []const u8, b: []const u8) bool {
|
||||
_ = self;
|
||||
if (builtin.os.tag == .windows) {
|
||||
// TODO: improve this, instead of comparing ascii
|
||||
// compare with unicode
|
||||
return std.ascii.eqlIgnoreCase(a, b);
|
||||
}
|
||||
return std.hash_map.eqlString(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
/// Create a EnvMap backed by a specific allocator.
|
||||
/// That allocator will be used for both backing allocations
|
||||
/// and string deduplication.
|
||||
pub fn init(allocator: Allocator) EnvMap {
|
||||
return EnvMap{ .hash_map = HashMap.init(allocator) };
|
||||
}
|
||||
|
||||
/// Free the backing storage of the map, as well as all
|
||||
/// of the stored keys and values.
|
||||
pub fn deinit(self: *EnvMap) void {
|
||||
var it = self.hash_map.iterator();
|
||||
while (it.next()) |entry| {
|
||||
self.free(entry.key_ptr.*);
|
||||
self.free(entry.value_ptr.*);
|
||||
}
|
||||
|
||||
self.hash_map.deinit();
|
||||
}
|
||||
|
||||
/// Same as `put` but the key and value become owned by the EnvMap rather
|
||||
/// than being copied.
|
||||
/// If `putMove` fails, the ownership of key and value does not transfer.
|
||||
pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void {
|
||||
const get_or_put = try self.hash_map.getOrPut(key);
|
||||
if (get_or_put.found_existing) {
|
||||
self.free(get_or_put.key_ptr.*);
|
||||
self.free(get_or_put.value_ptr.*);
|
||||
get_or_put.key_ptr.* = key;
|
||||
}
|
||||
get_or_put.value_ptr.* = value;
|
||||
}
|
||||
|
||||
/// `key` and `value` are copied into the EnvMap.
|
||||
pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void {
|
||||
const value_copy = try self.copy(value);
|
||||
errdefer self.free(value_copy);
|
||||
const get_or_put = try self.hash_map.getOrPut(key);
|
||||
if (get_or_put.found_existing) {
|
||||
self.free(get_or_put.value_ptr.*);
|
||||
} else {
|
||||
get_or_put.key_ptr.* = self.copy(key) catch |err| {
|
||||
_ = self.hash_map.remove(key);
|
||||
return err;
|
||||
};
|
||||
}
|
||||
get_or_put.value_ptr.* = value_copy;
|
||||
}
|
||||
|
||||
/// Find the address of the value associated with a key.
|
||||
/// The returned pointer is invalidated if the map resizes.
|
||||
pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 {
|
||||
return self.hash_map.getPtr(key);
|
||||
}
|
||||
|
||||
/// Return the map's copy of the value associated with
|
||||
/// a key. The returned string is invalidated if this
|
||||
/// key is removed from the map.
|
||||
pub fn get(self: EnvMap, key: []const u8) ?[]const u8 {
|
||||
return self.hash_map.get(key);
|
||||
}
|
||||
|
||||
/// Removes the item from the map and frees its value.
|
||||
/// This invalidates the value returned by get() for this key.
|
||||
pub fn remove(self: *EnvMap, key: []const u8) void {
|
||||
const kv = self.hash_map.fetchRemove(key) orelse return;
|
||||
self.free(kv.key);
|
||||
self.free(kv.value);
|
||||
}
|
||||
|
||||
/// Returns the number of KV pairs stored in the map.
|
||||
pub fn count(self: EnvMap) HashMap.Size {
|
||||
return self.hash_map.count();
|
||||
}
|
||||
|
||||
/// Returns an iterator over entries in the map.
|
||||
pub fn iterator(self: *const EnvMap) HashMap.Iterator {
|
||||
return self.hash_map.iterator();
|
||||
}
|
||||
|
||||
fn free(self: EnvMap, value: []const u8) void {
|
||||
self.hash_map.allocator.free(value);
|
||||
}
|
||||
|
||||
fn copy(self: EnvMap, value: []const u8) ![]u8 {
|
||||
return self.hash_map.allocator.dupe(u8, value);
|
||||
}
|
||||
};
|
||||
|
||||
test "EnvMap" {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user