mirror of
https://github.com/ziglang/zig.git
synced 2025-12-14 18:23:12 +00:00
Sema: avoid breaking hash contract when instantiating generic functions
* Add tagName to Value which behaves like @tagName. * Add hashUncoerced to Value as an alternative to hash when we want to produce the same hash for values that can coerce to each other. * Hash owner_decl instead of module_fn in Sema.instantiateGenericCall since Module.Decl.Index is not affected by ASLR like *Module.Fn was, and also because GenericCallAdapter.eql was already doing this. * Use Value.hashUncoerced in Sema.instantiateGenericCall because GenericCallAdapter.eql uses Value.eqlAdvanced to compare args, which ignores coercions. * Add revealed missing cases to Value.eqlAdvanced. Without these changes, we were breaking the hash contract for monomorphed_funcs, and were generating different hashes for values that compared equal. This resulted in a 0.2% chance when compiling self-hosted of producing a different output, which depended on fingerprint collisions of hashes that were affected by ASLR. Normally, the different hashes would have resulted in equality checks being skipped, but in the case of a fingerprint collision, the truth would be revealed and the compiler's behavior would diverge.
This commit is contained in:
parent
59af6417bb
commit
e40c38d258
@ -6828,7 +6828,7 @@ fn instantiateGenericCall(
|
||||
// don't find out until after generating a monomorphed function whether the parameter
|
||||
// type ended up being a "must-be-comptime-known" type.
|
||||
var hasher = std.hash.Wyhash.init(0);
|
||||
std.hash.autoHash(&hasher, @ptrToInt(module_fn));
|
||||
std.hash.autoHash(&hasher, module_fn.owner_decl);
|
||||
|
||||
const generic_args = try sema.arena.alloc(GenericCallAdapter.Arg, func_ty_info.param_types.len);
|
||||
{
|
||||
@ -6871,7 +6871,7 @@ fn instantiateGenericCall(
|
||||
},
|
||||
else => |e| return e,
|
||||
};
|
||||
arg_val.hash(arg_ty, &hasher, mod);
|
||||
arg_val.hashUncoerced(arg_ty, &hasher, mod);
|
||||
if (is_anytype) {
|
||||
arg_ty.hashWithHasher(&hasher, mod);
|
||||
generic_args[i] = .{
|
||||
|
||||
125
src/value.zig
125
src/value.zig
@ -1055,6 +1055,40 @@ pub const Value = extern union {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the name of the field that `val` selects within the enum or union
/// type `ty`, i.e. the string the `@tagName` builtin would produce.
///
/// * For a union type, the lookup is forwarded to the union's tag value.
/// * An enum literal value already stores its name, which is returned as-is.
/// * Any other value is first mapped to a field index, and the name is then
///   read from the enum type's field table.
pub fn tagName(val: Value, ty: Type, mod: *Module) []const u8 {
    if (ty.zigTypeTag() == .Union) {
        // Resolve the union's tag value against the union's tag type.
        const union_tag = val.unionTag();
        return union_tag.tagName(ty.unionTagTypeHypothetical(), mod);
    }

    const index = switch (val.tag()) {
        .enum_literal => return val.castTag(.enum_literal).?.data,
        .enum_field_index => val.castTag(.enum_field_index).?.data,
        .the_only_possible_value => only: {
            // A value with a single possible state must belong to a one-field enum.
            assert(ty.enumFieldCount() == 1);
            break :only 0;
        },
        else => int_tag: {
            // `val` is an integer tag value; translate it back into a field index.
            const value_map = switch (ty.tag()) {
                .enum_full, .enum_nonexhaustive => ty.cast(Type.Payload.EnumFull).?.data.values,
                .enum_numbered => ty.castTag(.enum_numbered).?.data.values,
                .enum_simple => Module.EnumFull.ValueMap{},
                else => unreachable,
            };
            if (value_map.entries.len == 0) {
                // Auto-numbered enum: the integer value is used directly as the index.
                break :int_tag @intCast(u32, val.toUnsignedInt(mod.getTarget()));
            }
            // Explicitly numbered enum: look the value up in the value table.
            const found = value_map.getIndexContext(val, .{ .ty = ty, .mod = mod }).?;
            break :int_tag @intCast(u32, found);
        },
    };

    const field_map = switch (ty.tag()) {
        .enum_full, .enum_nonexhaustive => ty.cast(Type.Payload.EnumFull).?.data.fields,
        .enum_numbered => ty.castTag(.enum_numbered).?.data.fields,
        .enum_simple => ty.castTag(.enum_simple).?.data.fields,
        else => unreachable,
    };
    const names = field_map.keys();
    return names[index];
}
|
||||
|
||||
/// Asserts the value is an integer.
|
||||
pub fn toBigInt(val: Value, space: *BigIntSpace, target: Target) BigIntConst {
|
||||
return val.toBigIntAdvanced(space, target, null) catch unreachable;
|
||||
@ -2211,7 +2245,7 @@ pub const Value = extern union {
|
||||
return eqlAdvanced(a_union.val, active_field_ty, b_union.val, active_field_ty, mod, sema_kit);
|
||||
},
|
||||
else => {},
|
||||
} else if (a_tag == .null_value or b_tag == .null_value) {
|
||||
} else if (b_tag == .null_value or b_tag == .@"error") {
|
||||
return false;
|
||||
} else if (a_tag == .undef or b_tag == .undef) {
|
||||
return false;
|
||||
@ -2335,18 +2369,25 @@ pub const Value = extern union {
|
||||
if (a_nan) return true;
|
||||
return a_float == b_float;
|
||||
},
|
||||
.Optional => {
|
||||
if (a.tag() != .opt_payload and b.tag() == .opt_payload) {
|
||||
var buffer: Payload.SubValue = .{
|
||||
.base = .{ .tag = .opt_payload },
|
||||
.data = a,
|
||||
};
|
||||
const opt_val = Value.initPayload(&buffer.base);
|
||||
return eqlAdvanced(opt_val, ty, b, ty, mod, sema_kit);
|
||||
}
|
||||
.Optional => if (a_tag != .null_value and b_tag == .opt_payload) {
|
||||
var sub_pl: Payload.SubValue = .{
|
||||
.base = .{ .tag = b.tag() },
|
||||
.data = a,
|
||||
};
|
||||
const sub_val = Value.initPayload(&sub_pl.base);
|
||||
return eqlAdvanced(sub_val, ty, b, ty, mod, sema_kit);
|
||||
},
|
||||
.ErrorUnion => if (a_tag != .@"error" and b_tag == .eu_payload) {
|
||||
var sub_pl: Payload.SubValue = .{
|
||||
.base = .{ .tag = b.tag() },
|
||||
.data = a,
|
||||
};
|
||||
const sub_val = Value.initPayload(&sub_pl.base);
|
||||
return eqlAdvanced(sub_val, ty, b, ty, mod, sema_kit);
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
if (a_tag == .null_value or a_tag == .@"error") return false;
|
||||
return (try orderAdvanced(a, b, target, sema_kit)).compare(.eq);
|
||||
}
|
||||
|
||||
@ -2436,7 +2477,7 @@ pub const Value = extern union {
|
||||
const sub_ty = ty.optionalChild(&buffer);
|
||||
sub_val.hash(sub_ty, hasher, mod);
|
||||
} else {
|
||||
std.hash.autoHash(hasher, false); // non-null
|
||||
std.hash.autoHash(hasher, false); // null
|
||||
}
|
||||
},
|
||||
.ErrorUnion => {
|
||||
@ -2474,8 +2515,8 @@ pub const Value = extern union {
|
||||
union_obj.val.hash(active_field_ty, hasher, mod);
|
||||
},
|
||||
.Fn => {
|
||||
// Note that his hashes the *Fn/*ExternFn rather than the *Decl. This is
|
||||
// to differentiate function bodies from function pointers.
|
||||
// Note that this hashes the *Fn/*ExternFn rather than the *Decl.
|
||||
// This is to differentiate function bodies from function pointers.
|
||||
// This is currently redundant since we already hash the zig type tag
|
||||
// at the top of this function.
|
||||
if (val.castTag(.function)) |func| {
|
||||
@ -2497,6 +2538,64 @@ pub const Value = extern union {
|
||||
}
|
||||
}
|
||||
|
||||
/// This is a more conservative hash function that produces equal hashes for values
/// that can coerce into each other.
/// This function is used by hash maps and so treats floating-point NaNs as equal
/// to each other, and not equal to other floating-point values. Positive and
/// negative zero compare equal as floats, so they are hashed identically too.
/// Undefined and runtime-known values contribute nothing to the hash.
pub fn hashUncoerced(val: Value, ty: Type, hasher: *std.hash.Wyhash, mod: *Module) void {
    if (val.isUndef()) return;
    // The value is runtime-known and shouldn't affect the hash.
    if (val.tag() == .runtime_value) return;

    switch (ty.zigTypeTag()) {
        .BoundFn => unreachable, // TODO remove this from the language
        .Opaque => unreachable, // Cannot hash opaque types
        .Void,
        .NoReturn,
        .Undefined,
        .Null,
        .Struct, // It sure would be nice to do something clever with structs.
        => |zig_type_tag| std.hash.autoHash(hasher, zig_type_tag),
        .Type => {
            var buf: ToTypeBuffer = undefined;
            val.toType(&buf).hashWithHasher(hasher, mod);
        },
        .Float, .ComptimeFloat => {
            // Hashing the raw bit pattern would give different hashes to values
            // that compare equal: NaNs with different payloads, and -0.0 vs 0.0.
            // Collapse those onto one canonical representative before hashing so
            // that equal values always produce equal hashes.
            const float = val.toFloat(f128);
            const canonical: f128 = if (std.math.isNan(float))
                std.math.nan(f128)
            else if (float == 0)
                0
            else
                float;
            std.hash.autoHash(hasher, @bitCast(u128, canonical));
        },
        .Bool, .Int, .ComptimeInt, .Pointer, .Fn => switch (val.tag()) {
            // For slices only the pointer part is hashed.
            .slice => val.castTag(.slice).?.data.ptr.hashPtr(hasher, mod.getTarget()),
            else => val.hashPtr(hasher, mod.getTarget()),
        },
        .Array, .Vector => {
            // Hash every element in order using the element type.
            const len = ty.arrayLen();
            const elem_ty = ty.childType();
            var index: usize = 0;
            var elem_value_buf: ElemValueBuffer = undefined;
            while (index < len) : (index += 1) {
                const elem_val = val.elemValueBuffer(mod, index, &elem_value_buf);
                elem_val.hashUncoerced(elem_ty, hasher, mod);
            }
        },
        // A present payload hashes exactly like the bare payload value, so a
        // value and the same value wrapped in an optional hash identically.
        .Optional => if (val.castTag(.opt_payload)) |payload| {
            var buf: Type.Payload.ElemType = undefined;
            const child_ty = ty.optionalChild(&buf);
            payload.data.hashUncoerced(child_ty, hasher, mod);
        } else std.hash.autoHash(hasher, std.builtin.TypeId.Null),
        // Errors are hashed by name; otherwise the payload is hashed as the
        // bare payload value.
        .ErrorSet, .ErrorUnion => if (val.getError()) |err| hasher.update(err) else {
            const pl_ty = ty.errorUnionPayload();
            val.castTag(.eu_payload).?.data.hashUncoerced(pl_ty, hasher, mod);
        },
        .Enum, .EnumLiteral, .Union => {
            // Hash the tag by name so enum literals, enum values and union tags
            // naming the same field agree.
            hasher.update(val.tagName(ty, mod));
            if (val.cast(Payload.Union)) |union_obj| {
                const active_field_ty = ty.unionFieldType(union_obj.data.tag, mod);
                union_obj.data.val.hashUncoerced(active_field_ty, hasher, mod);
            } else std.hash.autoHash(hasher, std.builtin.TypeId.Void);
        },
        .Frame => @panic("TODO implement hashing frame values"),
        .AnyFrame => @panic("TODO implement hashing anyframe values"),
    }
}
|
||||
|
||||
pub const ArrayHashContext = struct {
|
||||
ty: Type,
|
||||
mod: *Module,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user