Sema: avoid breaking hash contract when instantiating generic functions

* Add tagName to Value which behaves like @tagName.
 * Add hashUncoerced to Value as an alternative to hash when we want to
   produce the same hash for values that can coerce to each other.
 * Hash owner_decl instead of module_fn in Sema.instantiateGenericCall
   since Module.Decl.Index is not affected by ASLR like *Module.Fn was,
   and also because GenericCallAdapter.eql was already doing this.
 * Use Value.hashUncoerced in Sema.instantiateGenericCall because
   GenericCallAdapter.eql uses Value.eqlAdvanced to compare args, which
   ignores coercions.
 * Add revealed missing cases to Value.eqlAdvanced.

Without these changes, we were breaking the hash contract for
monomorphed_funcs, and were generating different hashes for values that
compared equal.  This resulted in a 0.2% chance when compiling
self-hosted of producing a different output, which depended on
fingerprint collisions of hashes that were affected by ASLR.  Normally,
the different hashes would have resulted in equality checks being skipped,
but in the case of a fingerprint collision, the truth would be revealed
and the compiler's behavior would diverge.
This commit is contained in:
Jacob Young 2022-11-07 01:54:58 -05:00 committed by Andrew Kelley
parent 59af6417bb
commit e40c38d258
2 changed files with 114 additions and 15 deletions

View File

@ -6828,7 +6828,7 @@ fn instantiateGenericCall(
// don't find out until after generating a monomorphed function whether the parameter
// type ended up being a "must-be-comptime-known" type.
var hasher = std.hash.Wyhash.init(0);
std.hash.autoHash(&hasher, @ptrToInt(module_fn));
std.hash.autoHash(&hasher, module_fn.owner_decl);
const generic_args = try sema.arena.alloc(GenericCallAdapter.Arg, func_ty_info.param_types.len);
{
@ -6871,7 +6871,7 @@ fn instantiateGenericCall(
},
else => |e| return e,
};
arg_val.hash(arg_ty, &hasher, mod);
arg_val.hashUncoerced(arg_ty, &hasher, mod);
if (is_anytype) {
arg_ty.hashWithHasher(&hasher, mod);
generic_args[i] = .{

View File

@ -1055,6 +1055,40 @@ pub const Value = extern union {
}
}
/// Returns the name of the tag that `val` denotes within the enum or union type `ty`,
/// in the manner of the `@tagName` builtin.
/// For union types the lookup is delegated to the union's tag value; enum literal
/// values return the literal's own name without consulting `ty`.
pub fn tagName(val: Value, ty: Type, mod: *Module) []const u8 {
    if (ty.zigTypeTag() == .Union) {
        const union_tag_val = val.unionTag();
        return union_tag_val.tagName(ty.unionTagTypeHypothetical(), mod);
    }

    // An enum literal already carries its name as payload.
    if (val.castTag(.enum_literal)) |literal| return literal.data;

    const name_index = switch (val.tag()) {
        .enum_field_index => val.castTag(.enum_field_index).?.data,
        .the_only_possible_value => only: {
            assert(ty.enumFieldCount() == 1);
            break :only 0;
        },
        // Any other representation is treated as an integer tag value that has
        // to be mapped back to a field index.
        else => lookup: {
            const explicit_values = switch (ty.tag()) {
                .enum_full, .enum_nonexhaustive => ty.cast(Type.Payload.EnumFull).?.data.values,
                .enum_numbered => ty.castTag(.enum_numbered).?.data.values,
                .enum_simple => Module.EnumFull.ValueMap{},
                else => unreachable,
            };
            if (explicit_values.entries.len == 0) {
                // Auto-numbered enum: the integer value is the field index.
                break :lookup @intCast(u32, val.toUnsignedInt(mod.getTarget()));
            }
            break :lookup @intCast(u32, explicit_values.getIndexContext(val, .{ .ty = ty, .mod = mod }).?);
        },
    };

    const names = switch (ty.tag()) {
        .enum_full, .enum_nonexhaustive => ty.cast(Type.Payload.EnumFull).?.data.fields,
        .enum_numbered => ty.castTag(.enum_numbered).?.data.fields,
        .enum_simple => ty.castTag(.enum_simple).?.data.fields,
        else => unreachable,
    };
    return names.keys()[name_index];
}
/// Asserts the value is an integer.
pub fn toBigInt(val: Value, space: *BigIntSpace, target: Target) BigIntConst {
return val.toBigIntAdvanced(space, target, null) catch unreachable;
@ -2211,7 +2245,7 @@ pub const Value = extern union {
return eqlAdvanced(a_union.val, active_field_ty, b_union.val, active_field_ty, mod, sema_kit);
},
else => {},
} else if (a_tag == .null_value or b_tag == .null_value) {
} else if (b_tag == .null_value or b_tag == .@"error") {
return false;
} else if (a_tag == .undef or b_tag == .undef) {
return false;
@ -2335,18 +2369,25 @@ pub const Value = extern union {
if (a_nan) return true;
return a_float == b_float;
},
.Optional => {
if (a.tag() != .opt_payload and b.tag() == .opt_payload) {
var buffer: Payload.SubValue = .{
.base = .{ .tag = .opt_payload },
.data = a,
};
const opt_val = Value.initPayload(&buffer.base);
return eqlAdvanced(opt_val, ty, b, ty, mod, sema_kit);
}
.Optional => if (a_tag != .null_value and b_tag == .opt_payload) {
var sub_pl: Payload.SubValue = .{
.base = .{ .tag = b.tag() },
.data = a,
};
const sub_val = Value.initPayload(&sub_pl.base);
return eqlAdvanced(sub_val, ty, b, ty, mod, sema_kit);
},
.ErrorUnion => if (a_tag != .@"error" and b_tag == .eu_payload) {
var sub_pl: Payload.SubValue = .{
.base = .{ .tag = b.tag() },
.data = a,
};
const sub_val = Value.initPayload(&sub_pl.base);
return eqlAdvanced(sub_val, ty, b, ty, mod, sema_kit);
},
else => {},
}
if (a_tag == .null_value or a_tag == .@"error") return false;
return (try orderAdvanced(a, b, target, sema_kit)).compare(.eq);
}
@ -2436,7 +2477,7 @@ pub const Value = extern union {
const sub_ty = ty.optionalChild(&buffer);
sub_val.hash(sub_ty, hasher, mod);
} else {
std.hash.autoHash(hasher, false); // non-null
std.hash.autoHash(hasher, false); // null
}
},
.ErrorUnion => {
@ -2474,8 +2515,8 @@ pub const Value = extern union {
union_obj.val.hash(active_field_ty, hasher, mod);
},
.Fn => {
// Note that his hashes the *Fn/*ExternFn rather than the *Decl. This is
// to differentiate function bodies from function pointers.
// Note that this hashes the *Fn/*ExternFn rather than the *Decl.
// This is to differentiate function bodies from function pointers.
// This is currently redundant since we already hash the zig type tag
// at the top of this function.
if (val.castTag(.function)) |func| {
@ -2497,6 +2538,64 @@ pub const Value = extern union {
}
}
/// Feeds `val`, interpreted as type `ty`, into `hasher` using a deliberately coarse
/// scheme so that values which can coerce into each other hash identically.
/// Undefined values and runtime-known values contribute nothing to the hash.
/// This function is meant for hash maps; floats are hashed by the bit pattern of
/// their `f128` representation.
/// NOTE(review): the stated contract is that NaNs hash equal to each other and
/// differently from other floats; that depends on `toFloat(f128)` — confirm.
pub fn hashUncoerced(val: Value, ty: Type, hasher: *std.hash.Wyhash, mod: *Module) void {
    // Neither undefined nor runtime-known values may influence the hash.
    if (val.isUndef() or val.tag() == .runtime_value) return;

    switch (ty.zigTypeTag()) {
        .BoundFn => unreachable, // TODO remove this from the language
        .Opaque => unreachable, // Cannot hash opaque types

        // Only the type tag is hashed for these.
        // It sure would be nice to do something clever with structs.
        .Void, .NoReturn, .Undefined, .Null, .Struct => |type_tag| std.hash.autoHash(hasher, type_tag),

        .Type => {
            var type_buf: ToTypeBuffer = undefined;
            const as_type = val.toType(&type_buf);
            as_type.hashWithHasher(hasher, mod);
        },

        .Float, .ComptimeFloat => {
            const bits = @bitCast(u128, val.toFloat(f128));
            std.hash.autoHash(hasher, bits);
        },

        .Bool, .Int, .ComptimeInt, .Pointer, .Fn => {
            const target = mod.getTarget();
            if (val.castTag(.slice)) |slice| {
                // For slices only the pointer part is hashed.
                slice.data.ptr.hashPtr(hasher, target);
            } else {
                val.hashPtr(hasher, target);
            }
        },

        .Array, .Vector => {
            const count = ty.arrayLen();
            const child_ty = ty.childType();
            var scratch: ElemValueBuffer = undefined;
            var i: usize = 0;
            while (i < count) : (i += 1) {
                val.elemValueBuffer(mod, i, &scratch).hashUncoerced(child_ty, hasher, mod);
            }
        },

        .Optional => {
            const payload = val.castTag(.opt_payload) orelse {
                // No payload wrapper: hash a fixed marker instead.
                std.hash.autoHash(hasher, std.builtin.TypeId.Null);
                return;
            };
            var child_buf: Type.Payload.ElemType = undefined;
            const payload_ty = ty.optionalChild(&child_buf);
            payload.data.hashUncoerced(payload_ty, hasher, mod);
        },

        .ErrorSet, .ErrorUnion => {
            if (val.getError()) |err_name| {
                hasher.update(err_name);
                return;
            }
            const payload_ty = ty.errorUnionPayload();
            val.castTag(.eu_payload).?.data.hashUncoerced(payload_ty, hasher, mod);
        },

        .Enum, .EnumLiteral, .Union => {
            // The tag name is hashed first, followed by either the union payload
            // or a fixed marker when there is no union payload.
            hasher.update(val.tagName(ty, mod));
            const union_obj = val.cast(Payload.Union) orelse {
                std.hash.autoHash(hasher, std.builtin.TypeId.Void);
                return;
            };
            const field_ty = ty.unionFieldType(union_obj.data.tag, mod);
            union_obj.data.val.hashUncoerced(field_ty, hasher, mod);
        },

        .Frame => @panic("TODO implement hashing frame values"),
        .AnyFrame => @panic("TODO implement hashing anyframe values"),
    }
}
pub const ArrayHashContext = struct {
ty: Type,
mod: *Module,