Merge pull request #6164 from jedisct1/cryptobench

Improve crypto benchmarks
This commit is contained in:
Andrew Kelley 2020-08-26 17:30:31 -04:00 committed by GitHub
commit 091d693c53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 150 additions and 114 deletions

View File

@ -7,6 +7,7 @@
const builtin = @import("builtin");
const std = @import("std");
const mem = std.mem;
const time = std.time;
const Timer = time.Timer;
const crypto = std.crypto;
@ -46,6 +47,7 @@ pub fn benchmarkHash(comptime Hash: anytype, comptime bytes: comptime_int) !u64
while (offset < bytes) : (offset += block.len) {
h.update(block[0..]);
}
mem.doNotOptimizeAway(&h);
const end = timer.read();
const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
@ -67,19 +69,20 @@ const macs = [_]Crypto{
};
pub fn benchmarkMac(comptime Mac: anytype, comptime bytes: comptime_int) !u64 {
std.debug.assert(64 >= Mac.mac_length and 32 >= Mac.minimum_key_length);
var in: [1 * MiB]u8 = undefined;
var in: [512 * KiB]u8 = undefined;
prng.random.bytes(in[0..]);
var key: [64]u8 = undefined;
const key_length = if (Mac.minimum_key_length == 0) 32 else Mac.minimum_key_length;
var key: [key_length]u8 = undefined;
prng.random.bytes(key[0..]);
var mac: [Mac.mac_length]u8 = undefined;
var offset: usize = 0;
var timer = try Timer.start();
const start = timer.lap();
while (offset < bytes) : (offset += in.len) {
Mac.create(key[0..], in[0..], key[0..]);
Mac.create(mac[0..], in[0..], key[0..]);
mem.doNotOptimizeAway(&mac);
}
const end = timer.read();
@ -106,6 +109,7 @@ pub fn benchmarkKeyExchange(comptime DhKeyExchange: anytype, comptime exchange_c
var i: usize = 0;
while (i < exchange_count) : (i += 1) {
_ = DhKeyExchange.create(out[0..], out[0..], in[0..]);
mem.doNotOptimizeAway(&out);
}
}
const end = timer.read();
@ -118,7 +122,7 @@ pub fn benchmarkKeyExchange(comptime DhKeyExchange: anytype, comptime exchange_c
const signatures = [_]Crypto{Crypto{ .ty = crypto.sign.Ed25519, .name = "ed25519" }};
pub fn benchmarkSignatures(comptime Signature: anytype, comptime signatures_count: comptime_int) !u64 {
pub fn benchmarkSignature(comptime Signature: anytype, comptime signatures_count: comptime_int) !u64 {
var seed: [Signature.seed_length]u8 = undefined;
prng.random.bytes(seed[0..]);
const msg = [_]u8{0} ** 64;
@ -129,7 +133,8 @@ pub fn benchmarkSignatures(comptime Signature: anytype, comptime signatures_coun
{
var i: usize = 0;
while (i < signatures_count) : (i += 1) {
_ = try Signature.sign(&msg, key_pair, null);
const s = try Signature.sign(&msg, key_pair, null);
mem.doNotOptimizeAway(&s);
}
}
const end = timer.read();
@ -140,6 +145,40 @@ pub fn benchmarkSignatures(comptime Signature: anytype, comptime signatures_coun
return throughput;
}
// AEAD constructions measured by `benchmarkAead`. For each entry, `.ty` is the
// implementation passed to the benchmark and `.name` is the label that is
// matched against the command-line filter and printed next to the result.
const aeads = [_]Crypto{
Crypto{ .ty = crypto.aead.ChaCha20Poly1305, .name = "chacha20Poly1305" },
Crypto{ .ty = crypto.aead.XChaCha20Poly1305, .name = "xchacha20Poly1305" },
Crypto{ .ty = crypto.aead.Gimli, .name = "gimli-aead" },
};
/// Benchmark an AEAD construction by encrypting and then decrypting a 512 KiB
/// buffer in place, repeating until at least `bytes` bytes have been covered.
///
/// `Aead` must provide `tag_length`, `key_length`, `nonce_length`, `encrypt`
/// and `decrypt`. The returned value is a throughput in bytes per second; the
/// byte count is doubled because every pass performs one encryption and one
/// decryption. Any error from `Timer.start()` is propagated to the caller.
pub fn benchmarkAead(comptime Aead: anytype, comptime bytes: comptime_int) !u64 {
    // Random material is drawn in a fixed order: data, then key, then nonce.
    var data: [512 * KiB]u8 = undefined;
    prng.random.bytes(data[0..]);

    // Left uninitialized on purpose: `encrypt` writes it before `decrypt` reads it.
    var tag: [Aead.tag_length]u8 = undefined;

    var key: [Aead.key_length]u8 = undefined;
    prng.random.bytes(key[0..]);

    var nonce: [Aead.nonce_length]u8 = undefined;
    prng.random.bytes(nonce[0..]);

    var timer = try Timer.start();
    const start = timer.lap();

    var processed: usize = 0;
    while (processed < bytes) {
        // Empty associated data; ciphertext overwrites the plaintext buffer.
        Aead.encrypt(data[0..], tag[0..], data[0..], &[_]u8{}, nonce, key);
        // `catch unreachable` asserts that opening the ciphertext/tag pair
        // produced just above never reports an error.
        Aead.decrypt(data[0..], data[0..], tag, &[_]u8{}, nonce, key) catch unreachable;
        processed += data.len;
    }
    // Hand the round-tripped buffer to the optimization barrier before the
    // clock is read, so the loop above is not discarded as dead code.
    mem.doNotOptimizeAway(&data);

    const end = timer.read();
    const elapsed_s = @intToFloat(f64, end - start) / time.ns_per_s;
    return @floatToInt(u64, 2 * bytes / elapsed_s);
}
fn usage() void {
std.debug.warn(
\\throughput_test [options]
@ -198,29 +237,36 @@ pub fn main() !void {
inline for (hashes) |H| {
if (filter == null or std.mem.indexOf(u8, H.name, filter.?) != null) {
const throughput = try benchmarkHash(H.ty, mode(32 * MiB));
try stdout.print("{:>11}: {:5} MiB/s\n", .{ H.name, throughput / (1 * MiB) });
const throughput = try benchmarkHash(H.ty, mode(128 * MiB));
try stdout.print("{:>17}: {:7} MiB/s\n", .{ H.name, throughput / (1 * MiB) });
}
}
inline for (macs) |M| {
if (filter == null or std.mem.indexOf(u8, M.name, filter.?) != null) {
const throughput = try benchmarkMac(M.ty, mode(128 * MiB));
try stdout.print("{:>11}: {:5} MiB/s\n", .{ M.name, throughput / (1 * MiB) });
try stdout.print("{:>17}: {:7} MiB/s\n", .{ M.name, throughput / (1 * MiB) });
}
}
inline for (exchanges) |E| {
if (filter == null or std.mem.indexOf(u8, E.name, filter.?) != null) {
const throughput = try benchmarkKeyExchange(E.ty, mode(1000));
try stdout.print("{:>11}: {:5} exchanges/s\n", .{ E.name, throughput });
try stdout.print("{:>17}: {:7} exchanges/s\n", .{ E.name, throughput });
}
}
inline for (signatures) |E| {
if (filter == null or std.mem.indexOf(u8, E.name, filter.?) != null) {
const throughput = try benchmarkSignatures(E.ty, mode(1000));
try stdout.print("{:>11}: {:5} signatures/s\n", .{ E.name, throughput });
const throughput = try benchmarkSignature(E.ty, mode(1000));
try stdout.print("{:>17}: {:7} signatures/s\n", .{ E.name, throughput });
}
}
inline for (aeads) |E| {
if (filter == null or std.mem.indexOf(u8, E.name, filter.?) != null) {
const throughput = try benchmarkAead(E.ty, mode(128 * MiB));
try stdout.print("{:>17}: {:7} MiB/s\n", .{ E.name, throughput / (1 * MiB) });
}
}
}

View File

@ -47,7 +47,7 @@ fn initContext(key: [8]u32, d: [4]u32) [16]u32 {
}
// The chacha family of ciphers are based on the salsa family.
fn chacha20Core(x: []u32, input: [16]u32) void {
inline fn chacha20Core(x: []u32, input: [16]u32) void {
for (x) |_, i|
x[i] = input[i];
@ -744,26 +744,26 @@ pub const Chacha20Poly1305 = struct {
pub const key_length = 32;
/// c: ciphertext: output buffer should be of size m.len
/// at: authentication tag: output MAC
/// tag: authentication tag: output MAC
/// m: message
/// ad: Associated Data
/// npub: public nonce
/// k: private key
pub fn encrypt(c: []u8, at: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
assert(c.len == m.len);
return chacha20poly1305SealDetached(c, at, m, ad, k, npub);
return chacha20poly1305SealDetached(c, tag, m, ad, k, npub);
}
/// m: message: output buffer should be of size c.len
/// c: ciphertext
/// at: authentication tag
/// tag: authentication tag
/// ad: Associated Data
/// npub: public nonce
/// k: private key
/// NOTE: the check of the authentication tag is currently not done in constant time
pub fn decrypt(m: []u8, c: []const u8, at: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
assert(c.len == m.len);
return try chacha20poly1305OpenDetached(m, c, at[0..], ad, k, npub);
return try chacha20poly1305OpenDetached(m, c, tag[0..], ad, k, npub);
}
};
@ -773,26 +773,26 @@ pub const XChacha20Poly1305 = struct {
pub const key_length = 32;
/// c: ciphertext: output buffer should be of size m.len
/// at: authentication tag: output MAC
/// tag: authentication tag: output MAC
/// m: message
/// ad: Associated Data
/// npub: public nonce
/// k: private key
pub fn encrypt(c: []u8, at: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
assert(c.len == m.len);
return xchacha20poly1305SealDetached(c, at, m, ad, k, npub);
return xchacha20poly1305SealDetached(c, tag, m, ad, k, npub);
}
/// m: message: output buffer should be of size c.len
/// c: ciphertext
/// at: authentication tag
/// tag: authentication tag
/// ad: Associated Data
/// npub: public nonce
/// k: private key
/// NOTE: the check of the authentication tag is currently not done in constant time
pub fn decrypt(m: []u8, c: []const u8, at: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
assert(c.len == m.len);
return try xchacha20poly1305OpenDetached(m, c, at[0..], ad, k, npub);
return try xchacha20poly1305OpenDetached(m, c, tag[0..], ad, k, npub);
}
};

View File

@ -180,10 +180,14 @@ test "hash" {
}
pub const Aead = struct {
pub const tag_length = State.RATE;
pub const nonce_length = 16;
pub const key_length = 32;
/// ad: Associated Data
/// npub: public nonce
/// k: private key
fn init(ad: []const u8, npub: [16]u8, k: [32]u8) State {
fn init(ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) State {
var state = State{
.data = undefined,
};
@ -224,12 +228,12 @@ pub const Aead = struct {
}
/// c: ciphertext: output buffer should be of size m.len
/// at: authentication tag: output MAC
/// tag: authentication tag: output MAC
/// m: message
/// ad: Associated Data
/// npub: public nonce
/// k: private key
pub fn encrypt(c: []u8, at: *[State.RATE]u8, m: []const u8, ad: []const u8, npub: [16]u8, k: [32]u8) void {
pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) void {
assert(c.len == m.len);
var state = Aead.init(ad, npub, k);
@ -265,17 +269,17 @@ pub const Aead = struct {
// After the final non-full block of plaintext, the first 16 bytes
// of the state are output as an authentication tag.
std.mem.copy(u8, at, buf[0..State.RATE]);
std.mem.copy(u8, tag, buf[0..State.RATE]);
}
/// m: message: output buffer should be of size c.len
/// c: ciphertext
/// at: authentication tag
/// tag: authentication tag
/// ad: Associated Data
/// npub: public nonce
/// k: private key
/// NOTE: the check of the authentication tag is currently not done in constant time
pub fn decrypt(m: []u8, c: []const u8, at: [State.RATE]u8, ad: []const u8, npub: [16]u8, k: [32]u8) !void {
pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, k: [key_length]u8) !void {
assert(c.len == m.len);
var state = Aead.init(ad, npub, k);
@ -308,7 +312,7 @@ pub const Aead = struct {
// After the final non-full block of plaintext, the first 16 bytes
// of the state are the authentication tag.
// TODO: use a constant-time equality check here, see https://github.com/ziglang/zig/issues/1776
if (!mem.eql(u8, buf[0..State.RATE], &at)) {
if (!mem.eql(u8, buf[0..State.RATE], &tag)) {
@memset(m.ptr, undefined, m.len);
return error.InvalidMessage;
}
@ -328,13 +332,13 @@ test "cipher" {
const pt: [0]u8 = undefined;
var ct: [pt.len]u8 = undefined;
var at: [16]u8 = undefined;
Aead.encrypt(&ct, &at, &pt, &ad, nonce, key);
var tag: [16]u8 = undefined;
Aead.encrypt(&ct, &tag, &pt, &ad, nonce, key);
htest.assertEqual("", &ct);
htest.assertEqual("14DA9BB7120BF58B985A8E00FDEBA15B", &at);
htest.assertEqual("14DA9BB7120BF58B985A8E00FDEBA15B", &tag);
var pt2: [pt.len]u8 = undefined;
try Aead.decrypt(&pt2, &ct, at, &ad, nonce, key);
try Aead.decrypt(&pt2, &ct, tag, &ad, nonce, key);
testing.expectEqualSlices(u8, &pt, &pt2);
}
{ // test vector (34) from NIST KAT submission.
@ -343,13 +347,13 @@ test "cipher" {
try std.fmt.hexToBytes(&pt, "00");
var ct: [pt.len]u8 = undefined;
var at: [16]u8 = undefined;
Aead.encrypt(&ct, &at, &pt, &ad, nonce, key);
var tag: [16]u8 = undefined;
Aead.encrypt(&ct, &tag, &pt, &ad, nonce, key);
htest.assertEqual("7F", &ct);
htest.assertEqual("80492C317B1CD58A1EDC3A0D3E9876FC", &at);
htest.assertEqual("80492C317B1CD58A1EDC3A0D3E9876FC", &tag);
var pt2: [pt.len]u8 = undefined;
try Aead.decrypt(&pt2, &ct, at, &ad, nonce, key);
try Aead.decrypt(&pt2, &ct, tag, &ad, nonce, key);
testing.expectEqualSlices(u8, &pt, &pt2);
}
{ // test vector (106) from NIST KAT submission.
@ -359,13 +363,13 @@ test "cipher" {
try std.fmt.hexToBytes(&pt, "000102");
var ct: [pt.len]u8 = undefined;
var at: [16]u8 = undefined;
Aead.encrypt(&ct, &at, &pt, &ad, nonce, key);
var tag: [16]u8 = undefined;
Aead.encrypt(&ct, &tag, &pt, &ad, nonce, key);
htest.assertEqual("484D35", &ct);
htest.assertEqual("030BBEA23B61C00CED60A923BDCF9147", &at);
htest.assertEqual("030BBEA23B61C00CED60A923BDCF9147", &tag);
var pt2: [pt.len]u8 = undefined;
try Aead.decrypt(&pt2, &ct, at, &ad, nonce, key);
try Aead.decrypt(&pt2, &ct, tag, &ad, nonce, key);
testing.expectEqualSlices(u8, &pt, &pt2);
}
{ // test vector (790) from NIST KAT submission.
@ -375,13 +379,13 @@ test "cipher" {
try std.fmt.hexToBytes(&pt, "000102030405060708090A0B0C0D0E0F10111213141516");
var ct: [pt.len]u8 = undefined;
var at: [16]u8 = undefined;
Aead.encrypt(&ct, &at, &pt, &ad, nonce, key);
var tag: [16]u8 = undefined;
Aead.encrypt(&ct, &tag, &pt, &ad, nonce, key);
htest.assertEqual("6815B4A0ECDAD01596EAD87D9E690697475D234C6A13D1", &ct);
htest.assertEqual("DFE23F1642508290D68245279558B2FB", &at);
htest.assertEqual("DFE23F1642508290D68245279558B2FB", &tag);
var pt2: [pt.len]u8 = undefined;
try Aead.decrypt(&pt2, &ct, at, &ad, nonce, key);
try Aead.decrypt(&pt2, &ct, tag, &ad, nonce, key);
testing.expectEqualSlices(u8, &pt, &pt2);
}
{ // test vector (1057) from NIST KAT submission.
@ -390,13 +394,13 @@ test "cipher" {
try std.fmt.hexToBytes(&pt, "000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F");
var ct: [pt.len]u8 = undefined;
var at: [16]u8 = undefined;
Aead.encrypt(&ct, &at, &pt, &ad, nonce, key);
var tag: [16]u8 = undefined;
Aead.encrypt(&ct, &tag, &pt, &ad, nonce, key);
htest.assertEqual("7F8A2CF4F52AA4D6B2E74105C30A2777B9D0C8AEFDD555DE35861BD3011F652F", &ct);
htest.assertEqual("7256456FA935AC34BBF55AE135F33257", &at);
htest.assertEqual("7256456FA935AC34BBF55AE135F33257", &tag);
var pt2: [pt.len]u8 = undefined;
try Aead.decrypt(&pt2, &ct, at, &ad, nonce, key);
try Aead.decrypt(&pt2, &ct, tag, &ad, nonce, key);
testing.expectEqualSlices(u8, &pt, &pt2);
}
}

View File

@ -5,6 +5,7 @@
// and substantial portions of the software.
const std = @import("std.zig");
const assert = std.debug.assert;
const mem = std.mem;
const testing = std.testing;
/// Euler's number (e)
@ -108,34 +109,8 @@ pub fn approxEq(comptime T: type, x: T, y: T, epsilon: T) bool {
return fabs(x - y) < epsilon;
}
// TODO: Hide the following in an internal module.
pub fn forceEval(value: anytype) void {
const T = @TypeOf(value);
switch (T) {
f16 => {
var x: f16 = undefined;
const p = @ptrCast(*volatile f16, &x);
p.* = x;
},
f32 => {
var x: f32 = undefined;
const p = @ptrCast(*volatile f32, &x);
p.* = x;
},
f64 => {
var x: f64 = undefined;
const p = @ptrCast(*volatile f64, &x);
p.* = x;
},
f128 => {
var x: f128 = undefined;
const p = @ptrCast(*volatile f128, &x);
p.* = x;
},
else => {
@compileError("forceEval not implemented for " ++ @typeName(T));
},
}
/// Thin wrapper that forwards `value` unchanged to `mem.doNotOptimizeAway`,
/// so math code can request the barrier through the `math` namespace.
pub fn doNotOptimizeAway(value: anytype) void {
mem.doNotOptimizeAway(value);
}
pub fn raiseInvalid() void {

View File

@ -56,7 +56,7 @@ fn asinh32(x: f32) f32 {
}
// |x| < 0x1p-12, inexact if x != 0
else {
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
}
return if (s != 0) -rx else rx;
@ -87,7 +87,7 @@ fn asinh64(x: f64) f64 {
}
// |x| < 0x1p-12, inexact if x != 0
else {
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
}
return if (s != 0) -rx else rx;

View File

@ -72,7 +72,7 @@ fn atan32(x_: f32) f32 {
// |x| < 2^(-12)
if (ix < 0x39800000) {
if (ix < 0x00800000) {
math.forceEval(x * x);
math.doNotOptimizeAway(x * x);
}
return x;
}
@ -170,7 +170,7 @@ fn atan64(x_: f64) f64 {
// |x| < 2^(-27)
if (ix < 0x3E400000) {
if (ix < 0x00100000) {
math.forceEval(@floatCast(f32, x));
math.doNotOptimizeAway(@floatCast(f32, x));
}
return x;
}

View File

@ -45,7 +45,7 @@ fn atanh_32(x: f32) f32 {
if (u < 0x3F800000 - (32 << 23)) {
// underflow
if (u < (1 << 23)) {
math.forceEval(y * y);
math.doNotOptimizeAway(y * y);
}
}
// |x| < 0.5
@ -74,7 +74,7 @@ fn atanh_64(x: f64) f64 {
if (e < 0x3FF - 32) {
// underflow
if (e == 0) {
math.forceEval(@floatCast(f32, y));
math.doNotOptimizeAway(@floatCast(f32, y));
}
}
// |x| < 0.5

View File

@ -47,14 +47,14 @@ fn ceil32(x: f32) f32 {
if (u & m == 0) {
return x;
}
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 == 0) {
u += m;
}
u &= ~m;
return @bitCast(f32, u);
} else {
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 != 0) {
return -0.0;
} else {
@ -79,7 +79,7 @@ fn ceil64(x: f64) f64 {
}
if (e <= 0x3FF - 1) {
math.forceEval(y);
math.doNotOptimizeAway(y);
if (u >> 63 != 0) {
return -0.0;
} else {
@ -106,7 +106,7 @@ fn ceil128(x: f128) f128 {
}
if (e <= 0x3FFF - 1) {
math.forceEval(y);
math.doNotOptimizeAway(y);
if (u >> 127 != 0) {
return -0.0;
} else {

View File

@ -56,7 +56,7 @@ fn exp32(x_: f32) f32 {
return x * 0x1.0p127;
}
if (sign != 0) {
math.forceEval(-0x1.0p-149 / x); // overflow
math.doNotOptimizeAway(-0x1.0p-149 / x); // overflow
// x <= -103.972084
if (hx >= 0x42CFF1B5) {
return 0;
@ -88,7 +88,7 @@ fn exp32(x_: f32) f32 {
hi = x;
lo = 0;
} else {
math.forceEval(0x1.0p127 + x); // inexact
math.doNotOptimizeAway(0x1.0p127 + x); // inexact
return 1 + x;
}
@ -139,7 +139,7 @@ fn exp64(x_: f64) f64 {
}
if (x < -708.39641853226410622) {
// underflow if x != -inf
// math.forceEval(@as(f32, -0x1.0p-149 / x));
// math.doNotOptimizeAway(@as(f32, -0x1.0p-149 / x));
if (x < -745.13321910194110842) {
return 0;
}
@ -172,7 +172,7 @@ fn exp64(x_: f64) f64 {
lo = 0;
} else {
// inexact if x != 0
// math.forceEval(0x1.0p1023 + x);
// math.doNotOptimizeAway(0x1.0p1023 + x);
return 1 + x;
}

View File

@ -70,7 +70,7 @@ fn exp2_32(x: f32) f32 {
// x < -126
if (u >= 0x80000000) {
if (u >= 0xC3160000 or u & 0x000FFFF != 0) {
math.forceEval(-0x1.0p-149 / x);
math.doNotOptimizeAway(-0x1.0p-149 / x);
}
// x <= -150
if (u >= 0x3160000) {
@ -393,7 +393,7 @@ fn exp2_64(x: f64) f64 {
if (ux >> 63 != 0) {
// underflow
if (x <= -1075 or x - 0x1.0p52 + 0x1.0p52 != x) {
math.forceEval(@floatCast(f32, -0x1.0p-149 / x));
math.doNotOptimizeAway(@floatCast(f32, -0x1.0p-149 / x));
}
if (x <= -1075) {
return 0;

View File

@ -106,7 +106,7 @@ fn expm1_32(x_: f32) f32 {
// |x| < 2^(-25)
else if (hx < 0x33000000) {
if (hx < 0x00800000) {
math.forceEval(x * x);
math.doNotOptimizeAway(x * x);
}
return x;
} else {
@ -237,7 +237,7 @@ fn expm1_64(x_: f64) f64 {
// |x| < 2^(-54)
else if (hx < 0x3C900000) {
if (hx < 0x00100000) {
math.forceEval(@floatCast(f32, x));
math.doNotOptimizeAway(@floatCast(f32, x));
}
return x;
} else {

View File

@ -50,13 +50,13 @@ fn floor16(x: f16) f16 {
if (u & m == 0) {
return x;
}
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 15 != 0) {
u += m;
}
return @bitCast(f16, u & ~m);
} else {
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 15 == 0) {
return 0.0;
} else {
@ -84,13 +84,13 @@ fn floor32(x: f32) f32 {
if (u & m == 0) {
return x;
}
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 != 0) {
u += m;
}
return @bitCast(f32, u & ~m);
} else {
math.forceEval(x + 0x1.0p120);
math.doNotOptimizeAway(x + 0x1.0p120);
if (u >> 31 == 0) {
return 0.0;
} else {
@ -115,7 +115,7 @@ fn floor64(x: f64) f64 {
}
if (e <= 0x3FF - 1) {
math.forceEval(y);
math.doNotOptimizeAway(y);
if (u >> 63 != 0) {
return -1.0;
} else {
@ -142,7 +142,7 @@ fn floor128(x: f128) f128 {
}
if (e <= 0x3FFF - 1) {
math.forceEval(y);
math.doNotOptimizeAway(y);
if (u >> 127 != 0) {
return -1.0;
} else {

View File

@ -62,7 +62,7 @@ fn log1p_32(x: f32) f32 {
if ((ix << 1) < (0x33800000 << 1)) {
// underflow if subnormal
if (ix & 0x7F800000 == 0) {
math.forceEval(x * x);
math.doNotOptimizeAway(x * x);
}
return x;
}

View File

@ -43,7 +43,7 @@ fn round32(x_: f32) f32 {
x = -x;
}
if (e < 0x7F - 1) {
math.forceEval(x + math.f32_toint);
math.doNotOptimizeAway(x + math.f32_toint);
return 0 * @bitCast(f32, u);
}
@ -76,7 +76,7 @@ fn round64(x_: f64) f64 {
x = -x;
}
if (e < 0x3ff - 1) {
math.forceEval(x + math.f64_toint);
math.doNotOptimizeAway(x + math.f64_toint);
return 0 * @bitCast(f64, u);
}
@ -109,7 +109,7 @@ fn round128(x_: f128) f128 {
x = -x;
}
if (e < 0x3FFF - 1) {
math.forceEval(x + math.f64_toint);
math.doNotOptimizeAway(x + math.f64_toint);
return 0 * @bitCast(f128, u);
}

View File

@ -67,7 +67,7 @@ fn tanh32(x: f32) f32 {
}
// |x| is subnormal
else {
math.forceEval(x * x);
math.doNotOptimizeAway(x * x);
t = x;
}
@ -112,7 +112,7 @@ fn tanh64(x: f64) f64 {
}
// |x| is subnormal
else {
math.forceEval(@floatCast(f32, x));
math.doNotOptimizeAway(@floatCast(f32, x));
t = x;
}

View File

@ -46,7 +46,7 @@ fn trunc32(x: f32) f32 {
if (u & m == 0) {
return x;
} else {
math.forceEval(x + 0x1p120);
math.doNotOptimizeAway(x + 0x1p120);
return @bitCast(f32, u & ~m);
}
}
@ -67,7 +67,7 @@ fn trunc64(x: f64) f64 {
if (u & m == 0) {
return x;
} else {
math.forceEval(x + 0x1p120);
math.doNotOptimizeAway(x + 0x1p120);
return @bitCast(f64, u & ~m);
}
}
@ -88,7 +88,7 @@ fn trunc128(x: f128) f128 {
if (u & m == 0) {
return x;
} else {
math.forceEval(x + 0x1p120);
math.doNotOptimizeAway(x + 0x1p120);
return @bitCast(f128, u & ~m);
}
}

View File

@ -2158,6 +2158,17 @@ pub fn alignForwardGeneric(comptime T: type, addr: T, alignment: T) T {
return alignBackwardGeneric(T, addr + (alignment - 1), alignment);
}
/// Force an evaluation of the expression; this tries to prevent
/// the compiler from optimizing the computation away even if the
/// result eventually gets discarded.
///
/// `val` is supplied as an input operand (constraint "rm": register or
/// memory) to a `volatile` inline-assembly statement whose template is the
/// empty string and which declares a "memory" clobber. Nothing is returned.
pub fn doNotOptimizeAway(val: anytype) void {
asm volatile (""
:
: [val] "rm" (val)
: "memory"
);
}
test "alignForward" {
testing.expect(alignForward(1, 1) == 1);
testing.expect(alignForward(2, 1) == 2);