From c639c225444c9252515949786e139494fb728861 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Tue, 18 Oct 2022 11:33:24 -0700
Subject: [PATCH 1/5] std.mem: Add readPackedInt, writePackedInt, etc.

These utility functions allow reading from (stage2) packed memory at
runtime-known offsets.
---
 lib/std/mem.zig | 460 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 460 insertions(+)

diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 4000030fc0..f0403942f1 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -1298,6 +1298,76 @@ pub fn readVarInt(comptime ReturnType: type, bytes: []const u8, endian: Endian)
     return result;
 }
 
+/// Loads an integer from packed memory with provided bit_count, bit_offset, and signedness.
+/// Asserts that T is large enough to store the read value.
+///
+/// Example:
+///     const T = packed struct(u16){ a: u3, b: u7, c: u6 };
+///     var st = T{ .a = 1, .b = 2, .c = 4 };
+///     const b_field = readVarPackedInt(u64, std.mem.asBytes(&st), @bitOffsetOf(T, "b"), 7, builtin.cpu.arch.endian(), .unsigned);
+///
+pub fn readVarPackedInt(
+    comptime T: type,
+    bytes: []const u8,
+    bit_offset: usize,
+    bit_count: usize,
+    endian: std.builtin.Endian,
+    signedness: std.builtin.Signedness,
+) T {
+    const uN = std.meta.Int(.unsigned, @bitSizeOf(T));
+    const iN = std.meta.Int(.signed, @bitSizeOf(T));
+    const Log2N = std.math.Log2Int(T);
+
+    const read_size = (bit_count + (bit_offset % 8) + 7) / 8;
+    const bit_shift = @intCast(u3, bit_offset % 8);
+    const pad = @intCast(Log2N, @bitSizeOf(T) - bit_count);
+
+    const lowest_byte = switch (endian) {
+        .Big => bytes.len - (bit_offset / 8) - read_size,
+        .Little => bit_offset / 8,
+    };
+    const read_bytes = bytes[lowest_byte..][0..read_size];
+
+    if (@bitSizeOf(T) <= 8) {
+        // These are the same shifts/masks we perform below, but adds `@truncate`/`@intCast`
+        // where needed since int is smaller than a byte.
+        const value = if (read_size == 1) b: {
+            break :b @truncate(uN, read_bytes[0] >> bit_shift);
+        } else b: {
+            const i: u1 = @boolToInt(endian == .Big);
+            const head = @truncate(uN, read_bytes[i] >> bit_shift);
+            const tail_shift = @intCast(Log2N, @as(u4, 8) - bit_shift);
+            const tail = @truncate(uN, read_bytes[1 - i]);
+            break :b (tail << tail_shift) | head;
+        };
+        switch (signedness) {
+            .signed => return @intCast(T, (@bitCast(iN, value) << pad) >> pad),
+            .unsigned => return @intCast(T, (@bitCast(uN, value) << pad) >> pad),
+        }
+    }
+
+    // Copy the value out (respecting endianness), accounting for bit_shift
+    var int: uN = 0;
+    switch (endian) {
+        .Big => {
+            for (read_bytes[0 .. read_size - 1]) |elem| {
+                int = elem | (int << 8);
+            }
+            int = (read_bytes[read_size - 1] >> bit_shift) | (int << (@as(u4, 8) - bit_shift));
+        },
+        .Little => {
+            int = read_bytes[0] >> bit_shift;
+            for (read_bytes[1..]) |elem, i| {
+                int |= (@as(uN, elem) << @intCast(Log2N, (8 * (i + 1) - bit_shift)));
+            }
+        },
+    }
+    switch (signedness) {
+        .signed => return @intCast(T, (@bitCast(iN, int) << pad) >> pad),
+        .unsigned => return @intCast(T, (@bitCast(uN, int) << pad) >> pad),
+    }
+}
+
 /// Reads an integer from memory with bit count specified by T.
 /// The bit count of T must be evenly divisible by 8.
 /// This function cannot fail and cannot cause undefined behavior.
@@ -1365,6 +1435,84 @@ pub fn readInt(comptime T: type, bytes: *const [@divExact(@typeInfo(T).Int.bits,
     }
 }
 
+fn readPackedIntLittle(comptime T: type, bytes: []const u8, bit_offset: usize) T {
+    const uN = std.meta.Int(.unsigned, @bitSizeOf(T));
+    const Log2N = std.math.Log2Int(T);
+
+    const bit_count = @as(usize, @bitSizeOf(T));
+    const bit_shift = @intCast(u3, bit_offset % 8);
+
+    const load_size = (bit_count + 7) / 8;
+    const load_tail_bits = @intCast(u3, (load_size * 8) - bit_count);
+    const LoadInt = std.meta.Int(.unsigned, load_size * 8);
+
+    if (bit_count == 0)
+        return 0;
+
+    // Read by loading a LoadInt, and then follow it up with a 1-byte read
+    // of the tail if bit_offset pushed us over a byte boundary.
+    const read_bytes = bytes[bit_offset / 8 ..];
+    const val = @truncate(uN, readIntLittle(LoadInt, read_bytes[0..load_size]) >> bit_shift);
+    if (bit_shift > load_tail_bits) {
+        const tail_bits = @intCast(Log2N, bit_shift - load_tail_bits);
+        const tail_byte = read_bytes[load_size];
+        const tail_truncated = if (bit_count < 8) @truncate(uN, tail_byte) else @as(uN, tail_byte);
+        return @bitCast(T, val | (tail_truncated << (@truncate(Log2N, bit_count) -% tail_bits)));
+    } else return @bitCast(T, val);
+}
+
+fn readPackedIntBig(comptime T: type, bytes: []const u8, bit_offset: usize) T {
+    const uN = std.meta.Int(.unsigned, @bitSizeOf(T));
+    const Log2N = std.math.Log2Int(T);
+
+    const bit_count = @as(usize, @bitSizeOf(T));
+    const bit_shift = @intCast(u3, bit_offset % 8);
+    const byte_count = (@as(usize, bit_shift) + bit_count + 7) / 8;
+
+    const load_size = (bit_count + 7) / 8;
+    const load_tail_bits = @intCast(u3, (load_size * 8) - bit_count);
+    const LoadInt = std.meta.Int(.unsigned, load_size * 8);
+
+    if (bit_count == 0)
+        return 0;
+
+    // Read by loading a LoadInt, and then follow it up with a 1-byte read
+    // of the tail if bit_offset pushed us over a byte boundary.
+    const end = bytes.len - (bit_offset / 8);
+    const read_bytes = bytes[(end - byte_count)..end];
+    const val = @truncate(uN, readIntBig(LoadInt, bytes[(end - load_size)..end][0..load_size]) >> bit_shift);
+    if (bit_shift > load_tail_bits) {
+        const tail_bits = @intCast(Log2N, bit_shift - load_tail_bits);
+        const tail_byte = if (bit_count < 8) @truncate(uN, read_bytes[0]) else @as(uN, read_bytes[0]);
+        return @bitCast(T, val | (tail_byte << (@truncate(Log2N, bit_count) -% tail_bits)));
+    } else return @bitCast(T, val);
+}
+
+pub const readPackedIntNative = switch (native_endian) {
+    .Little => readPackedIntLittle,
+    .Big => readPackedIntBig,
+};
+
+pub const readPackedIntForeign = switch (native_endian) {
+    .Little => readPackedIntBig,
+    .Big => readPackedIntLittle,
+};
+
+/// Loads an integer from packed memory.
+/// Asserts that buffer contains at least bit_offset + @bitSizeOf(T) bits.
+///
+/// Example:
+///     const T = packed struct(u16){ a: u3, b: u7, c: u6 };
+///     var st = T{ .a = 1, .b = 2, .c = 4 };
+///     const b_field = readPackedInt(u7, std.mem.asBytes(&st), @bitOffsetOf(T, "b"), builtin.cpu.arch.endian());
+///
+pub fn readPackedInt(comptime T: type, bytes: []const u8, bit_offset: usize, endian: Endian) T {
+    switch (endian) {
+        .Little => return readPackedIntLittle(T, bytes, bit_offset),
+        .Big => return readPackedIntBig(T, bytes, bit_offset),
+    }
+}
+
 /// Asserts that bytes.len >= @typeInfo(T).Int.bits / 8. Reads the integer starting from index 0
 /// and ignores extra bytes.
 /// The bit count of T must be evenly divisible by 8.
@@ -1447,6 +1595,100 @@ pub fn writeInt(comptime T: type, buffer: *[@divExact(@typeInfo(T).Int.bits, 8)]
     }
 }
 
+pub fn writePackedIntLittle(comptime T: type, bytes: []u8, bit_offset: usize, value: T) void {
+    const uN = std.meta.Int(.unsigned, @bitSizeOf(T));
+    const Log2N = std.math.Log2Int(T);
+
+    const bit_count = @as(usize, @bitSizeOf(T));
+    const bit_shift = @intCast(u3, bit_offset % 8);
+
+    const store_size = (@bitSizeOf(T) + 7) / 8;
+    const store_tail_bits = @intCast(u3, (store_size * 8) - bit_count);
+    const StoreInt = std.meta.Int(.unsigned, store_size * 8);
+
+    if (bit_count == 0)
+        return;
+
+    // Write by storing a StoreInt, and then follow it up with a 1-byte tail
+    // if bit_offset pushed us over a byte boundary.
+    const write_bytes = bytes[bit_offset / 8 ..];
+    const head = write_bytes[0] & ((@as(u8, 1) << bit_shift) - 1);
+
+    var write_value = (@as(StoreInt, @bitCast(uN, value)) << bit_shift) | @intCast(StoreInt, head);
+    if (bit_shift > store_tail_bits) {
+        const tail_len = @intCast(Log2N, bit_shift - store_tail_bits);
+        write_bytes[store_size] &= ~((@as(u8, 1) << @intCast(u3, tail_len)) - 1);
+        write_bytes[store_size] |= @intCast(u8, (@bitCast(uN, value) >> (@truncate(Log2N, bit_count) -% tail_len)));
+    } else if (bit_shift < store_tail_bits) {
+        const tail_len = store_tail_bits - bit_shift;
+        const tail = write_bytes[store_size - 1] & (@as(u8, 0xfe) << (7 - tail_len));
+        write_value |= @as(StoreInt, tail) << (8 * (store_size - 1));
+    }
+
+    writeIntLittle(StoreInt, write_bytes[0..store_size], write_value);
+}
+
+pub fn writePackedIntBig(comptime T: type, bytes: []u8, bit_offset: usize, value: T) void {
+    const uN = std.meta.Int(.unsigned, @bitSizeOf(T));
+    const Log2N = std.math.Log2Int(T);
+
+    const bit_count = @as(usize, @bitSizeOf(T));
+    const bit_shift = @intCast(u3, bit_offset % 8);
+    const byte_count = (bit_shift + bit_count + 7) / 8;
+
+    const store_size = (@bitSizeOf(T) + 7) / 8;
+    const store_tail_bits = @intCast(u3, (store_size * 8) - bit_count);
+    const StoreInt = std.meta.Int(.unsigned, store_size * 8);
+
+    if (bit_count == 0)
+        return;
+
+    // Write by storing a StoreInt, and then follow it up with a 1-byte tail
+    // if bit_offset pushed us over a byte boundary.
+    const end = bytes.len - (bit_offset / 8);
+    const write_bytes = bytes[(end - byte_count)..end];
+    const head = write_bytes[byte_count - 1] & ((@as(u8, 1) << bit_shift) - 1);
+
+    var write_value = (@as(StoreInt, @bitCast(uN, value)) << bit_shift) | @intCast(StoreInt, head);
+    if (bit_shift > store_tail_bits) {
+        const tail_len = @intCast(Log2N, bit_shift - store_tail_bits);
+        write_bytes[0] &= ~((@as(u8, 1) << @intCast(u3, tail_len)) - 1);
+        write_bytes[0] |= @intCast(u8, (@bitCast(uN, value) >> (@truncate(Log2N, bit_count) -% tail_len)));
+    } else if (bit_shift < store_tail_bits) {
+        const tail_len = store_tail_bits - bit_shift;
+        const tail = write_bytes[0] & (@as(u8, 0xfe) << (7 - tail_len));
+        write_value |= @as(StoreInt, tail) << (8 * (store_size - 1));
+    }
+
+    writeIntBig(StoreInt, write_bytes[(byte_count - store_size)..][0..store_size], write_value);
+}
+
+pub const writePackedIntNative = switch (native_endian) {
+    .Little => writePackedIntLittle,
+    .Big => writePackedIntBig,
+};
+
+pub const writePackedIntForeign = switch (native_endian) {
+    .Little => writePackedIntBig,
+    .Big => writePackedIntLittle,
+};
+
+/// Stores an integer to packed memory.
+/// Asserts that buffer contains at least bit_offset + @bitSizeOf(T) bits.
+///
+/// Example:
+///     const T = packed struct(u16){ a: u3, b: u7, c: u6 };
+///     var st = T{ .a = 1, .b = 2, .c = 4 };
+///     // st.b = 0x7f;
+///     writePackedInt(u7, std.mem.asBytes(&st), @bitOffsetOf(T, "b"), 0x7f, builtin.cpu.arch.endian());
+///
+pub fn writePackedInt(comptime T: type, bytes: []u8, bit_offset: usize, value: T, endian: Endian) void {
+    switch (endian) {
+        .Little => writePackedIntLittle(T, bytes, bit_offset, value),
+        .Big => writePackedIntBig(T, bytes, bit_offset, value),
+    }
+}
+
 /// Writes a twos-complement little-endian integer to memory.
 /// Asserts that buf.len >= @typeInfo(T).Int.bits / 8.
 /// The bit count of T must be divisible by 8.
@@ -1523,6 +1765,69 @@ pub fn writeIntSlice(comptime T: type, buffer: []u8, value: T, endian: Endian) v
     };
 }
 
+/// Stores an integer to packed memory with provided bit_count, bit_offset, and signedness.
+/// If negative, the written value is sign-extended.
+///
+/// Example:
+///     const T = packed struct(u16){ a: u3, b: u7, c: u6 };
+///     var st = T{ .a = 1, .b = 2, .c = 4 };
+///     // st.b = 0x7f;
+///     var value: u64 = 0x7f;
+///     writeVarPackedInt(std.mem.asBytes(&st), @bitOffsetOf(T, "b"), 7, value, builtin.cpu.arch.endian());
+///
+pub fn writeVarPackedInt(bytes: []u8, bit_offset: usize, bit_count: usize, value: anytype, endian: std.builtin.Endian) void {
+    const T = @TypeOf(value);
+    const uN = std.meta.Int(.unsigned, @bitSizeOf(T));
+    const Log2N = std.math.Log2Int(T);
+
+    const bit_shift = @intCast(u3, bit_offset % 8);
+    const write_size = (bit_count + bit_shift + 7) / 8;
+    const lowest_byte = switch (endian) {
+        .Big => bytes.len - (bit_offset / 8) - write_size,
+        .Little => bit_offset / 8,
+    };
+    const write_bytes = bytes[lowest_byte..][0..write_size];
+
+    if (write_size == 1) {
+        // Single byte writes are handled specially, since we need to mask bits
+        // on both ends of the byte.
+        const mask = (@as(u8, 0xff) >> @intCast(u3, 8 - bit_count));
+        const new_bits = @intCast(u8, @bitCast(uN, value) & mask) << bit_shift;
+        write_bytes[0] = (write_bytes[0] & ~(mask << bit_shift)) | new_bits;
+        return;
+    }
+
+    var remaining: T = value;
+
+    // Iterate bytes forward for Little-endian, backward for Big-endian
+    const delta: i2 = if (endian == .Big) -1 else 1;
+    const start = if (endian == .Big) @intCast(isize, write_bytes.len - 1) else 0;
+
+    var i: isize = start; // isize for signed index arithmetic
+
+    // Write first byte, using a mask to protects bits preceding bit_offset
+    const head_mask = @as(u8, 0xff) >> bit_shift;
+    write_bytes[@intCast(usize, i)] &= ~(head_mask << bit_shift);
+    write_bytes[@intCast(usize, i)] |= @intCast(u8, @bitCast(uN, remaining) & head_mask) << bit_shift;
+    remaining >>= @intCast(Log2N, @as(u4, 8) - bit_shift);
+    i += delta;
+
+    // Write bytes[1..bytes.len - 1]
+    if (@bitSizeOf(T) > 8) {
+        const loop_end = start + delta * (@intCast(isize, write_size) - 1);
+        while (i != loop_end) : (i += delta) {
+            write_bytes[@intCast(usize, i)] = @truncate(u8, @bitCast(uN, remaining));
+            remaining >>= 8;
+        }
+    }
+
+    // Write last byte, using a mask to protect bits following bit_offset + bit_count
+    const following_bits = -%@truncate(u3, bit_shift + bit_count);
+    const tail_mask = (@as(u8, 0xff) << following_bits) >> following_bits;
+    write_bytes[@intCast(usize, i)] &= ~tail_mask;
+    write_bytes[@intCast(usize, i)] |= @intCast(u8, @bitCast(uN, remaining) & tail_mask);
+}
+
 test "writeIntBig and writeIntLittle" {
     var buf0: [0]u8 = undefined;
     var buf1: [1]u8 = undefined;
@@ -3393,3 +3698,158 @@ pub fn alignInSlice(slice: anytype, comptime new_alignment: usize) ?AlignedSlice
     const aligned_slice = bytesAsSlice(Element, aligned_bytes[0..slice_length_bytes]);
     return @alignCast(new_alignment, aligned_slice);
 }
+
+test "read/write(Var)PackedInt" {
+    const foreign_endian: Endian = if (native_endian == .Big) .Little else .Big;
+    const expect = std.testing.expect;
+    var prng = std.rand.DefaultPrng.init(1234);
+    const random = prng.random();
+
+    @setEvalBranchQuota(10_000);
+    inline for ([_]type{ u8, u16, u32, u128 }) |BackingType| {
+        for ([_]BackingType{
+            @as(BackingType, 0), // all zeros
+            -%@as(BackingType, 1), // all ones
+            random.int(BackingType), // random
+            random.int(BackingType), // random
+            random.int(BackingType), // random
+        }) |init_value| {
+            const uTs = [_]type{ u1, u3, u7, u8, u9, u10, u15, u16, u86 };
+            const iTs = [_]type{ i1, i3, i7, i8, i9, i10, i15, i16, i86 };
+            inline for (uTs ++ iTs) |PackedType| {
+                if (@bitSizeOf(PackedType) > @bitSizeOf(BackingType))
+                    continue;
+
+                const iPackedType = std.meta.Int(.signed, @bitSizeOf(PackedType));
+                const uPackedType = std.meta.Int(.unsigned, @bitSizeOf(PackedType));
+                const Log2T = std.math.Log2Int(BackingType);
+
+                const offset_at_end = @bitSizeOf(BackingType) - @bitSizeOf(PackedType);
+                for ([_]usize{ 0, 1, 7, 8, 9, 10, 15, 16, 86, offset_at_end }) |offset| {
+                    if (offset > offset_at_end or offset == @bitSizeOf(BackingType))
+                        continue;
+
+                    for ([_]PackedType{
+                        ~@as(PackedType, 0), // all ones: -1 iN / maxInt uN
+                        @as(PackedType, 0), // all zeros: 0 iN / 0 uN
+                        @bitCast(PackedType, @as(iPackedType, math.maxInt(iPackedType))), // maxInt iN
+                        @bitCast(PackedType, @as(iPackedType, math.minInt(iPackedType))), // maxInt iN
+                        random.int(PackedType), // random
+                        random.int(PackedType), // random
+                    }) |write_value| {
+                        { // Fixed-size Read/Write (Native-endian)
+
+                            // Initialize Value
+                            var value: BackingType = init_value;
+
+                            // Read
+                            const read_value1 = readPackedInt(PackedType, asBytes(&value), offset, native_endian);
+                            try expect(read_value1 == @bitCast(PackedType, @truncate(uPackedType, value >> @intCast(Log2T, offset))));
+
+                            // Write
+                            writePackedInt(PackedType, asBytes(&value), offset, write_value, native_endian);
+                            try expect(write_value == @bitCast(PackedType, @truncate(uPackedType, value >> @intCast(Log2T, offset))));
+
+                            // Read again
+                            const read_value2 = readPackedInt(PackedType, asBytes(&value), offset, native_endian);
+                            try expect(read_value2 == write_value);
+
+                            // Verify bits outside of the target integer are unmodified
+                            const diff_bits = init_value ^ value;
+                            if (offset != offset_at_end)
+                                try expect(diff_bits >> @intCast(Log2T, offset + @bitSizeOf(PackedType)) == 0);
+                            if (offset != 0)
+                                try expect(diff_bits << @intCast(Log2T, @bitSizeOf(BackingType) - offset) == 0);
+                        }
+
+                        { // Fixed-size Read/Write (Foreign-endian)
+
+                            // Initialize Value
+                            var value: BackingType = @byteSwap(init_value);
+
+                            // Read
+                            const read_value1 = readPackedInt(PackedType, asBytes(&value), offset, foreign_endian);
+                            try expect(read_value1 == @bitCast(PackedType, @truncate(uPackedType, @byteSwap(value) >> @intCast(Log2T, offset))));
+
+                            // Write
+                            writePackedInt(PackedType, asBytes(&value), offset, write_value, foreign_endian);
+                            try expect(write_value == @bitCast(PackedType, @truncate(uPackedType, @byteSwap(value) >> @intCast(Log2T, offset))));
+
+                            // Read again
+                            const read_value2 = readPackedInt(PackedType, asBytes(&value), offset, foreign_endian);
+                            try expect(read_value2 == write_value);
+
+                            // Verify bits outside of the target integer are unmodified
+                            const diff_bits = init_value ^ @byteSwap(value);
+                            if (offset != offset_at_end)
+                                try expect(diff_bits >> @intCast(Log2T, offset + @bitSizeOf(PackedType)) == 0);
+                            if (offset != 0)
+                                try expect(diff_bits << @intCast(Log2T, @bitSizeOf(BackingType) - offset) == 0);
+                        }
+
+                        const signedness = @typeInfo(PackedType).Int.signedness;
+                        const NextPowerOfTwoInt = std.meta.Int(signedness, comptime try std.math.ceilPowerOfTwo(u16, @bitSizeOf(PackedType)));
+                        const ui64 = std.meta.Int(signedness, 64);
+                        inline for ([_]type{ PackedType, NextPowerOfTwoInt, ui64 }) |U| {
+                            { // Variable-size Read/Write (Native-endian)
+
+                                if (@bitSizeOf(U) < @bitSizeOf(PackedType))
+                                    continue;
+
+                                // Initialize Value
+                                var value: BackingType = init_value;
+
+                                // Read
+                                const read_value1 = readVarPackedInt(U, asBytes(&value), offset, @bitSizeOf(PackedType), native_endian, signedness);
+                                try expect(read_value1 == @bitCast(PackedType, @truncate(uPackedType, value >> @intCast(Log2T, offset))));
+
+                                // Write
+                                writeVarPackedInt(asBytes(&value), offset, @bitSizeOf(PackedType), @as(U, write_value), native_endian);
+                                try expect(write_value == @bitCast(PackedType, @truncate(uPackedType, value >> @intCast(Log2T, offset))));
+
+                                // Read again
+                                const read_value2 = readVarPackedInt(U, asBytes(&value), offset, @bitSizeOf(PackedType), native_endian, signedness);
+                                try expect(read_value2 == write_value);
+
+                                // Verify bits outside of the target integer are unmodified
+                                const diff_bits = init_value ^ value;
+                                if (offset != offset_at_end)
+                                    try expect(diff_bits >> @intCast(Log2T, offset + @bitSizeOf(PackedType)) == 0);
+                                if (offset != 0)
+                                    try expect(diff_bits << @intCast(Log2T, @bitSizeOf(BackingType) - offset) == 0);
+                            }
+
+                            { // Variable-size Read/Write (Foreign-endian)
+
+                                if (@bitSizeOf(U) < @bitSizeOf(PackedType))
+                                    continue;
+
+                                // Initialize Value
+                                var value: BackingType = @byteSwap(init_value);
+
+                                // Read
+                                const read_value1 = readVarPackedInt(U, asBytes(&value), offset, @bitSizeOf(PackedType), foreign_endian, signedness);
+                                try expect(read_value1 == @bitCast(PackedType, @truncate(uPackedType, @byteSwap(value) >> @intCast(Log2T, offset))));
+
+                                // Write
+                                writeVarPackedInt(asBytes(&value), offset, @bitSizeOf(PackedType), @as(U, write_value), foreign_endian);
+                                try expect(write_value == @bitCast(PackedType, @truncate(uPackedType, @byteSwap(value) >> @intCast(Log2T, offset))));
+
+                                // Read again
+                                const read_value2 = readVarPackedInt(U, asBytes(&value), offset, @bitSizeOf(PackedType), foreign_endian, signedness);
+                                try expect(read_value2 == write_value);
+
+                                // Verify bits outside of the target integer are unmodified
+                                const diff_bits = init_value ^ @byteSwap(value);
+                                if (offset != offset_at_end)
+                                    try expect(diff_bits >> @intCast(Log2T, offset + @bitSizeOf(PackedType)) == 0);
+                                if (offset != 0)
+                                    try expect(diff_bits << @intCast(Log2T, @bitSizeOf(BackingType) - offset) == 0);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}

From 3295fee9116789f144e6406493116c451aee7c57 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Tue, 18 Oct 2022 11:37:43 -0700
Subject: [PATCH 2/5] stage2: Use mem.readPackedInt etc. for packed bitcasts

Packed memory has a well-defined layout that doesn't require
conversion from an integer to read from. Let's use it :-)

This change means that for bitcasting to/from a packed value that
is N layers deep, we no longer have to create N temporary big-ints
and perform N copies.

Other miscellaneous improvements:
  - Adds support for casting to packed enums and vectors
  - Fixes bitcasting to/from vectors outside of a packed struct
  - Adds a fast path for bitcasting <= u/i64
  - Fixes bug when bitcasting f80 which would clear following fields

This also changes the bitcast memory layout of exotic integers on
big-endian systems to match what's empirically observed on our targets.
Technically, this layout is not guaranteed by LLVM so we should probably
ban bitcasts that reveal these padding bits, but for now this is an
improvement.
---
 lib/std/math/big/int.zig      | 161 +++++------
 lib/std/math/big/int_test.zig | 102 ++++---
 src/Sema.zig                  |  42 ---
 src/codegen.zig               |   2 +-
 src/value.zig                 | 497 +++++++++++++++++-----------------
 test/behavior/bitcast.zig     |  87 +++++-
 6 files changed, 465 insertions(+), 426 deletions(-)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index b875f73b2e..ac2f089ea1 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1762,16 +1762,32 @@ pub const Mutable = struct {
     }
 
     /// Read the value of `x` from `buffer`
-    /// Asserts that `buffer`, `abi_size`, and `bit_count` are large enough to store the value.
+    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`.
     ///
     /// The contents of `buffer` are interpreted as if they were the contents of
-    /// @ptrCast(*[abi_size]const u8, &x). Byte ordering is determined by `endian`
+    /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`
     /// and any required padding bits are expected on the MSB end.
     pub fn readTwosComplement(
         x: *Mutable,
         buffer: []const u8,
         bit_count: usize,
-        abi_size: usize,
+        endian: Endian,
+        signedness: Signedness,
+    ) void {
+        return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness);
+    }
+
+    /// Read the value of `x` from a packed memory `buffer`.
+    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
+    /// at offset `bit_offset`.
+    ///
+    /// This is equivalent to loading the value of an integer with `bit_count` bits as
+    /// if it were a field in packed memory at the provided bit offset.
+    pub fn readPackedTwosComplement(
+        x: *Mutable,
+        bytes: []const u8,
+        bit_offset: usize,
+        bit_count: usize,
         endian: Endian,
         signedness: Signedness,
     ) void {
@@ -1782,75 +1798,54 @@ pub const Mutable = struct {
             return;
         }
 
-        // byte_count is our total read size: it cannot exceed abi_size,
-        // but may be less as long as it includes the required bits
-        const limb_count = calcTwosCompLimbCount(bit_count);
-        const byte_count = std.math.min(abi_size, @sizeOf(Limb) * limb_count);
-        assert(8 * byte_count >= bit_count);
-
         // Check whether the input is negative
         var positive = true;
         if (signedness == .signed) {
+            const total_bits = bit_offset + bit_count;
             var last_byte = switch (endian) {
-                .Little => ((bit_count + 7) / 8) - 1,
-                .Big => abi_size - ((bit_count + 7) / 8),
+                .Little => ((total_bits + 7) / 8) - 1,
+                .Big => bytes.len - ((total_bits + 7) / 8),
             };
 
-            const sign_bit = @as(u8, 1) << @intCast(u3, (bit_count - 1) % 8);
-            positive = ((buffer[last_byte] & sign_bit) == 0);
+            const sign_bit = @as(u8, 1) << @intCast(u3, (total_bits - 1) % 8);
+            positive = ((bytes[last_byte] & sign_bit) == 0);
         }
 
         // Copy all complete limbs
-        var carry: u1 = if (positive) 0 else 1;
+        var carry: u1 = 1;
         var limb_index: usize = 0;
+        var bit_index: usize = 0;
         while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
-            var buf_index = switch (endian) {
-                .Little => @sizeOf(Limb) * limb_index,
-                .Big => abi_size - (limb_index + 1) * @sizeOf(Limb),
-            };
-
-            const limb_buf = @ptrCast(*const [@sizeOf(Limb)]u8, buffer[buf_index..]);
-            var limb = mem.readInt(Limb, limb_buf, endian);
+            // Read one Limb of bits
+            var limb = mem.readPackedInt(Limb, bytes, bit_index + bit_offset, endian);
+            bit_index += @bitSizeOf(Limb);
 
             // 2's complement (bitwise not, then add carry bit)
             if (!positive) carry = @boolToInt(@addWithOverflow(Limb, ~limb, carry, &limb));
             x.limbs[limb_index] = limb;
         }
 
-        // Copy the remaining N bytes (N <= @sizeOf(Limb))
-        var bytes_read = limb_index * @sizeOf(Limb);
-        if (bytes_read != byte_count) {
-            var limb: Limb = 0;
-
-            while (bytes_read != byte_count) {
-                const read_size = std.math.floorPowerOfTwo(usize, byte_count - bytes_read);
-                var int_buffer = switch (endian) {
-                    .Little => buffer[bytes_read..],
-                    .Big => buffer[(abi_size - bytes_read - read_size)..],
-                };
-                limb |= @intCast(Limb, switch (read_size) {
-                    1 => mem.readInt(u8, int_buffer[0..1], endian),
-                    2 => mem.readInt(u16, int_buffer[0..2], endian),
-                    4 => mem.readInt(u32, int_buffer[0..4], endian),
-                    8 => mem.readInt(u64, int_buffer[0..8], endian),
-                    16 => mem.readInt(u128, int_buffer[0..16], endian),
-                    else => unreachable,
-                }) << @intCast(Log2Limb, 8 * (bytes_read % @sizeOf(Limb)));
-                bytes_read += read_size;
-            }
+        // Copy the remaining bits
+        if (bit_count != bit_index) {
+            // Read all remaining bits
+            var limb = switch (signedness) {
+                .unsigned => mem.readVarPackedInt(Limb, bytes, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned),
+                .signed => b: {
+                    const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb));
+                    const limb = mem.readVarPackedInt(SLimb, bytes, bit_index + bit_offset, bit_count - bit_index, endian, .signed);
+                    break :b @bitCast(Limb, limb);
+                },
+            };
 
             // 2's complement (bitwise not, then add carry bit)
-            if (!positive) _ = @addWithOverflow(Limb, ~limb, carry, &limb);
+            if (!positive) assert(!@addWithOverflow(Limb, ~limb, carry, &limb));
+            x.limbs[limb_index] = limb;
 
-            // Mask off any unused bits
-            const valid_bits = @intCast(Log2Limb, bit_count % @bitSizeOf(Limb));
-            const mask = (@as(Limb, 1) << valid_bits) -% 1; // 0b0..01..1 with (valid_bits_in_limb) trailing ones
-            limb &= mask;
-
-            x.limbs[limb_count - 1] = limb;
+            limb_index += 1;
         }
+
         x.positive = positive;
-        x.len = limb_count;
+        x.len = limb_index;
         x.normalize(x.len);
     }
 
@@ -2212,66 +2207,48 @@ pub const Const = struct {
     }
 
     /// Write the value of `x` into `buffer`
-    /// Asserts that `buffer`, `abi_size`, and `bit_count` are large enough to store the value.
+    /// Asserts that `buffer` is large enough to store the value.
     ///
     /// `buffer` is filled so that its contents match what would be observed via
-    /// @ptrCast(*[abi_size]const u8, &x). Byte ordering is determined by `endian`,
+    /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`,
     /// and any required padding bits are added on the MSB end.
-    pub fn writeTwosComplement(x: Const, buffer: []u8, bit_count: usize, abi_size: usize, endian: Endian) void {
+    pub fn writeTwosComplement(x: Const, buffer: []u8, endian: Endian) void {
+        return writePackedTwosComplement(x, buffer, 0, 8 * buffer.len, endian);
+    }
 
-        // byte_count is our total write size
-        const byte_count = abi_size;
-        assert(8 * byte_count >= bit_count);
-        assert(buffer.len >= byte_count);
+    /// Write the value of `x` to a packed memory `buffer`.
+    /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
+    /// at offset `bit_offset`.
+    ///
+    /// This is equivalent to storing the value of an integer with `bit_count` bits as
+    /// if it were a field in packed memory at the provided bit offset.
+    pub fn writePackedTwosComplement(x: Const, bytes: []u8, bit_offset: usize, bit_count: usize, endian: Endian) void {
         assert(x.fitsInTwosComp(if (x.positive) .unsigned else .signed, bit_count));
 
         // Copy all complete limbs
-        var carry: u1 = if (x.positive) 0 else 1;
+        var carry: u1 = 1;
         var limb_index: usize = 0;
-        while (limb_index < byte_count / @sizeOf(Limb)) : (limb_index += 1) {
-            var buf_index = switch (endian) {
-                .Little => @sizeOf(Limb) * limb_index,
-                .Big => abi_size - (limb_index + 1) * @sizeOf(Limb),
-            };
-
+        var bit_index: usize = 0;
+        while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
             var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;
+
             // 2's complement (bitwise not, then add carry bit)
             if (!x.positive) carry = @boolToInt(@addWithOverflow(Limb, ~limb, carry, &limb));
 
-            var limb_buf = @ptrCast(*[@sizeOf(Limb)]u8, buffer[buf_index..]);
-            mem.writeInt(Limb, limb_buf, limb, endian);
+            // Write one Limb of bits
+            mem.writePackedInt(Limb, bytes, bit_index + bit_offset, limb, endian);
+            bit_index += @bitSizeOf(Limb);
         }
 
-        // Copy the remaining N bytes (N < @sizeOf(Limb))
-        var bytes_written = limb_index * @sizeOf(Limb);
-        if (bytes_written != byte_count) {
+        // Copy the remaining bits
+        if (bit_count != bit_index) {
             var limb: Limb = if (limb_index < x.limbs.len) x.limbs[limb_index] else 0;
+
             // 2's complement (bitwise not, then add carry bit)
             if (!x.positive) _ = @addWithOverflow(Limb, ~limb, carry, &limb);
 
-            while (bytes_written != byte_count) {
-                const write_size = std.math.floorPowerOfTwo(usize, byte_count - bytes_written);
-                var int_buffer = switch (endian) {
-                    .Little => buffer[bytes_written..],
-                    .Big => buffer[(abi_size - bytes_written - write_size)..],
-                };
-
-                if (write_size == 1) {
-                    mem.writeInt(u8, int_buffer[0..1], @truncate(u8, limb), endian);
-                } else if (@sizeOf(Limb) >= 2 and write_size == 2) {
-                    mem.writeInt(u16, int_buffer[0..2], @truncate(u16, limb), endian);
-                } else if (@sizeOf(Limb) >= 4 and write_size == 4) {
-                    mem.writeInt(u32, int_buffer[0..4], @truncate(u32, limb), endian);
-                } else if (@sizeOf(Limb) >= 8 and write_size == 8) {
-                    mem.writeInt(u64, int_buffer[0..8], @truncate(u64, limb), endian);
-                } else if (@sizeOf(Limb) >= 16 and write_size == 16) {
-                    mem.writeInt(u128, int_buffer[0..16], @truncate(u128, limb), endian);
-                } else if (@sizeOf(Limb) >= 32) {
-                    @compileError("@sizeOf(Limb) exceeded supported range");
-                } else unreachable;
-                limb >>= @intCast(Log2Limb, 8 * write_size);
-                bytes_written += write_size;
-            }
+            // Write all remaining bits
+            mem.writeVarPackedInt(bytes, bit_index + bit_offset, bit_count - bit_index, limb, endian);
         }
     }
 
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 5685a38d41..97de06bfcc 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -2603,13 +2603,13 @@ test "big int conversion read/write twos complement" {
 
     for (endians) |endian| {
         // Writing to buffer and back should not change anything
-        a.toConst().writeTwosComplement(buffer1, 493, abi_size, endian);
-        m.readTwosComplement(buffer1, 493, abi_size, endian, .unsigned);
+        a.toConst().writeTwosComplement(buffer1[0..abi_size], endian);
+        m.readTwosComplement(buffer1[0..abi_size], 493, endian, .unsigned);
         try testing.expect(m.toConst().order(a.toConst()) == .eq);
 
         // Equivalent to @bitCast(i493, @as(u493, intMax(u493))
-        a.toConst().writeTwosComplement(buffer1, 493, abi_size, endian);
-        m.readTwosComplement(buffer1, 493, abi_size, endian, .signed);
+        a.toConst().writeTwosComplement(buffer1[0..abi_size], endian);
+        m.readTwosComplement(buffer1[0..abi_size], 493, endian, .signed);
         try testing.expect(m.toConst().orderAgainstScalar(-1) == .eq);
     }
 }
@@ -2628,26 +2628,26 @@ test "big int conversion read twos complement with padding" {
     // (3) should sign-extend any bits from bit_count to 8 * abi_size
 
     var bit_count: usize = 12 * 8 + 1;
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0, 0x0, 0x0 }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0x0, 0x0, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd }));
 
     @memset(buffer1.ptr, 0xaa, buffer1.len);
     try a.set(-0x01_02030405_06070809_0a0b0c0d);
     bit_count = 12 * 8 + 2;
 
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3, 0xaa, 0xaa, 0xaa }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff }));
-    a.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    a.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &[_]u8{ 0xff, 0xff, 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3 }));
 }
 
@@ -2660,17 +2660,15 @@ test "big int write twos complement +/- zero" {
     defer testing.allocator.free(buffer1);
     @memset(buffer1.ptr, 0xaa, buffer1.len);
 
-    var bit_count: usize = 0;
-
     // Test zero
 
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
 
     @memset(buffer1.ptr, 0xaa, buffer1.len);
@@ -2678,13 +2676,13 @@ test "big int write twos complement +/- zero" {
 
     // Test negative zero
 
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 13, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..13], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 13) ++ ([_]u8{0xaa} ** 3))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Little);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Little);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
-    m.toConst().writeTwosComplement(buffer1, bit_count, 16, .Big);
+    m.toConst().writeTwosComplement(buffer1[0..16], .Big);
     try testing.expect(std.mem.eql(u8, buffer1, &(([_]u8{0} ** 16))));
 }
 
@@ -2705,62 +2703,82 @@ test "big int conversion write twos complement with padding" {
     // Test 0x01_02030405_06070809_0a0b0c0d
 
     buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xb };
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xb, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xab, 0xaa, 0xaa, 0xaa };
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xaa, 0xaa, 0xaa, 0xab, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x01_02030405_06070809_0a0b0c0d) == .eq);
 
+    bit_count = @sizeOf(Limb) * 8;
+
+    // Test 0x0a0a0a0a_02030405_06070809_0a0b0c0d
+
+    buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xaa };
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaa_02030405_06070809_0a0b0c0d)) == .eq);
+
+    buffer = &[_]u8{ 0xaa, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaa_02030405_06070809_0a0b0c0d)) == .eq);
+
+    buffer = &[_]u8{ 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0xaa, 0xaa, 0xaa, 0xaa };
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaaaaaaaa_02030405_06070809_0a0b0c0d)) == .eq);
+
+    buffer = &[_]u8{ 0xaa, 0xaa, 0xaa, 0xaa, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd };
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
+    try testing.expect(m.toConst().orderAgainstScalar(@truncate(Limb, 0xaaaaaaaa_02030405_06070809_0a0b0c0d)) == .eq);
+
     bit_count = 12 * 8 + 2;
 
     // Test -0x01_02030405_06070809_0a0b0c0d
 
     buffer = &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0x02 };
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .signed);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0x02, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3 };
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .signed);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xf3, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0x02, 0xaa, 0xaa, 0xaa };
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .signed);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     buffer = &[_]u8{ 0xaa, 0xaa, 0xaa, 0x02, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf3 };
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .signed);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .signed);
     try testing.expect(m.toConst().orderAgainstScalar(-0x01_02030405_06070809_0a0b0c0d) == .eq);
 
     // Test 0
 
     buffer = &([_]u8{0} ** 16);
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 
     bit_count = 0;
     buffer = &([_]u8{0xaa} ** 16);
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 }
 
@@ -2779,15 +2797,15 @@ test "big int conversion write twos complement zero" {
     var buffer: []const u8 = undefined;
 
     buffer = &([_]u8{0} ** 13);
-    m.readTwosComplement(buffer, bit_count, 13, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 13, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..13], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 
     buffer = &([_]u8{0} ** 16);
-    m.readTwosComplement(buffer, bit_count, 16, .Little, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Little, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
-    m.readTwosComplement(buffer, bit_count, 16, .Big, .unsigned);
+    m.readTwosComplement(buffer[0..16], bit_count, .Big, .unsigned);
     try testing.expect(m.toConst().orderAgainstScalar(0x0) == .eq);
 }
 
diff --git a/src/Sema.zig b/src/Sema.zig
index 9f425b7bcf..fbc8d1dd8e 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -26445,48 +26445,6 @@ fn bitCastVal(
     const target = sema.mod.getTarget();
     if (old_ty.eql(new_ty, sema.mod)) return val;
 
-    // Some conversions have a bitwise definition that ignores in-memory layout,
-    // such as converting between f80 and u80.
-
-    if (old_ty.eql(Type.f80, sema.mod) and new_ty.isAbiInt()) {
-        const float = val.toFloat(f80);
-        switch (new_ty.intInfo(target).signedness) {
-            .signed => {
-                const int = @bitCast(i80, float);
-                const limbs = try sema.arena.alloc(std.math.big.Limb, 2);
-                const big_int = std.math.big.int.Mutable.init(limbs, int);
-                return Value.fromBigInt(sema.arena, big_int.toConst());
-            },
-            .unsigned => {
-                const int = @bitCast(u80, float);
-                const limbs = try sema.arena.alloc(std.math.big.Limb, 2);
-                const big_int = std.math.big.int.Mutable.init(limbs, int);
-                return Value.fromBigInt(sema.arena, big_int.toConst());
-            },
-        }
-    }
-
-    if (new_ty.eql(Type.f80, sema.mod) and old_ty.isAbiInt()) {
-        var bigint_space: Value.BigIntSpace = undefined;
-        var bigint = try val.toBigIntAdvanced(&bigint_space, target, sema.kit(block, src));
-        switch (old_ty.intInfo(target).signedness) {
-            .signed => {
-                // This conversion cannot fail because we already checked bit size before
-                // calling bitCastVal.
-                const int = bigint.to(i80) catch unreachable;
-                const float = @bitCast(f80, int);
-                return Value.Tag.float_80.create(sema.arena, float);
-            },
-            .unsigned => {
-                // This conversion cannot fail because we already checked bit size before
-                // calling bitCastVal.
-                const int = bigint.to(u80) catch unreachable;
-                const float = @bitCast(f80, int);
-                return Value.Tag.float_80.create(sema.arena, float);
-            },
-        }
-    }
-
     // For types with well-defined memory layouts, we serialize them a byte buffer,
     // then deserialize to the new type.
     const abi_size = try sema.usizeCast(block, src, old_ty.abiSize(target));
diff --git a/src/codegen.zig b/src/codegen.zig
index 757bd23b38..6acea5a509 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -470,7 +470,7 @@ pub fn generateSymbol(
                 const abi_size = math.cast(usize, typed_value.ty.abiSize(target)) orelse return error.Overflow;
                 const start = code.items.len;
                 try code.resize(start + abi_size);
-                bigint.writeTwosComplement(code.items[start..][0..abi_size], info.bits, abi_size, endian);
+                bigint.writeTwosComplement(code.items[start..][0..abi_size], endian);
                 return Result{ .appended = {} };
             }
             switch (info.signedness) {
diff --git a/src/value.zig b/src/value.zig
index 28601c1723..7468a69fda 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1206,8 +1206,13 @@ pub const Value = extern union {
         };
     }
 
+    /// Write a Value's contents to `buffer`.
+    ///
+    /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
+    /// the end of the value in memory.
     pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) void {
         const target = mod.getTarget();
+        const endian = target.cpu.arch.endian();
         if (val.isUndef()) {
             const size = @intCast(usize, ty.abiSize(target));
             std.mem.set(u8, buffer[0..size], 0xaa);
@@ -1218,31 +1223,41 @@ pub const Value = extern union {
             .Bool => {
                 buffer[0] = @boolToInt(val.toBool());
             },
-            .Int => {
-                var bigint_buffer: BigIntSpace = undefined;
-                const bigint = val.toBigInt(&bigint_buffer, target);
-                const bits = ty.intInfo(target).bits;
-                const abi_size = @intCast(usize, ty.abiSize(target));
-                bigint.writeTwosComplement(buffer, bits, abi_size, target.cpu.arch.endian());
-            },
-            .Enum => {
+            .Int, .Enum => {
+                const int_info = ty.intInfo(target);
+                const bits = int_info.bits;
+                const byte_count = (bits + 7) / 8;
+
                 var enum_buffer: Payload.U64 = undefined;
                 const int_val = val.enumToInt(ty, &enum_buffer);
-                var bigint_buffer: BigIntSpace = undefined;
-                const bigint = int_val.toBigInt(&bigint_buffer, target);
-                const bits = ty.intInfo(target).bits;
-                const abi_size = @intCast(usize, ty.abiSize(target));
-                bigint.writeTwosComplement(buffer, bits, abi_size, target.cpu.arch.endian());
+
+                if (byte_count <= @sizeOf(u64)) {
+                    const int: u64 = switch (int_val.tag()) {
+                        .zero => 0,
+                        .one => 1,
+                        .int_u64 => int_val.castTag(.int_u64).?.data,
+                        .int_i64 => @bitCast(u64, int_val.castTag(.int_i64).?.data),
+                        else => unreachable,
+                    };
+                    for (buffer[0..byte_count]) |_, i| switch (endian) {
+                        .Little => buffer[i] = @truncate(u8, (int >> @intCast(u6, (8 * i)))),
+                        .Big => buffer[byte_count - i - 1] = @truncate(u8, (int >> @intCast(u6, (8 * i)))),
+                    };
+                } else {
+                    var bigint_buffer: BigIntSpace = undefined;
+                    const bigint = int_val.toBigInt(&bigint_buffer, target);
+                    bigint.writeTwosComplement(buffer[0..byte_count], endian);
+                }
             },
             .Float => switch (ty.floatBits(target)) {
-                16 => return floatWriteToMemory(f16, val.toFloat(f16), target, buffer),
-                32 => return floatWriteToMemory(f32, val.toFloat(f32), target, buffer),
-                64 => return floatWriteToMemory(f64, val.toFloat(f64), target, buffer),
-                80 => return floatWriteToMemory(f80, val.toFloat(f80), target, buffer),
-                128 => return floatWriteToMemory(f128, val.toFloat(f128), target, buffer),
+                16 => std.mem.writeInt(u16, buffer[0..2], @bitCast(u16, val.toFloat(f16)), endian),
+                32 => std.mem.writeInt(u32, buffer[0..4], @bitCast(u32, val.toFloat(f32)), endian),
+                64 => std.mem.writeInt(u64, buffer[0..8], @bitCast(u64, val.toFloat(f64)), endian),
+                80 => std.mem.writeInt(u80, buffer[0..10], @bitCast(u80, val.toFloat(f80)), endian),
+                128 => std.mem.writeInt(u128, buffer[0..16], @bitCast(u128, val.toFloat(f128)), endian),
                 else => unreachable,
             },
-            .Array, .Vector => {
+            .Array => {
                 const len = ty.arrayLen();
                 const elem_ty = ty.childType();
                 const elem_size = @intCast(usize, elem_ty.abiSize(target));
@@ -1251,10 +1266,16 @@ pub const Value = extern union {
                 var buf_off: usize = 0;
                 while (elem_i < len) : (elem_i += 1) {
                     const elem_val = val.elemValueBuffer(mod, elem_i, &elem_value_buf);
-                    writeToMemory(elem_val, elem_ty, mod, buffer[buf_off..]);
+                    elem_val.writeToMemory(elem_ty, mod, buffer[buf_off..]);
                     buf_off += elem_size;
                 }
             },
+            .Vector => {
+                // We use byte_count instead of abi_size here, so that any padding bytes
+                // follow the data bytes, on both big- and little-endian systems.
+                const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0);
+            },
             .Struct => switch (ty.containerLayout()) {
                 .Auto => unreachable, // Sema is supposed to have emitted a compile error already
                 .Extern => {
@@ -1266,122 +1287,113 @@ pub const Value = extern union {
                     }
                 },
                 .Packed => {
-                    // TODO allocate enough heap space instead of using this buffer
-                    // on the stack.
-                    var buf: [16]std.math.big.Limb = undefined;
-                    const host_int = packedStructToInt(val, ty, target, &buf);
-                    const abi_size = @intCast(usize, ty.abiSize(target));
-                    const bit_size = @intCast(usize, ty.bitSize(target));
-                    host_int.writeTwosComplement(buffer, bit_size, abi_size, target.cpu.arch.endian());
+                    const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                    writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0);
                 },
             },
             .ErrorSet => {
                 // TODO revisit this when we have the concept of the error tag type
                 const Int = u16;
                 const int = mod.global_error_set.get(val.castTag(.@"error").?.data.name).?;
-                std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], @intCast(Int, int), target.cpu.arch.endian());
+                std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], @intCast(Int, int), endian);
             },
             else => @panic("TODO implement writeToMemory for more types"),
         }
     }
 
-    fn packedStructToInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb) BigIntConst {
-        var bigint = BigIntMutable.init(buf, 0);
-        const fields = ty.structFields().values();
-        const field_vals = val.castTag(.aggregate).?.data;
-        var bits: u16 = 0;
-        // TODO allocate enough heap space instead of using this buffer
-        // on the stack.
-        var field_buf: [16]std.math.big.Limb = undefined;
-        var field_space: BigIntSpace = undefined;
-        var field_buf2: [16]std.math.big.Limb = undefined;
-        for (fields) |field, i| {
-            const field_val = field_vals[i];
-            const field_bigint_const = switch (field.ty.zigTypeTag()) {
-                .Void => continue,
-                .Float => floatToBigInt(field_val, field.ty, target, &field_buf),
-                .Int, .Bool => intOrBoolToBigInt(field_val, field.ty, target, &field_buf, &field_space),
-                .Struct => switch (field.ty.containerLayout()) {
-                    .Auto, .Extern => unreachable, // Sema should have error'd before this.
-                    .Packed => packedStructToInt(field_val, field.ty, target, &field_buf),
-                },
-                .Vector => vectorToBigInt(field_val, field.ty, target, &field_buf),
-                .Enum => enumToBigInt(field_val, field.ty, target, &field_space),
-                .Union => unreachable, // TODO: packed structs support packed unions
-                else => unreachable,
-            };
-            var field_bigint = BigIntMutable.init(&field_buf2, 0);
-            field_bigint.shiftLeft(field_bigint_const, bits);
-            bits += @intCast(u16, field.ty.bitSize(target));
-            bigint.bitOr(bigint.toConst(), field_bigint.toConst());
-        }
-        return bigint.toConst();
-    }
-
-    fn intOrBoolToBigInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb, space: *BigIntSpace) BigIntConst {
-        const big_int_const = val.toBigInt(space, target);
-        if (big_int_const.positive) return big_int_const;
-
-        var big_int = BigIntMutable.init(buf, 0);
-        big_int.bitNotWrap(big_int_const.negate(), .unsigned, @intCast(u32, ty.bitSize(target)));
-        big_int.addScalar(big_int.toConst(), 1);
-        return big_int.toConst();
-    }
-
-    fn vectorToBigInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb) BigIntConst {
+    /// Write a Value's contents to `buffer`.
+    ///
+    /// Both the start and the end of the provided buffer must be tight, since
+    /// big-endian packed memory layouts start at the end of the buffer.
+    pub fn writeToPackedMemory(val: Value, ty: Type, mod: *Module, buffer: []u8, bit_offset: usize) void {
+        const target = mod.getTarget();
         const endian = target.cpu.arch.endian();
-        var vec_bitint = BigIntMutable.init(buf, 0);
-        const vec_len = @intCast(usize, ty.arrayLen());
-        const elem_ty = ty.childType();
-        const elem_size = @intCast(usize, elem_ty.bitSize(target));
-
-        var elem_buf: [16]std.math.big.Limb = undefined;
-        var elem_space: BigIntSpace = undefined;
-        var elem_buf2: [16]std.math.big.Limb = undefined;
-
-        var elem_i: usize = 0;
-        while (elem_i < vec_len) : (elem_i += 1) {
-            const elem_i_target = if (endian == .Big) vec_len - elem_i - 1 else elem_i;
-            const elem_val = val.indexVectorlike(elem_i_target);
-            const elem_bigint_const = switch (elem_ty.zigTypeTag()) {
-                .Int, .Bool => intOrBoolToBigInt(elem_val, elem_ty, target, &elem_buf, &elem_space),
-                .Float => floatToBigInt(elem_val, elem_ty, target, &elem_buf),
-                .Pointer => unreachable, // TODO
-                else => unreachable, // Sema should not let this happen
-            };
-            var elem_bitint = BigIntMutable.init(&elem_buf2, 0);
-            elem_bitint.shiftLeft(elem_bigint_const, elem_size * elem_i);
-            vec_bitint.bitOr(vec_bitint.toConst(), elem_bitint.toConst());
+        if (val.isUndef()) {
+            const bit_size = @intCast(usize, ty.bitSize(target));
+            std.mem.writeVarPackedInt(buffer, bit_offset, bit_size, @as(u1, 0), endian);
+            return;
+        }
+        switch (ty.zigTypeTag()) {
+            .Void => {},
+            .Bool => {
+                const byte_index = switch (endian) {
+                    .Little => bit_offset / 8,
+                    .Big => buffer.len - bit_offset / 8 - 1,
+                };
+                if (val.toBool()) {
+                    buffer[byte_index] |= (@as(u8, 1) << @intCast(u3, bit_offset % 8));
+                } else {
+                    buffer[byte_index] &= ~(@as(u8, 1) << @intCast(u3, bit_offset % 8));
+                }
+            },
+            .Int, .Enum => {
+                const bits = ty.intInfo(target).bits;
+                const abi_size = @intCast(usize, ty.abiSize(target));
+
+                var enum_buffer: Payload.U64 = undefined;
+                const int_val = val.enumToInt(ty, &enum_buffer);
+
+                if (abi_size <= @sizeOf(u64)) {
+                    const int: u64 = switch (int_val.tag()) {
+                        .zero => 0,
+                        .one => 1,
+                        .int_u64 => int_val.castTag(.int_u64).?.data,
+                        .int_i64 => @bitCast(u64, int_val.castTag(.int_i64).?.data),
+                        else => unreachable,
+                    };
+                    std.mem.writeVarPackedInt(buffer, bit_offset, bits, int, endian);
+                } else {
+                    var bigint_buffer: BigIntSpace = undefined;
+                    const bigint = int_val.toBigInt(&bigint_buffer, target);
+                    bigint.writePackedTwosComplement(buffer, bit_offset, bits, endian);
+                }
+            },
+            .Float => switch (ty.floatBits(target)) {
+                16 => std.mem.writePackedInt(u16, buffer, bit_offset, @bitCast(u16, val.toFloat(f16)), endian),
+                32 => std.mem.writePackedInt(u32, buffer, bit_offset, @bitCast(u32, val.toFloat(f32)), endian),
+                64 => std.mem.writePackedInt(u64, buffer, bit_offset, @bitCast(u64, val.toFloat(f64)), endian),
+                80 => std.mem.writePackedInt(u80, buffer, bit_offset, @bitCast(u80, val.toFloat(f80)), endian),
+                128 => std.mem.writePackedInt(u128, buffer, bit_offset, @bitCast(u128, val.toFloat(f128)), endian),
+                else => unreachable,
+            },
+            .Vector => {
+                const len = ty.arrayLen();
+                const elem_ty = ty.childType();
+                const elem_bit_size = @intCast(u16, elem_ty.bitSize(target));
+
+                var bits: u16 = 0;
+                var elem_i: usize = 0;
+                var elem_value_buf: ElemValueBuffer = undefined;
+                while (elem_i < len) : (elem_i += 1) {
+                    // On big-endian systems, LLVM reverses the element order of vectors by default
+                    const tgt_elem_i = if (endian == .Big) len - elem_i - 1 else elem_i;
+                    const elem_val = val.elemValueBuffer(mod, tgt_elem_i, &elem_value_buf);
+                    elem_val.writeToPackedMemory(elem_ty, mod, buffer, bit_offset + bits);
+                    bits += elem_bit_size;
+                }
+            },
+            .Struct => switch (ty.containerLayout()) {
+                .Auto => unreachable, // Sema is supposed to have emitted a compile error already
+                .Extern => unreachable, // Handled in non-packed writeToMemory
+                .Packed => {
+                    var bits: u16 = 0;
+                    const fields = ty.structFields().values();
+                    const field_vals = val.castTag(.aggregate).?.data;
+                    for (fields) |field, i| {
+                        const field_bits = @intCast(u16, field.ty.bitSize(target));
+                        field_vals[i].writeToPackedMemory(field.ty, mod, buffer, bit_offset + bits);
+                        bits += field_bits;
+                    }
+                },
+            },
+            else => @panic("TODO implement writeToPackedMemory for more types"),
         }
-        return vec_bitint.toConst();
-    }
-
-    fn enumToBigInt(val: Value, ty: Type, target: Target, space: *BigIntSpace) BigIntConst {
-        var enum_buf: Payload.U64 = undefined;
-        const int_val = val.enumToInt(ty, &enum_buf);
-        return int_val.toBigInt(space, target);
-    }
-
-    fn floatToBigInt(val: Value, ty: Type, target: Target, buf: []std.math.big.Limb) BigIntConst {
-        return switch (ty.floatBits(target)) {
-            16 => bitcastFloatToBigInt(f16, val.toFloat(f16), buf),
-            32 => bitcastFloatToBigInt(f32, val.toFloat(f32), buf),
-            64 => bitcastFloatToBigInt(f64, val.toFloat(f64), buf),
-            80 => bitcastFloatToBigInt(f80, val.toFloat(f80), buf),
-            128 => bitcastFloatToBigInt(f128, val.toFloat(f128), buf),
-            else => unreachable,
-        };
-    }
-
-    fn bitcastFloatToBigInt(comptime F: type, f: F, buf: []std.math.big.Limb) BigIntConst {
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = @bitCast(Int, f);
-        return BigIntMutable.init(buf, int).toConst();
     }
 
+    /// Load a Value from the contents of `buffer`.
+    ///
+    /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
+    /// the end of the value in memory.
     pub fn readFromMemory(
         ty: Type,
         mod: *Module,
@@ -1389,6 +1401,7 @@ pub const Value = extern union {
         arena: Allocator,
     ) Allocator.Error!Value {
         const target = mod.getTarget();
+        const endian = target.cpu.arch.endian();
         switch (ty.zigTypeTag()) {
             .Void => return Value.@"void",
             .Bool => {
@@ -1398,27 +1411,40 @@ pub const Value = extern union {
                     return Value.@"true";
                 }
             },
-            .Int => {
-                if (buffer.len == 0) return Value.zero;
+            .Int, .Enum => {
                 const int_info = ty.intInfo(target);
-                const endian = target.cpu.arch.endian();
-                const Limb = std.math.big.Limb;
-                const limb_count = (buffer.len + @sizeOf(Limb) - 1) / @sizeOf(Limb);
-                const limbs_buffer = try arena.alloc(Limb, limb_count);
-                const abi_size = @intCast(usize, ty.abiSize(target));
-                var bigint = BigIntMutable.init(limbs_buffer, 0);
-                bigint.readTwosComplement(buffer, int_info.bits, abi_size, endian, int_info.signedness);
-                return fromBigInt(arena, bigint.toConst());
+                const bits = int_info.bits;
+                const byte_count = (bits + 7) / 8;
+                if (bits == 0 or buffer.len == 0) return Value.zero;
+
+                if (bits <= 64) switch (int_info.signedness) { // Fast path for integers <= u64
+                    .signed => {
+                        const val = std.mem.readVarInt(i64, buffer[0..byte_count], endian);
+                        return Value.Tag.int_i64.create(arena, (val << @intCast(u6, 64 - bits)) >> @intCast(u6, 64 - bits));
+                    },
+                    .unsigned => {
+                        const val = std.mem.readVarInt(u64, buffer[0..byte_count], endian);
+                        return Value.Tag.int_u64.create(arena, (val << @intCast(u6, 64 - bits)) >> @intCast(u6, 64 - bits));
+                    },
+                } else { // Slow path, we have to construct a big-int
+                    const Limb = std.math.big.Limb;
+                    const limb_count = (byte_count + @sizeOf(Limb) - 1) / @sizeOf(Limb);
+                    const limbs_buffer = try arena.alloc(Limb, limb_count);
+
+                    var bigint = BigIntMutable.init(limbs_buffer, 0);
+                    bigint.readTwosComplement(buffer[0..byte_count], bits, endian, int_info.signedness);
+                    return fromBigInt(arena, bigint.toConst());
+                }
             },
             .Float => switch (ty.floatBits(target)) {
-                16 => return Value.Tag.float_16.create(arena, floatReadFromMemory(f16, target, buffer)),
-                32 => return Value.Tag.float_32.create(arena, floatReadFromMemory(f32, target, buffer)),
-                64 => return Value.Tag.float_64.create(arena, floatReadFromMemory(f64, target, buffer)),
-                80 => return Value.Tag.float_80.create(arena, floatReadFromMemory(f80, target, buffer)),
-                128 => return Value.Tag.float_128.create(arena, floatReadFromMemory(f128, target, buffer)),
+                16 => return Value.Tag.float_16.create(arena, @bitCast(f16, std.mem.readInt(u16, buffer[0..2], endian))),
+                32 => return Value.Tag.float_32.create(arena, @bitCast(f32, std.mem.readInt(u32, buffer[0..4], endian))),
+                64 => return Value.Tag.float_64.create(arena, @bitCast(f64, std.mem.readInt(u64, buffer[0..8], endian))),
+                80 => return Value.Tag.float_80.create(arena, @bitCast(f80, std.mem.readInt(u80, buffer[0..10], endian))),
+                128 => return Value.Tag.float_128.create(arena, @bitCast(f128, std.mem.readInt(u128, buffer[0..16], endian))),
                 else => unreachable,
             },
-            .Array, .Vector => {
+            .Array => {
                 const elem_ty = ty.childType();
                 const elem_size = elem_ty.abiSize(target);
                 const elems = try arena.alloc(Value, @intCast(usize, ty.arrayLen()));
@@ -1429,6 +1455,12 @@ pub const Value = extern union {
                 }
                 return Tag.aggregate.create(arena, elems);
             },
+            .Vector => {
+                // We use byte_count instead of abi_size here, so that any padding bytes
+                // follow the data bytes, on both big- and little-endian systems.
+                const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                return readFromPackedMemory(ty, mod, buffer[0..byte_count], 0, arena);
+            },
             .Struct => switch (ty.containerLayout()) {
                 .Auto => unreachable, // Sema is supposed to have emitted a compile error already
                 .Extern => {
@@ -1436,26 +1468,20 @@ pub const Value = extern union {
                     const field_vals = try arena.alloc(Value, fields.len);
                     for (fields) |field, i| {
                         const off = @intCast(usize, ty.structFieldOffset(i, target));
-                        field_vals[i] = try readFromMemory(field.ty, mod, buffer[off..], arena);
+                        const sz = @intCast(usize, ty.structFieldType(i).abiSize(target));
+                        field_vals[i] = try readFromMemory(field.ty, mod, buffer[off..(off + sz)], arena);
                     }
                     return Tag.aggregate.create(arena, field_vals);
                 },
                 .Packed => {
-                    const endian = target.cpu.arch.endian();
-                    const Limb = std.math.big.Limb;
-                    const abi_size = @intCast(usize, ty.abiSize(target));
-                    const bit_size = @intCast(usize, ty.bitSize(target));
-                    const limb_count = (buffer.len + @sizeOf(Limb) - 1) / @sizeOf(Limb);
-                    const limbs_buffer = try arena.alloc(Limb, limb_count);
-                    var bigint = BigIntMutable.init(limbs_buffer, 0);
-                    bigint.readTwosComplement(buffer, bit_size, abi_size, endian, .unsigned);
-                    return intToPackedStruct(ty, target, bigint.toConst(), arena);
+                    const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
+                    return readFromPackedMemory(ty, mod, buffer[0..byte_count], 0, arena);
                 },
             },
             .ErrorSet => {
                 // TODO revisit this when we have the concept of the error tag type
                 const Int = u16;
-                const int = std.mem.readInt(Int, buffer[0..@sizeOf(Int)], target.cpu.arch.endian());
+                const int = std.mem.readInt(Int, buffer[0..@sizeOf(Int)], endian);
 
                 const payload = try arena.create(Value.Payload.Error);
                 payload.* = .{
@@ -1468,115 +1494,90 @@ pub const Value = extern union {
         }
     }
 
-    fn intToPackedStruct(
+    /// Load a Value from the contents of `buffer`.
+    ///
+    /// Both the start and the end of the provided buffer must be tight, since
+    /// big-endian packed memory layouts start at the end of the buffer.
+    pub fn readFromPackedMemory(
         ty: Type,
-        target: Target,
-        bigint: BigIntConst,
+        mod: *Module,
+        buffer: []const u8,
+        bit_offset: usize,
         arena: Allocator,
     ) Allocator.Error!Value {
-        const limbs_buffer = try arena.alloc(std.math.big.Limb, bigint.limbs.len);
-        var bigint_mut = bigint.toMutable(limbs_buffer);
-        const fields = ty.structFields().values();
-        const field_vals = try arena.alloc(Value, fields.len);
-        var bits: u16 = 0;
-        for (fields) |field, i| {
-            const field_bits = @intCast(u16, field.ty.bitSize(target));
-            bigint_mut.shiftRight(bigint, bits);
-            bigint_mut.truncate(bigint_mut.toConst(), .unsigned, field_bits);
-            bits += field_bits;
-            const field_bigint = bigint_mut.toConst();
+        const target = mod.getTarget();
+        const endian = target.cpu.arch.endian();
+        switch (ty.zigTypeTag()) {
+            .Void => return Value.@"void",
+            .Bool => {
+                const byte = switch (endian) {
+                    .Big => buffer[buffer.len - bit_offset / 8 - 1],
+                    .Little => buffer[bit_offset / 8],
+                };
+                if (((byte >> @intCast(u3, bit_offset % 8)) & 1) == 0) {
+                    return Value.@"false";
+                } else {
+                    return Value.@"true";
+                }
+            },
+            .Int, .Enum => {
+                if (buffer.len == 0) return Value.zero;
+                const int_info = ty.intInfo(target);
+                const abi_size = @intCast(usize, ty.abiSize(target));
 
-            field_vals[i] = switch (field.ty.zigTypeTag()) {
-                .Float => switch (field.ty.floatBits(target)) {
-                    16 => try bitCastBigIntToFloat(f16, .float_16, field_bigint, arena),
-                    32 => try bitCastBigIntToFloat(f32, .float_32, field_bigint, arena),
-                    64 => try bitCastBigIntToFloat(f64, .float_64, field_bigint, arena),
-                    80 => try bitCastBigIntToFloat(f80, .float_80, field_bigint, arena),
-                    128 => try bitCastBigIntToFloat(f128, .float_128, field_bigint, arena),
-                    else => unreachable,
-                },
-                .Bool => makeBool(!field_bigint.eqZero()),
-                .Int => try Tag.int_big_positive.create(
-                    arena,
-                    try arena.dupe(std.math.big.Limb, field_bigint.limbs),
-                ),
-                .Struct => try intToPackedStruct(field.ty, target, field_bigint, arena),
+                const bits = int_info.bits;
+                if (bits <= 64) switch (int_info.signedness) { // Fast path for integers <= u64
+                    .signed => return Value.Tag.int_i64.create(arena, std.mem.readVarPackedInt(i64, buffer, bit_offset, bits, endian, .signed)),
+                    .unsigned => return Value.Tag.int_u64.create(arena, std.mem.readVarPackedInt(u64, buffer, bit_offset, bits, endian, .unsigned)),
+                } else { // Slow path, we have to construct a big-int
+                    const Limb = std.math.big.Limb;
+                    const limb_count = (abi_size + @sizeOf(Limb) - 1) / @sizeOf(Limb);
+                    const limbs_buffer = try arena.alloc(Limb, limb_count);
+
+                    var bigint = BigIntMutable.init(limbs_buffer, 0);
+                    bigint.readPackedTwosComplement(buffer, bit_offset, bits, endian, int_info.signedness);
+                    return fromBigInt(arena, bigint.toConst());
+                }
+            },
+            .Float => switch (ty.floatBits(target)) {
+                16 => return Value.Tag.float_16.create(arena, @bitCast(f16, std.mem.readPackedInt(u16, buffer, bit_offset, endian))),
+                32 => return Value.Tag.float_32.create(arena, @bitCast(f32, std.mem.readPackedInt(u32, buffer, bit_offset, endian))),
+                64 => return Value.Tag.float_64.create(arena, @bitCast(f64, std.mem.readPackedInt(u64, buffer, bit_offset, endian))),
+                80 => return Value.Tag.float_80.create(arena, @bitCast(f80, std.mem.readPackedInt(u80, buffer, bit_offset, endian))),
+                128 => return Value.Tag.float_128.create(arena, @bitCast(f128, std.mem.readPackedInt(u128, buffer, bit_offset, endian))),
                 else => unreachable,
-            };
-        }
-        return Tag.aggregate.create(arena, field_vals);
-    }
-
-    fn bitCastBigIntToFloat(
-        comptime F: type,
-        comptime float_tag: Tag,
-        bigint: BigIntConst,
-        arena: Allocator,
-    ) !Value {
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = bigint.to(Int) catch |err| switch (err) {
-            error.NegativeIntoUnsigned => unreachable,
-            error.TargetTooSmall => unreachable,
-        };
-        const f = @bitCast(F, int);
-        return float_tag.create(arena, f);
-    }
-
-    fn floatWriteToMemory(comptime F: type, f: F, target: Target, buffer: []u8) void {
-        const endian = target.cpu.arch.endian();
-        if (F == f80) {
-            const repr = std.math.break_f80(f);
-            std.mem.writeInt(u64, buffer[0..8], repr.fraction, endian);
-            std.mem.writeInt(u16, buffer[8..10], repr.exp, endian);
-            std.mem.set(u8, buffer[10..], 0);
-            return;
-        }
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = @bitCast(Int, f);
-        std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], int, endian);
-    }
-
-    fn floatReadFromMemory(comptime F: type, target: Target, buffer: []const u8) F {
-        const endian = target.cpu.arch.endian();
-        if (F == f80) {
-            return std.math.make_f80(.{
-                .fraction = readInt(u64, buffer[0..8], endian),
-                .exp = readInt(u16, buffer[8..10], endian),
-            });
-        }
-        const Int = @Type(.{ .Int = .{
-            .signedness = .unsigned,
-            .bits = @typeInfo(F).Float.bits,
-        } });
-        const int = readInt(Int, buffer[0..@sizeOf(Int)], endian);
-        return @bitCast(F, int);
-    }
-
-    fn readInt(comptime Int: type, buffer: *const [@sizeOf(Int)]u8, endian: std.builtin.Endian) Int {
-        var result: Int = 0;
-        switch (endian) {
-            .Big => {
-                for (buffer) |byte| {
-                    result <<= 8;
-                    result |= byte;
-                }
             },
-            .Little => {
-                var i: usize = buffer.len;
-                while (i != 0) {
-                    i -= 1;
-                    result <<= 8;
-                    result |= buffer[i];
+            .Vector => {
+                const elem_ty = ty.childType();
+                const elems = try arena.alloc(Value, @intCast(usize, ty.arrayLen()));
+
+                var bits: u16 = 0;
+                const elem_bit_size = @intCast(u16, elem_ty.bitSize(target));
+                for (elems) |_, i| {
+                    // On big-endian systems, LLVM reverses the element order of vectors by default
+                    const tgt_elem_i = if (endian == .Big) elems.len - i - 1 else i;
+                    elems[tgt_elem_i] = try readFromPackedMemory(elem_ty, mod, buffer, bit_offset + bits, arena);
+                    bits += elem_bit_size;
                 }
+                return Tag.aggregate.create(arena, elems);
             },
+            .Struct => switch (ty.containerLayout()) {
+                .Auto => unreachable, // Sema is supposed to have emitted a compile error already
+                .Extern => unreachable, // Handled by non-packed readFromMemory
+                .Packed => {
+                    var bits: u16 = 0;
+                    const fields = ty.structFields().values();
+                    const field_vals = try arena.alloc(Value, fields.len);
+                    for (fields) |field, i| {
+                        const field_bits = @intCast(u16, field.ty.bitSize(target));
+                        field_vals[i] = try readFromPackedMemory(field.ty, mod, buffer, bit_offset + bits, arena);
+                        bits += field_bits;
+                    }
+                    return Tag.aggregate.create(arena, field_vals);
+                },
+            },
+            else => @panic("TODO implement readFromPackedMemory for more types"),
         }
-        return result;
     }
 
     /// Asserts that the value is a float or an integer.
diff --git a/test/behavior/bitcast.zig b/test/behavior/bitcast.zig
index c629a1a34b..b225c31858 100644
--- a/test/behavior/bitcast.zig
+++ b/test/behavior/bitcast.zig
@@ -63,6 +63,10 @@ fn testBitCast(comptime N: usize) !void {
     try expect(conv_iN(N, 0) == 0);
 
     try expect(conv_iN(N, -0) == 0);
+
+    if (N > 24) {
+        try expect(conv_uN(N, 0xf23456) == 0xf23456);
+    }
 }
 
 fn conv_iN(comptime N: usize, x: std.meta.Int(.signed, N)) std.meta.Int(.unsigned, N) {
@@ -73,6 +77,55 @@ fn conv_uN(comptime N: usize, x: std.meta.Int(.unsigned, N)) std.meta.Int(.signe
     return @bitCast(std.meta.Int(.signed, N), x);
 }
 
+test "bitcast uX to bytes" {
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
+
+    const bit_values = [_]usize{ 1, 48, 27, 512, 493, 293, 125, 204, 112 };
+    inline for (bit_values) |bits| {
+        try testBitCast(bits);
+        comptime try testBitCast(bits);
+    }
+}
+
+fn testBitCastuXToBytes(comptime N: usize) !void {
+
+    // The location of padding bits in these layouts are technically not defined
+    // by LLVM, but we currently allow exotic integers to be cast (at comptime)
+    // to types that expose their padding bits anyway.
+    //
+    // This test at least makes sure those bits are matched by the runtime behavior
+    // on the platforms we target. If the above behavior is restricted after all,
+    // this test should be deleted.
+
+    const T = std.meta.Int(.unsigned, N);
+    for ([_]T{ 0, ~@as(T, 0) }) |init_value| {
+        var x: T = init_value;
+        const bytes = std.mem.asBytes(&x);
+
+        const byte_count = (N + 7) / 8;
+        switch (builtin.cpu.arch.endian()) {
+            .Little => {
+                var byte_i = 0;
+                while (byte_i < (byte_count - 1)) : (byte_i += 1) {
+                    try expect(bytes[byte_i] == 0xff);
+                }
+                try expect(((bytes[byte_i] ^ 0xff) << -%@truncate(u3, N)) == 0);
+            },
+            .Big => {
+                var byte_i = byte_count - 1;
+                while (byte_i > 0) : (byte_i -= 1) {
+                    try expect(bytes[byte_i] == 0xff);
+                }
+                try expect(((bytes[byte_i] ^ 0xff) << -%@truncate(u3, N)) == 0);
+            },
+        }
+    }
+}
+
 test "nested bitcast" {
     const S = struct {
         fn moo(x: isize) !void {
@@ -283,7 +336,8 @@ test "@bitCast packed struct of floats" {
     comptime try S.doTheTest();
 }
 
-test "comptime @bitCast packed struct to int" {
+test "comptime @bitCast packed struct to int and back" {
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
     if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
@@ -304,6 +358,37 @@ test "comptime @bitCast packed struct to int" {
         vectorf: @Vector(2, f16) = .{ 3.14, 2.71 },
     };
     const Int = @typeInfo(S).Struct.backing_integer.?;
+
+    // S -> Int
     var s: S = .{};
     try expectEqual(@bitCast(Int, s), comptime @bitCast(Int, S{}));
+
+    // Int -> S
+    var i: Int = 0;
+    const rt_cast = @bitCast(S, i);
+    const ct_cast = comptime @bitCast(S, @as(Int, 0));
+    inline for (@typeInfo(S).Struct.fields) |field| {
+        if (@typeInfo(field.field_type) == .Vector)
+            continue; //TODO: https://github.com/ziglang/zig/issues/13201
+
+        try expectEqual(@field(rt_cast, field.name), @field(ct_cast, field.name));
+    }
+}
+
+test "comptime bitcast with fields following a float" {
+    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO: https://github.com/ziglang/zig/issues/13214
+
+    const FloatT = extern struct { f: f80, x: u128 };
+    var x: FloatT = .{ .f = 0.5, .x = 123 };
+    try expect(@bitCast(u256, x) == comptime @bitCast(u256, @as(FloatT, .{ .f = 0.5, .x = 123 })));
+}
+
+test "bitcast vector to integer and back" {
+    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO: https://github.com/ziglang/zig/issues/13220
+    if (builtin.zig_backend == .stage1) return error.SkipZigTest; // stage1 gets the comptime cast wrong
+
+    const arr: [16]bool = [_]bool{ true, false } ++ [_]bool{true} ** 14;
+    var x = @splat(16, true);
+    x[1] = false;
+    try expect(@bitCast(u16, x) == comptime @bitCast(u16, @as(@Vector(16, bool), arr)));
 }

From 9d0a4b60e1c715238f96a64c45619a680f94c300 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Wed, 19 Oct 2022 12:02:21 -0700
Subject: [PATCH 3/5] Value: Add `@intCast` in `writeToPackedMemory` for 32-bit
 targets

---
 src/value.zig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/value.zig b/src/value.zig
index 7468a69fda..2ecfaefa56 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1357,9 +1357,9 @@ pub const Value = extern union {
                 else => unreachable,
             },
             .Vector => {
-                const len = ty.arrayLen();
                 const elem_ty = ty.childType();
                 const elem_bit_size = @intCast(u16, elem_ty.bitSize(target));
+                const len = @intCast(usize, ty.arrayLen());
 
                 var bits: u16 = 0;
                 var elem_i: usize = 0;

From 03ed0a59e337d89c9650cc8a70ff431b66ee8671 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Wed, 19 Oct 2022 12:14:18 -0700
Subject: [PATCH 4/5] std.mem: Skip `read/writePackedInt` test on WASM32/64

---
 lib/std/mem.zig | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index f0403942f1..29bbf05f77 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -3700,6 +3700,13 @@ pub fn alignInSlice(slice: anytype, comptime new_alignment: usize) ?AlignedSlice
 }
 
 test "read/write(Var)PackedInt" {
+    switch (builtin.cpu.arch) {
+        // This test generates too much code to execute on WASI.
+        // LLVM backend fails with "too many locals: locals exceed maximum"
+        .wasm32, .wasm64 => return error.SkipZigTest,
+        else => {},
+    }
+
     const foreign_endian: Endian = if (native_endian == .Big) .Little else .Big;
     const expect = std.testing.expect;
     var prng = std.rand.DefaultPrng.init(1234);

From 40b7792a4c815868bafe882cc77d89a67c08571b Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Fri, 28 Oct 2022 08:51:15 -0700
Subject: [PATCH 5/5] Enable bitcast test now that #13214 is resolved.

---
 test/behavior/bitcast.zig | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/test/behavior/bitcast.zig b/test/behavior/bitcast.zig
index b225c31858..728e8fe62a 100644
--- a/test/behavior/bitcast.zig
+++ b/test/behavior/bitcast.zig
@@ -375,12 +375,19 @@ test "comptime @bitCast packed struct to int and back" {
     }
 }
 
-test "comptime bitcast with fields following a float" {
-    if (builtin.zig_backend != .stage1) return error.SkipZigTest; // TODO: https://github.com/ziglang/zig/issues/13214
+test "comptime bitcast with fields following f80" {
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
 
-    const FloatT = extern struct { f: f80, x: u128 };
-    var x: FloatT = .{ .f = 0.5, .x = 123 };
-    try expect(@bitCast(u256, x) == comptime @bitCast(u256, @as(FloatT, .{ .f = 0.5, .x = 123 })));
+    const FloatT = extern struct { f: f80, x: u128 align(16) };
+    const x: FloatT = .{ .f = 0.5, .x = 123 };
+    var x_as_uint: u256 = comptime @bitCast(u256, x);
+
+    try expect(x.f == @bitCast(FloatT, x_as_uint).f);
+    try expect(x.x == @bitCast(FloatT, x_as_uint).x);
 }
 
 test "bitcast vector to integer and back" {