From 8978fe94cfe08d4140bd968b1910348136b4b77d Mon Sep 17 00:00:00 2001 From: tgschultz Date: Thu, 11 Jun 2020 18:42:56 +0000 Subject: [PATCH 1/7] Overhauled leb128: handles integers < 8 bits incorrect overflow bugs fixed simplified *mem implementations added write* functions added thorough write/read testing --- lib/std/debug/leb128.zig | 344 ++++++++++++++++++++++++++------------- 1 file changed, 228 insertions(+), 116 deletions(-) diff --git a/lib/std/debug/leb128.zig b/lib/std/debug/leb128.zig index ac278c4b1f..f9e25d9ee7 100644 --- a/lib/std/debug/leb128.zig +++ b/lib/std/debug/leb128.zig @@ -1,171 +1,198 @@ const std = @import("std"); const testing = std.testing; -pub fn readULEB128(comptime T: type, in_stream: var) !T { - const ShiftT = std.meta.Int(false, std.math.log2(T.bit_count)); +//@TODO: you can take *slice and alter slice.ptr +// make sign bits check more efficient +// add wrapper readLEB128 and write LEB128 that infer from type? +// or use assertions? - var result: T = 0; - var shift: usize = 0; +pub fn readULEB128(comptime T: type, reader: var) !T { + const U = if (T.bit_count < 8) u8 else T; + const ShiftT = std.math.Log2Int(U); + + const max_group = (U.bit_count + 6) / 7; + + var value = @as(U, 0); + var group = @as(ShiftT, 0); + + while (group < max_group) : (group += 1) { + const byte = try reader.readByte(); + var temp = @as(U, byte & 0x7f); + + if (@shlWithOverflow(U, temp, group * 7, &temp)) return error.Overflow; + + value |= temp; + if (byte & 0x80 == 0) break; + } else { + return error.Overflow; + } + + //only applies in the case that we extended to u8 + if (value > std.math.maxInt(T)) return error.Overflow; + + return @truncate(T, value); +} + +pub fn writeULEB128(writer: var, uint_value: var) !void { + const T = @TypeOf(uint_value); + const U = if (T.bit_count < 8) u8 else T; + var value = @intCast(U, uint_value); while (true) { - const byte = try in_stream.readByte(); - - if (shift > T.bit_count) - return error.Overflow; - - var operand: 
T = undefined; - if (@shlWithOverflow(T, byte & 0x7f, @intCast(ShiftT, shift), &operand)) - return error.Overflow; - - result |= operand; - - if ((byte & 0x80) == 0) - return result; - - shift += 7; + const byte = @truncate(u8, value & 0x7f); + value >>= 7; + if (value == 0) { + try writer.writeByte(byte); + break; + } else { + try writer.writeByte(byte | 0x80); + } } } pub fn readULEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const ShiftT = std.meta.Int(false, std.math.log2(T.bit_count)); - - var result: T = 0; - var shift: usize = 0; - var i: usize = 0; - - while (true) : (i += 1) { - const byte = ptr.*[i]; - - if (shift > T.bit_count) - return error.Overflow; - - var operand: T = undefined; - if (@shlWithOverflow(T, byte & 0x7f, @intCast(ShiftT, shift), &operand)) - return error.Overflow; - - result |= operand; - - if ((byte & 0x80) == 0) { - ptr.* += i + 1; - return result; - } - - shift += 7; - } + const max_group = (T.bit_count + 6) / 7; + var buf = std.io.fixedBufferStream(ptr.*[0 .. 
max_group + 1]); + const value = try readULEB128(T, buf.reader()); + ptr.* += @intCast(usize, try buf.getPos()); + return value; } -pub fn readILEB128(comptime T: type, in_stream: var) !T { - const UT = std.meta.Int(false, T.bit_count); - const ShiftT = std.meta.Int(false, std.math.log2(T.bit_count)); +pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { + const T = @TypeOf(uint_value); + const max_group = (T.bit_count + 6) / 7; + var buf = std.io.fixedBufferStream(ptr); + try writeULEB128(buf.writer(), uint_value); + return try buf.getPos(); +} - var result: UT = 0; - var shift: usize = 0; +pub fn readILEB128(comptime T: type, reader: var) !T { + const S = if (T.bit_count < 8) i8 else T; + const U = std.meta.Int(false, S.bit_count); + const ShiftU = std.math.Log2Int(U); - while (true) { - const byte: u8 = try in_stream.readByte(); + const max_group = (U.bit_count + 6) / 7; - if (shift > T.bit_count) - return error.Overflow; + var value = @as(U, 0); + var group = @as(ShiftU, 0); - var operand: UT = undefined; - if (@shlWithOverflow(UT, @as(UT, byte & 0x7f), @intCast(ShiftT, shift), &operand)) { - if (byte != 0x7f) - return error.Overflow; + while (group < max_group) : (group += 1) { + const byte = try reader.readByte(); + var temp = @as(U, byte & 0x7f); + + if (@shlWithOverflow(U, temp, group * 7, &temp)) { + //Overflow is ok so long as the sign bit is set and this is the last byte + if (byte & 0x80 != 0) return error.Overflow; + if (@bitCast(S, temp) >= 0) return error.Overflow; + + //and all the overflowed bits are 1 + const check_bits_shift = @intCast(u3, U.bit_count - @as(u16, group * 7)); + const check_bits_remaining = 7 - check_bits_shift; + const check_bits = byte >> check_bits_shift; + const num_consecutive_ones = @ctz(u8, ~check_bits); + if (num_consecutive_ones < check_bits_remaining) return error.Overflow; } - result |= operand; - - shift += 7; - - if ((byte & 0x80) == 0) { - if (shift < T.bit_count and (byte & 0x40) != 0) { - result |= 
@bitCast(UT, @intCast(T, -1)) << @intCast(ShiftT, shift); + value |= temp; + if (byte & 0x80 == 0) { + if (byte & 0x40 != 0 and group + 1 < max_group) { + value |= @bitCast(U, @as(S, -1)) << ((group + 1) * 7); } - return @bitCast(T, result); + break; + } + } else { + return error.Overflow; + } + + //Only applies if we extended to i8 + if (@bitCast(S, value) > std.math.maxInt(T) or @bitCast(S, value) < std.math.minInt(T)) return error.Overflow; + + return @truncate(T, @bitCast(S, value)); +} + +pub fn writeILEB128(writer: var, int_value: var) !void { + const T = @TypeOf(int_value); + const S = if (T.bit_count < 8) i8 else T; + const U = std.meta.Int(false, S.bit_count); + + var value = @intCast(S, int_value); + + while (true) { + const uvalue = @bitCast(U, value); + const byte = @truncate(u8, uvalue); + value >>= 6; + if (value == -1 or value == 0) { + try writer.writeByte(byte & 0x7F); + break; + } else { + value >>= 1; + try writer.writeByte(byte | 0x80); } } } pub fn readILEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const UT = std.meta.Int(false, T.bit_count); - const ShiftT = std.meta.Int(false, std.math.log2(T.bit_count)); - - var result: UT = 0; - var shift: usize = 0; - var i: usize = 0; - - while (true) : (i += 1) { - const byte = ptr.*[i]; - - if (shift > T.bit_count) - return error.Overflow; - - var operand: UT = undefined; - if (@shlWithOverflow(UT, @as(UT, byte & 0x7f), @intCast(ShiftT, shift), &operand)) { - if (byte != 0x7f) - return error.Overflow; - } - - result |= operand; - - shift += 7; - - if ((byte & 0x80) == 0) { - if (shift < T.bit_count and (byte & 0x40) != 0) { - result |= @bitCast(UT, @intCast(T, -1)) << @intCast(ShiftT, shift); - } - ptr.* += i + 1; - return @bitCast(T, result); - } - } + const max_group = (T.bit_count + 6) / 7; + var buf = std.io.fixedBufferStream(ptr.*[0 .. 
max_group + 1]); + const value = try readILEB128(T, buf.reader()); + ptr.* += @intCast(usize, try buf.getPos()); + return value; } +pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { + const T = @TypeOf(int_value); + var buf = std.io.fixedBufferStream(ptr); + try writeILEB128(buf.writer(), int_value); + return try buf.getPos(); +} + +//tests fn test_read_stream_ileb128(comptime T: type, encoded: []const u8) !T { - var in_stream = std.io.fixedBufferStream(encoded); - return try readILEB128(T, in_stream.inStream()); + var reader = std.io.fixedBufferStream(encoded); + return try readILEB128(T, reader.reader()); } fn test_read_stream_uleb128(comptime T: type, encoded: []const u8) !T { - var in_stream = std.io.fixedBufferStream(encoded); - return try readULEB128(T, in_stream.inStream()); + var reader = std.io.fixedBufferStream(encoded); + return try readULEB128(T, reader.reader()); } fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { - var in_stream = std.io.fixedBufferStream(encoded); - const v1 = readILEB128(T, in_stream.inStream()); + var reader = std.io.fixedBufferStream(encoded); + const v1 = try readILEB128(T, reader.reader()); var in_ptr = encoded.ptr; - const v2 = readILEB128Mem(T, &in_ptr); + const v2 = try readILEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; } fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { - var in_stream = std.io.fixedBufferStream(encoded); - const v1 = readULEB128(T, in_stream.inStream()); + var reader = std.io.fixedBufferStream(encoded); + const v1 = try readULEB128(T, reader.reader()); var in_ptr = encoded.ptr; - const v2 = readULEB128Mem(T, &in_ptr); + const v2 = try readULEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; } -fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) void { - var in_stream = std.io.fixedBufferStream(encoded); +fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { + var reader = 
std.io.fixedBufferStream(encoded); var in_ptr = encoded.ptr; var i: usize = 0; while (i < N) : (i += 1) { - const v1 = readILEB128(T, in_stream.inStream()); - const v2 = readILEB128Mem(T, &in_ptr); + const v1 = try readILEB128(T, reader.reader()); + const v2 = try readILEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); } } -fn test_read_uleb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) void { - var in_stream = std.io.fixedBufferStream(encoded); +fn test_read_uleb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { + var reader = std.io.fixedBufferStream(encoded); var in_ptr = encoded.ptr; var i: usize = 0; while (i < N) : (i += 1) { - const v1 = readULEB128(T, in_stream.inStream()); - const v2 = readULEB128Mem(T, &in_ptr); + const v1 = try readULEB128(T, reader.reader()); + const v2 = try readULEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); } } @@ -212,7 +239,7 @@ test "deserialize signed LEB128" { testing.expect((try test_read_ileb128(i64, "\x80\x81\x80\x00")) == 0x80); // Decode sequence of SLEB128 values - test_read_ileb128_seq(i64, 4, "\x81\x01\x3f\x80\x7f\x80\x80\x80\x00"); + try test_read_ileb128_seq(i64, 4, "\x81\x01\x3f\x80\x7f\x80\x80\x80\x00"); } test "deserialize unsigned LEB128" { @@ -252,5 +279,90 @@ test "deserialize unsigned LEB128" { testing.expect((try test_read_uleb128(u64, "\x80\x81\x80\x00")) == 0x80); // Decode sequence of ULEB128 values - test_read_uleb128_seq(u64, 4, "\x81\x01\x3f\x80\x7f\x80\x80\x80\x00"); + try test_read_uleb128_seq(u64, 4, "\x81\x01\x3f\x80\x7f\x80\x80\x80\x00"); +} + +fn test_write_leb128(value: var) !void { + const T = @TypeOf(value); + + if (T.bit_count == 0) std.debug.warn("{}\n", .{@typeName(T)}); + + const writeStream = if (T.is_signed) writeILEB128 else writeULEB128; + const writeMem = if (T.is_signed) writeILEB128Mem else writeULEB128Mem; + const readStream = if (T.is_signed) readILEB128 else readULEB128; + const readMem = if (T.is_signed) readILEB128Mem else 
readULEB128Mem; + + //decode to a larger bit size too, to ensure sign extension + // is working as expected + const larger_type_bits = ((T.bit_count + 8) / 8) * 8; + const B = std.meta.Int(T.is_signed, larger_type_bits); + const max_groups = if (T.bit_count == 0) 1 else (T.bit_count + 6) / 7; + + var buf: [max_groups]u8 = undefined; + var fbs = std.io.fixedBufferStream(&buf); + + //stream write + try writeStream(fbs.writer(), value); + const w1_pos = fbs.pos; + testing.expect(w1_pos > 0); + + //stream read + fbs.pos = 0; + const sr = try readStream(T, fbs.reader()); + testing.expect(fbs.pos == w1_pos); + testing.expect(sr == value); + + //bigger type stream read + fbs.pos = 0; + const bsr = try readStream(B, fbs.reader()); + testing.expect(fbs.pos == w1_pos); + testing.expect(bsr == value); + + //mem write + const w2_pos = try writeMem(&buf, value); + testing.expect(w2_pos == w1_pos); + + //mem read + var buf_ref: []u8 = buf[0..]; + const mr = try readMem(T, &buf_ref.ptr); + testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); + testing.expect(mr == value); + + //bigger type mem read + buf_ref = buf[0..]; + const bmr = try readMem(T, &buf_ref.ptr); + testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); + testing.expect(bmr == value); +} + +test "serialize unsigned LEB128" { + const max_bits = 18; + + comptime var t = 0; + inline while (t <= max_bits) : (t += 1) { + const T = std.meta.Int(false, t); + const min = std.math.minInt(T); + const max = std.math.maxInt(T); + var i = @as(std.meta.Int(false, T.bit_count + 1), min); + + while (i <= max) : (i += 1) try test_write_leb128(@intCast(T, i)); + } +} + +test "serialize signed LEB128" { + //explicitly test i0 because starting `t` at 0 + // will break the while loop + try test_write_leb128(@as(i0, 0)); + + const max_bits = 18; + + comptime var t = 1; + inline while (t <= max_bits) : (t += 1) { + const T = std.meta.Int(true, t); + const min = std.math.minInt(T); + const max = 
std.math.maxInt(T); + var i = @as(std.meta.Int(true, T.bit_count + 1), min); + + while (i <= max) : (i += 1) try test_write_leb128(@intCast(T, i)); + } } From 7f24860737fba2b40c375b7c9fb37fe694bfa539 Mon Sep 17 00:00:00 2001 From: tgschultz Date: Thu, 11 Jun 2020 20:53:25 +0000 Subject: [PATCH 2/7] Code cleanup, documentation added, read*Mem functions now take *[]const u8 --- lib/std/debug/leb128.zig | 79 +++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/lib/std/debug/leb128.zig b/lib/std/debug/leb128.zig index f9e25d9ee7..76e76d8e4e 100644 --- a/lib/std/debug/leb128.zig +++ b/lib/std/debug/leb128.zig @@ -1,11 +1,8 @@ const std = @import("std"); const testing = std.testing; -//@TODO: you can take *slice and alter slice.ptr -// make sign bits check more efficient -// add wrapper readLEB128 and write LEB128 that infer from type? -// or use assertions? - +///Read a single unsigned LEB128 value from the given reader as type T, +/// or error.Overflow if the value cannot fit. pub fn readULEB128(comptime T: type, reader: var) !T { const U = if (T.bit_count < 8) u8 else T; const ShiftT = std.math.Log2Int(U); @@ -28,11 +25,14 @@ pub fn readULEB128(comptime T: type, reader: var) !T { } //only applies in the case that we extended to u8 - if (value > std.math.maxInt(T)) return error.Overflow; + if (U != T) { + if (value > std.math.maxInt(T)) return error.Overflow; + } return @truncate(T, value); } +///Write a single unsigned integer as unsigned LEB128 to the given writer. pub fn writeULEB128(writer: var, uint_value: var) !void { const T = @TypeOf(uint_value); const U = if (T.bit_count < 8) u8 else T; @@ -50,22 +50,27 @@ pub fn writeULEB128(writer: var, uint_value: var) !void { } } -pub fn readULEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const max_group = (T.bit_count + 6) / 7; - var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); +///Read a single unsinged integer from the given memory as type T. 
+/// The provided slice reference will be updated to point to the byte after the last byte read. +pub fn readULEB128Mem(comptime T: type, ptr: *[]const u8) !T { + var buf = std.io.fixedBufferStream(ptr.*); const value = try readULEB128(T, buf.reader()); - ptr.* += @intCast(usize, try buf.getPos()); + ptr.*.ptr += buf.pos; return value; } +///Write a single unsigned LEB128 integer to the given memory as unsigned LEB128, +/// returning the number of bytes written. pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { const T = @TypeOf(uint_value); const max_group = (T.bit_count + 6) / 7; var buf = std.io.fixedBufferStream(ptr); try writeULEB128(buf.writer(), uint_value); - return try buf.getPos(); + return buf.pos; } +///Read a single signed LEB128 value from the given reader as type T, +/// or error.Overflow if the value cannot fit. pub fn readILEB128(comptime T: type, reader: var) !T { const S = if (T.bit_count < 8) i8 else T; const U = std.meta.Int(false, S.bit_count); @@ -80,23 +85,24 @@ pub fn readILEB128(comptime T: type, reader: var) !T { const byte = try reader.readByte(); var temp = @as(U, byte & 0x7f); - if (@shlWithOverflow(U, temp, group * 7, &temp)) { + const shift = group * 7; + if (@shlWithOverflow(U, temp, shift, &temp)) { //Overflow is ok so long as the sign bit is set and this is the last byte if (byte & 0x80 != 0) return error.Overflow; if (@bitCast(S, temp) >= 0) return error.Overflow; //and all the overflowed bits are 1 - const check_bits_shift = @intCast(u3, U.bit_count - @as(u16, group * 7)); - const check_bits_remaining = 7 - check_bits_shift; - const check_bits = byte >> check_bits_shift; - const num_consecutive_ones = @ctz(u8, ~check_bits); - if (num_consecutive_ones < check_bits_remaining) return error.Overflow; + const remaining_shift = @intCast(u3, U.bit_count - @as(u16, shift)); + const remaining_bits = @bitCast(i8, byte | 0x80) >> remaining_shift; + if (remaining_bits != -1) return error.Overflow; } value |= temp; if (byte & 0x80 
== 0) { - if (byte & 0x40 != 0 and group + 1 < max_group) { - value |= @bitCast(U, @as(S, -1)) << ((group + 1) * 7); + const needs_sign_ext = group + 1 < max_group; + if (byte & 0x40 != 0 and needs_sign_ext) { + const ones = @as(S, -1); + value |= @bitCast(U, ones) << (shift + 7); } break; } @@ -104,12 +110,16 @@ pub fn readILEB128(comptime T: type, reader: var) !T { return error.Overflow; } + const result = @bitCast(S, value); //Only applies if we extended to i8 - if (@bitCast(S, value) > std.math.maxInt(T) or @bitCast(S, value) < std.math.minInt(T)) return error.Overflow; + if (S != T) { + if (result > std.math.maxInt(T) or result < std.math.minInt(T)) return error.Overflow; + } - return @truncate(T, @bitCast(S, value)); + return @truncate(T, result); } +///Write a single signed integer as signed LEB128 to the given writer. pub fn writeILEB128(writer: var, int_value: var) !void { const T = @TypeOf(int_value); const S = if (T.bit_count < 8) i8 else T; @@ -131,19 +141,22 @@ pub fn writeILEB128(writer: var, int_value: var) !void { } } -pub fn readILEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const max_group = (T.bit_count + 6) / 7; - var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); +///Read a single singed LEB128 integer from the given memory as type T. +/// The provided slice reference will be updated to point to the byte after the last byte read. +pub fn readILEB128Mem(comptime T: type, ptr: *[]const u8) !T { + var buf = std.io.fixedBufferStream(ptr.*); const value = try readILEB128(T, buf.reader()); - ptr.* += @intCast(usize, try buf.getPos()); + ptr.*.ptr += buf.pos; return value; } +///Write a single signed LEB128 integer to the given memory as unsigned LEB128, +/// returning the number of bytes written. 
pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { const T = @TypeOf(int_value); var buf = std.io.fixedBufferStream(ptr); try writeILEB128(buf.writer(), int_value); - return try buf.getPos(); + return buf.pos; } //tests @@ -160,7 +173,7 @@ fn test_read_stream_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readILEB128(T, reader.reader()); - var in_ptr = encoded.ptr; + var in_ptr = encoded; const v2 = try readILEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -169,7 +182,7 @@ fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readULEB128(T, reader.reader()); - var in_ptr = encoded.ptr; + var in_ptr = encoded; const v2 = try readULEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -177,7 +190,7 @@ fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded.ptr; + var in_ptr = encoded; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readILEB128(T, reader.reader()); @@ -188,7 +201,7 @@ fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u fn test_read_uleb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded.ptr; + var in_ptr = encoded; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readULEB128(T, reader.reader()); @@ -285,8 +298,6 @@ test "deserialize unsigned LEB128" { fn test_write_leb128(value: var) !void { const T = @TypeOf(value); - if (T.bit_count == 0) std.debug.warn("{}\n", .{@typeName(T)}); - const writeStream = if (T.is_signed) writeILEB128 else 
writeULEB128; const writeMem = if (T.is_signed) writeILEB128Mem else writeULEB128Mem; const readStream = if (T.is_signed) readILEB128 else readULEB128; @@ -324,13 +335,13 @@ fn test_write_leb128(value: var) !void { //mem read var buf_ref: []u8 = buf[0..]; - const mr = try readMem(T, &buf_ref.ptr); + const mr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(mr == value); //bigger type mem read buf_ref = buf[0..]; - const bmr = try readMem(T, &buf_ref.ptr); + const bmr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(bmr == value); } From e94eba5df552abf63ee55846ac62b1e0f5a80098 Mon Sep 17 00:00:00 2001 From: tgschultz Date: Thu, 11 Jun 2020 18:42:56 +0000 Subject: [PATCH 3/7] Overhauled leb128: handles integers < 8 bits incorrect overflow bugs fixed simplified *mem implementations added write* functions added thorough write/read testing --- lib/std/debug/leb128.zig | 79 +++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/lib/std/debug/leb128.zig b/lib/std/debug/leb128.zig index 76e76d8e4e..f9e25d9ee7 100644 --- a/lib/std/debug/leb128.zig +++ b/lib/std/debug/leb128.zig @@ -1,8 +1,11 @@ const std = @import("std"); const testing = std.testing; -///Read a single unsigned LEB128 value from the given reader as type T, -/// or error.Overflow if the value cannot fit. +//@TODO: you can take *slice and alter slice.ptr +// make sign bits check more efficient +// add wrapper readLEB128 and write LEB128 that infer from type? +// or use assertions? 
+ pub fn readULEB128(comptime T: type, reader: var) !T { const U = if (T.bit_count < 8) u8 else T; const ShiftT = std.math.Log2Int(U); @@ -25,14 +28,11 @@ pub fn readULEB128(comptime T: type, reader: var) !T { } //only applies in the case that we extended to u8 - if (U != T) { - if (value > std.math.maxInt(T)) return error.Overflow; - } + if (value > std.math.maxInt(T)) return error.Overflow; return @truncate(T, value); } -///Write a single unsigned integer as unsigned LEB128 to the given writer. pub fn writeULEB128(writer: var, uint_value: var) !void { const T = @TypeOf(uint_value); const U = if (T.bit_count < 8) u8 else T; @@ -50,27 +50,22 @@ pub fn writeULEB128(writer: var, uint_value: var) !void { } } -///Read a single unsinged integer from the given memory as type T. -/// The provided slice reference will be updated to point to the byte after the last byte read. -pub fn readULEB128Mem(comptime T: type, ptr: *[]const u8) !T { - var buf = std.io.fixedBufferStream(ptr.*); +pub fn readULEB128Mem(comptime T: type, ptr: *[*]const u8) !T { + const max_group = (T.bit_count + 6) / 7; + var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); const value = try readULEB128(T, buf.reader()); - ptr.*.ptr += buf.pos; + ptr.* += @intCast(usize, try buf.getPos()); return value; } -///Write a single unsigned LEB128 integer to the given memory as unsigned LEB128, -/// returning the number of bytes written. pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { const T = @TypeOf(uint_value); const max_group = (T.bit_count + 6) / 7; var buf = std.io.fixedBufferStream(ptr); try writeULEB128(buf.writer(), uint_value); - return buf.pos; + return try buf.getPos(); } -///Read a single signed LEB128 value from the given reader as type T, -/// or error.Overflow if the value cannot fit. 
pub fn readILEB128(comptime T: type, reader: var) !T { const S = if (T.bit_count < 8) i8 else T; const U = std.meta.Int(false, S.bit_count); @@ -85,24 +80,23 @@ pub fn readILEB128(comptime T: type, reader: var) !T { const byte = try reader.readByte(); var temp = @as(U, byte & 0x7f); - const shift = group * 7; - if (@shlWithOverflow(U, temp, shift, &temp)) { + if (@shlWithOverflow(U, temp, group * 7, &temp)) { //Overflow is ok so long as the sign bit is set and this is the last byte if (byte & 0x80 != 0) return error.Overflow; if (@bitCast(S, temp) >= 0) return error.Overflow; //and all the overflowed bits are 1 - const remaining_shift = @intCast(u3, U.bit_count - @as(u16, shift)); - const remaining_bits = @bitCast(i8, byte | 0x80) >> remaining_shift; - if (remaining_bits != -1) return error.Overflow; + const check_bits_shift = @intCast(u3, U.bit_count - @as(u16, group * 7)); + const check_bits_remaining = 7 - check_bits_shift; + const check_bits = byte >> check_bits_shift; + const num_consecutive_ones = @ctz(u8, ~check_bits); + if (num_consecutive_ones < check_bits_remaining) return error.Overflow; } value |= temp; if (byte & 0x80 == 0) { - const needs_sign_ext = group + 1 < max_group; - if (byte & 0x40 != 0 and needs_sign_ext) { - const ones = @as(S, -1); - value |= @bitCast(U, ones) << (shift + 7); + if (byte & 0x40 != 0 and group + 1 < max_group) { + value |= @bitCast(U, @as(S, -1)) << ((group + 1) * 7); } break; } @@ -110,16 +104,12 @@ pub fn readILEB128(comptime T: type, reader: var) !T { return error.Overflow; } - const result = @bitCast(S, value); //Only applies if we extended to i8 - if (S != T) { - if (result > std.math.maxInt(T) or result < std.math.minInt(T)) return error.Overflow; - } + if (@bitCast(S, value) > std.math.maxInt(T) or @bitCast(S, value) < std.math.minInt(T)) return error.Overflow; - return @truncate(T, result); + return @truncate(T, @bitCast(S, value)); } -///Write a single signed integer as signed LEB128 to the given writer. 
pub fn writeILEB128(writer: var, int_value: var) !void { const T = @TypeOf(int_value); const S = if (T.bit_count < 8) i8 else T; @@ -141,22 +131,19 @@ pub fn writeILEB128(writer: var, int_value: var) !void { } } -///Read a single singed LEB128 integer from the given memory as type T. -/// The provided slice reference will be updated to point to the byte after the last byte read. -pub fn readILEB128Mem(comptime T: type, ptr: *[]const u8) !T { - var buf = std.io.fixedBufferStream(ptr.*); +pub fn readILEB128Mem(comptime T: type, ptr: *[*]const u8) !T { + const max_group = (T.bit_count + 6) / 7; + var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); const value = try readILEB128(T, buf.reader()); - ptr.*.ptr += buf.pos; + ptr.* += @intCast(usize, try buf.getPos()); return value; } -///Write a single signed LEB128 integer to the given memory as unsigned LEB128, -/// returning the number of bytes written. pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { const T = @TypeOf(int_value); var buf = std.io.fixedBufferStream(ptr); try writeILEB128(buf.writer(), int_value); - return buf.pos; + return try buf.getPos(); } //tests @@ -173,7 +160,7 @@ fn test_read_stream_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readILEB128(T, reader.reader()); - var in_ptr = encoded; + var in_ptr = encoded.ptr; const v2 = try readILEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -182,7 +169,7 @@ fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readULEB128(T, reader.reader()); - var in_ptr = encoded; + var in_ptr = encoded.ptr; const v2 = try readULEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -190,7 +177,7 @@ fn test_read_uleb128(comptime T: type, encoded: []const u8) 
!T { fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded; + var in_ptr = encoded.ptr; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readILEB128(T, reader.reader()); @@ -201,7 +188,7 @@ fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u fn test_read_uleb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded; + var in_ptr = encoded.ptr; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readULEB128(T, reader.reader()); @@ -298,6 +285,8 @@ test "deserialize unsigned LEB128" { fn test_write_leb128(value: var) !void { const T = @TypeOf(value); + if (T.bit_count == 0) std.debug.warn("{}\n", .{@typeName(T)}); + const writeStream = if (T.is_signed) writeILEB128 else writeULEB128; const writeMem = if (T.is_signed) writeILEB128Mem else writeULEB128Mem; const readStream = if (T.is_signed) readILEB128 else readULEB128; @@ -335,13 +324,13 @@ fn test_write_leb128(value: var) !void { //mem read var buf_ref: []u8 = buf[0..]; - const mr = try readMem(T, &buf_ref); + const mr = try readMem(T, &buf_ref.ptr); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(mr == value); //bigger type mem read buf_ref = buf[0..]; - const bmr = try readMem(T, &buf_ref); + const bmr = try readMem(T, &buf_ref.ptr); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(bmr == value); } From 928d3ee9ea7f1a7eb17b1ec3a8ec36559e1e9968 Mon Sep 17 00:00:00 2001 From: tgschultz Date: Thu, 11 Jun 2020 20:53:25 +0000 Subject: [PATCH 4/7] Code cleanup, documentation added, read*Mem functions now take *[]const u8 --- lib/std/debug/leb128.zig | 79 +++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/lib/std/debug/leb128.zig b/lib/std/debug/leb128.zig index 
f9e25d9ee7..76e76d8e4e 100644 --- a/lib/std/debug/leb128.zig +++ b/lib/std/debug/leb128.zig @@ -1,11 +1,8 @@ const std = @import("std"); const testing = std.testing; -//@TODO: you can take *slice and alter slice.ptr -// make sign bits check more efficient -// add wrapper readLEB128 and write LEB128 that infer from type? -// or use assertions? - +///Read a single unsigned LEB128 value from the given reader as type T, +/// or error.Overflow if the value cannot fit. pub fn readULEB128(comptime T: type, reader: var) !T { const U = if (T.bit_count < 8) u8 else T; const ShiftT = std.math.Log2Int(U); @@ -28,11 +25,14 @@ pub fn readULEB128(comptime T: type, reader: var) !T { } //only applies in the case that we extended to u8 - if (value > std.math.maxInt(T)) return error.Overflow; + if (U != T) { + if (value > std.math.maxInt(T)) return error.Overflow; + } return @truncate(T, value); } +///Write a single unsigned integer as unsigned LEB128 to the given writer. pub fn writeULEB128(writer: var, uint_value: var) !void { const T = @TypeOf(uint_value); const U = if (T.bit_count < 8) u8 else T; @@ -50,22 +50,27 @@ pub fn writeULEB128(writer: var, uint_value: var) !void { } } -pub fn readULEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const max_group = (T.bit_count + 6) / 7; - var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); +///Read a single unsinged integer from the given memory as type T. +/// The provided slice reference will be updated to point to the byte after the last byte read. +pub fn readULEB128Mem(comptime T: type, ptr: *[]const u8) !T { + var buf = std.io.fixedBufferStream(ptr.*); const value = try readULEB128(T, buf.reader()); - ptr.* += @intCast(usize, try buf.getPos()); + ptr.*.ptr += buf.pos; return value; } +///Write a single unsigned LEB128 integer to the given memory as unsigned LEB128, +/// returning the number of bytes written. 
pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { const T = @TypeOf(uint_value); const max_group = (T.bit_count + 6) / 7; var buf = std.io.fixedBufferStream(ptr); try writeULEB128(buf.writer(), uint_value); - return try buf.getPos(); + return buf.pos; } +///Read a single signed LEB128 value from the given reader as type T, +/// or error.Overflow if the value cannot fit. pub fn readILEB128(comptime T: type, reader: var) !T { const S = if (T.bit_count < 8) i8 else T; const U = std.meta.Int(false, S.bit_count); @@ -80,23 +85,24 @@ pub fn readILEB128(comptime T: type, reader: var) !T { const byte = try reader.readByte(); var temp = @as(U, byte & 0x7f); - if (@shlWithOverflow(U, temp, group * 7, &temp)) { + const shift = group * 7; + if (@shlWithOverflow(U, temp, shift, &temp)) { //Overflow is ok so long as the sign bit is set and this is the last byte if (byte & 0x80 != 0) return error.Overflow; if (@bitCast(S, temp) >= 0) return error.Overflow; //and all the overflowed bits are 1 - const check_bits_shift = @intCast(u3, U.bit_count - @as(u16, group * 7)); - const check_bits_remaining = 7 - check_bits_shift; - const check_bits = byte >> check_bits_shift; - const num_consecutive_ones = @ctz(u8, ~check_bits); - if (num_consecutive_ones < check_bits_remaining) return error.Overflow; + const remaining_shift = @intCast(u3, U.bit_count - @as(u16, shift)); + const remaining_bits = @bitCast(i8, byte | 0x80) >> remaining_shift; + if (remaining_bits != -1) return error.Overflow; } value |= temp; if (byte & 0x80 == 0) { - if (byte & 0x40 != 0 and group + 1 < max_group) { - value |= @bitCast(U, @as(S, -1)) << ((group + 1) * 7); + const needs_sign_ext = group + 1 < max_group; + if (byte & 0x40 != 0 and needs_sign_ext) { + const ones = @as(S, -1); + value |= @bitCast(U, ones) << (shift + 7); } break; } @@ -104,12 +110,16 @@ pub fn readILEB128(comptime T: type, reader: var) !T { return error.Overflow; } + const result = @bitCast(S, value); //Only applies if we extended to i8 
- if (@bitCast(S, value) > std.math.maxInt(T) or @bitCast(S, value) < std.math.minInt(T)) return error.Overflow; + if (S != T) { + if (result > std.math.maxInt(T) or result < std.math.minInt(T)) return error.Overflow; + } - return @truncate(T, @bitCast(S, value)); + return @truncate(T, result); } +///Write a single signed integer as signed LEB128 to the given writer. pub fn writeILEB128(writer: var, int_value: var) !void { const T = @TypeOf(int_value); const S = if (T.bit_count < 8) i8 else T; @@ -131,19 +141,22 @@ pub fn writeILEB128(writer: var, int_value: var) !void { } } -pub fn readILEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const max_group = (T.bit_count + 6) / 7; - var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); +///Read a single singed LEB128 integer from the given memory as type T. +/// The provided slice reference will be updated to point to the byte after the last byte read. +pub fn readILEB128Mem(comptime T: type, ptr: *[]const u8) !T { + var buf = std.io.fixedBufferStream(ptr.*); const value = try readILEB128(T, buf.reader()); - ptr.* += @intCast(usize, try buf.getPos()); + ptr.*.ptr += buf.pos; return value; } +///Write a single signed LEB128 integer to the given memory as unsigned LEB128, +/// returning the number of bytes written. 
pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { const T = @TypeOf(int_value); var buf = std.io.fixedBufferStream(ptr); try writeILEB128(buf.writer(), int_value); - return try buf.getPos(); + return buf.pos; } //tests @@ -160,7 +173,7 @@ fn test_read_stream_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readILEB128(T, reader.reader()); - var in_ptr = encoded.ptr; + var in_ptr = encoded; const v2 = try readILEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -169,7 +182,7 @@ fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readULEB128(T, reader.reader()); - var in_ptr = encoded.ptr; + var in_ptr = encoded; const v2 = try readULEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -177,7 +190,7 @@ fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded.ptr; + var in_ptr = encoded; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readILEB128(T, reader.reader()); @@ -188,7 +201,7 @@ fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u fn test_read_uleb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded.ptr; + var in_ptr = encoded; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readULEB128(T, reader.reader()); @@ -285,8 +298,6 @@ test "deserialize unsigned LEB128" { fn test_write_leb128(value: var) !void { const T = @TypeOf(value); - if (T.bit_count == 0) std.debug.warn("{}\n", .{@typeName(T)}); - const writeStream = if (T.is_signed) writeILEB128 else 
writeULEB128; const writeMem = if (T.is_signed) writeILEB128Mem else writeULEB128Mem; const readStream = if (T.is_signed) readILEB128 else readULEB128; @@ -324,13 +335,13 @@ fn test_write_leb128(value: var) !void { //mem read var buf_ref: []u8 = buf[0..]; - const mr = try readMem(T, &buf_ref.ptr); + const mr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(mr == value); //bigger type mem read buf_ref = buf[0..]; - const bmr = try readMem(T, &buf_ref.ptr); + const bmr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(bmr == value); } From 00ec81b0dc41e113624152cdb268d06f73e6f88c Mon Sep 17 00:00:00 2001 From: tgschultz Date: Thu, 11 Jun 2020 18:42:56 +0000 Subject: [PATCH 5/7] Overhauled leb128: handles integers < 8 bits incorrect overflow bugs fixed simplified *mem implementations added wrte* functions added thurough write/read testing --- lib/std/debug/leb128.zig | 79 +++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 45 deletions(-) diff --git a/lib/std/debug/leb128.zig b/lib/std/debug/leb128.zig index 76e76d8e4e..f9e25d9ee7 100644 --- a/lib/std/debug/leb128.zig +++ b/lib/std/debug/leb128.zig @@ -1,8 +1,11 @@ const std = @import("std"); const testing = std.testing; -///Read a single unsigned LEB128 value from the given reader as type T, -/// or error.Overflow if the value cannot fit. +//@TODO: you can take *slice and alter slice.ptr +// make sign bits check more efficient +// add wrapper readLEB128 and write LEB128 that infer from type? +// or use assertions? 
+ pub fn readULEB128(comptime T: type, reader: var) !T { const U = if (T.bit_count < 8) u8 else T; const ShiftT = std.math.Log2Int(U); @@ -25,14 +28,11 @@ pub fn readULEB128(comptime T: type, reader: var) !T { } //only applies in the case that we extended to u8 - if (U != T) { - if (value > std.math.maxInt(T)) return error.Overflow; - } + if (value > std.math.maxInt(T)) return error.Overflow; return @truncate(T, value); } -///Write a single unsigned integer as unsigned LEB128 to the given writer. pub fn writeULEB128(writer: var, uint_value: var) !void { const T = @TypeOf(uint_value); const U = if (T.bit_count < 8) u8 else T; @@ -50,27 +50,22 @@ pub fn writeULEB128(writer: var, uint_value: var) !void { } } -///Read a single unsinged integer from the given memory as type T. -/// The provided slice reference will be updated to point to the byte after the last byte read. -pub fn readULEB128Mem(comptime T: type, ptr: *[]const u8) !T { - var buf = std.io.fixedBufferStream(ptr.*); +pub fn readULEB128Mem(comptime T: type, ptr: *[*]const u8) !T { + const max_group = (T.bit_count + 6) / 7; + var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); const value = try readULEB128(T, buf.reader()); - ptr.*.ptr += buf.pos; + ptr.* += @intCast(usize, try buf.getPos()); return value; } -///Write a single unsigned LEB128 integer to the given memory as unsigned LEB128, -/// returning the number of bytes written. pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { const T = @TypeOf(uint_value); const max_group = (T.bit_count + 6) / 7; var buf = std.io.fixedBufferStream(ptr); try writeULEB128(buf.writer(), uint_value); - return buf.pos; + return try buf.getPos(); } -///Read a single signed LEB128 value from the given reader as type T, -/// or error.Overflow if the value cannot fit. 
pub fn readILEB128(comptime T: type, reader: var) !T { const S = if (T.bit_count < 8) i8 else T; const U = std.meta.Int(false, S.bit_count); @@ -85,24 +80,23 @@ pub fn readILEB128(comptime T: type, reader: var) !T { const byte = try reader.readByte(); var temp = @as(U, byte & 0x7f); - const shift = group * 7; - if (@shlWithOverflow(U, temp, shift, &temp)) { + if (@shlWithOverflow(U, temp, group * 7, &temp)) { //Overflow is ok so long as the sign bit is set and this is the last byte if (byte & 0x80 != 0) return error.Overflow; if (@bitCast(S, temp) >= 0) return error.Overflow; //and all the overflowed bits are 1 - const remaining_shift = @intCast(u3, U.bit_count - @as(u16, shift)); - const remaining_bits = @bitCast(i8, byte | 0x80) >> remaining_shift; - if (remaining_bits != -1) return error.Overflow; + const check_bits_shift = @intCast(u3, U.bit_count - @as(u16, group * 7)); + const check_bits_remaining = 7 - check_bits_shift; + const check_bits = byte >> check_bits_shift; + const num_consecutive_ones = @ctz(u8, ~check_bits); + if (num_consecutive_ones < check_bits_remaining) return error.Overflow; } value |= temp; if (byte & 0x80 == 0) { - const needs_sign_ext = group + 1 < max_group; - if (byte & 0x40 != 0 and needs_sign_ext) { - const ones = @as(S, -1); - value |= @bitCast(U, ones) << (shift + 7); + if (byte & 0x40 != 0 and group + 1 < max_group) { + value |= @bitCast(U, @as(S, -1)) << ((group + 1) * 7); } break; } @@ -110,16 +104,12 @@ pub fn readILEB128(comptime T: type, reader: var) !T { return error.Overflow; } - const result = @bitCast(S, value); //Only applies if we extended to i8 - if (S != T) { - if (result > std.math.maxInt(T) or result < std.math.minInt(T)) return error.Overflow; - } + if (@bitCast(S, value) > std.math.maxInt(T) or @bitCast(S, value) < std.math.minInt(T)) return error.Overflow; - return @truncate(T, result); + return @truncate(T, @bitCast(S, value)); } -///Write a single signed integer as signed LEB128 to the given writer. 
pub fn writeILEB128(writer: var, int_value: var) !void { const T = @TypeOf(int_value); const S = if (T.bit_count < 8) i8 else T; @@ -141,22 +131,19 @@ pub fn writeILEB128(writer: var, int_value: var) !void { } } -///Read a single singed LEB128 integer from the given memory as type T. -/// The provided slice reference will be updated to point to the byte after the last byte read. -pub fn readILEB128Mem(comptime T: type, ptr: *[]const u8) !T { - var buf = std.io.fixedBufferStream(ptr.*); +pub fn readILEB128Mem(comptime T: type, ptr: *[*]const u8) !T { + const max_group = (T.bit_count + 6) / 7; + var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); const value = try readILEB128(T, buf.reader()); - ptr.*.ptr += buf.pos; + ptr.* += @intCast(usize, try buf.getPos()); return value; } -///Write a single signed LEB128 integer to the given memory as unsigned LEB128, -/// returning the number of bytes written. pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { const T = @TypeOf(int_value); var buf = std.io.fixedBufferStream(ptr); try writeILEB128(buf.writer(), int_value); - return buf.pos; + return try buf.getPos(); } //tests @@ -173,7 +160,7 @@ fn test_read_stream_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readILEB128(T, reader.reader()); - var in_ptr = encoded; + var in_ptr = encoded.ptr; const v2 = try readILEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -182,7 +169,7 @@ fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readULEB128(T, reader.reader()); - var in_ptr = encoded; + var in_ptr = encoded.ptr; const v2 = try readULEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -190,7 +177,7 @@ fn test_read_uleb128(comptime T: type, encoded: []const u8) 
!T { fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded; + var in_ptr = encoded.ptr; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readILEB128(T, reader.reader()); @@ -201,7 +188,7 @@ fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u fn test_read_uleb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded; + var in_ptr = encoded.ptr; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readULEB128(T, reader.reader()); @@ -298,6 +285,8 @@ test "deserialize unsigned LEB128" { fn test_write_leb128(value: var) !void { const T = @TypeOf(value); + if (T.bit_count == 0) std.debug.warn("{}\n", .{@typeName(T)}); + const writeStream = if (T.is_signed) writeILEB128 else writeULEB128; const writeMem = if (T.is_signed) writeILEB128Mem else writeULEB128Mem; const readStream = if (T.is_signed) readILEB128 else readULEB128; @@ -335,13 +324,13 @@ fn test_write_leb128(value: var) !void { //mem read var buf_ref: []u8 = buf[0..]; - const mr = try readMem(T, &buf_ref); + const mr = try readMem(T, &buf_ref.ptr); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(mr == value); //bigger type mem read buf_ref = buf[0..]; - const bmr = try readMem(T, &buf_ref); + const bmr = try readMem(T, &buf_ref.ptr); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(bmr == value); } From a0160d776f30fab70c786cd1159203a593af51b4 Mon Sep 17 00:00:00 2001 From: tgschultz Date: Thu, 11 Jun 2020 20:53:25 +0000 Subject: [PATCH 6/7] Code cleanup, documentation added, read*Mem functions now take *[]const u8 --- lib/std/debug/leb128.zig | 79 +++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/lib/std/debug/leb128.zig b/lib/std/debug/leb128.zig index 
f9e25d9ee7..76e76d8e4e 100644 --- a/lib/std/debug/leb128.zig +++ b/lib/std/debug/leb128.zig @@ -1,11 +1,8 @@ const std = @import("std"); const testing = std.testing; -//@TODO: you can take *slice and alter slice.ptr -// make sign bits check more efficient -// add wrapper readLEB128 and write LEB128 that infer from type? -// or use assertions? - +///Read a single unsigned LEB128 value from the given reader as type T, +/// or error.Overflow if the value cannot fit. pub fn readULEB128(comptime T: type, reader: var) !T { const U = if (T.bit_count < 8) u8 else T; const ShiftT = std.math.Log2Int(U); @@ -28,11 +25,14 @@ pub fn readULEB128(comptime T: type, reader: var) !T { } //only applies in the case that we extended to u8 - if (value > std.math.maxInt(T)) return error.Overflow; + if (U != T) { + if (value > std.math.maxInt(T)) return error.Overflow; + } return @truncate(T, value); } +///Write a single unsigned integer as unsigned LEB128 to the given writer. pub fn writeULEB128(writer: var, uint_value: var) !void { const T = @TypeOf(uint_value); const U = if (T.bit_count < 8) u8 else T; @@ -50,22 +50,27 @@ pub fn writeULEB128(writer: var, uint_value: var) !void { } } -pub fn readULEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const max_group = (T.bit_count + 6) / 7; - var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); +///Read a single unsinged integer from the given memory as type T. +/// The provided slice reference will be updated to point to the byte after the last byte read. +pub fn readULEB128Mem(comptime T: type, ptr: *[]const u8) !T { + var buf = std.io.fixedBufferStream(ptr.*); const value = try readULEB128(T, buf.reader()); - ptr.* += @intCast(usize, try buf.getPos()); + ptr.*.ptr += buf.pos; return value; } +///Write a single unsigned LEB128 integer to the given memory as unsigned LEB128, +/// returning the number of bytes written. 
pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { const T = @TypeOf(uint_value); const max_group = (T.bit_count + 6) / 7; var buf = std.io.fixedBufferStream(ptr); try writeULEB128(buf.writer(), uint_value); - return try buf.getPos(); + return buf.pos; } +///Read a single signed LEB128 value from the given reader as type T, +/// or error.Overflow if the value cannot fit. pub fn readILEB128(comptime T: type, reader: var) !T { const S = if (T.bit_count < 8) i8 else T; const U = std.meta.Int(false, S.bit_count); @@ -80,23 +85,24 @@ pub fn readILEB128(comptime T: type, reader: var) !T { const byte = try reader.readByte(); var temp = @as(U, byte & 0x7f); - if (@shlWithOverflow(U, temp, group * 7, &temp)) { + const shift = group * 7; + if (@shlWithOverflow(U, temp, shift, &temp)) { //Overflow is ok so long as the sign bit is set and this is the last byte if (byte & 0x80 != 0) return error.Overflow; if (@bitCast(S, temp) >= 0) return error.Overflow; //and all the overflowed bits are 1 - const check_bits_shift = @intCast(u3, U.bit_count - @as(u16, group * 7)); - const check_bits_remaining = 7 - check_bits_shift; - const check_bits = byte >> check_bits_shift; - const num_consecutive_ones = @ctz(u8, ~check_bits); - if (num_consecutive_ones < check_bits_remaining) return error.Overflow; + const remaining_shift = @intCast(u3, U.bit_count - @as(u16, shift)); + const remaining_bits = @bitCast(i8, byte | 0x80) >> remaining_shift; + if (remaining_bits != -1) return error.Overflow; } value |= temp; if (byte & 0x80 == 0) { - if (byte & 0x40 != 0 and group + 1 < max_group) { - value |= @bitCast(U, @as(S, -1)) << ((group + 1) * 7); + const needs_sign_ext = group + 1 < max_group; + if (byte & 0x40 != 0 and needs_sign_ext) { + const ones = @as(S, -1); + value |= @bitCast(U, ones) << (shift + 7); } break; } @@ -104,12 +110,16 @@ pub fn readILEB128(comptime T: type, reader: var) !T { return error.Overflow; } + const result = @bitCast(S, value); //Only applies if we extended to i8 
- if (@bitCast(S, value) > std.math.maxInt(T) or @bitCast(S, value) < std.math.minInt(T)) return error.Overflow; + if (S != T) { + if (result > std.math.maxInt(T) or result < std.math.minInt(T)) return error.Overflow; + } - return @truncate(T, @bitCast(S, value)); + return @truncate(T, result); } +///Write a single signed integer as signed LEB128 to the given writer. pub fn writeILEB128(writer: var, int_value: var) !void { const T = @TypeOf(int_value); const S = if (T.bit_count < 8) i8 else T; @@ -131,19 +141,22 @@ pub fn writeILEB128(writer: var, int_value: var) !void { } } -pub fn readILEB128Mem(comptime T: type, ptr: *[*]const u8) !T { - const max_group = (T.bit_count + 6) / 7; - var buf = std.io.fixedBufferStream(ptr.*[0 .. max_group + 1]); +///Read a single singed LEB128 integer from the given memory as type T. +/// The provided slice reference will be updated to point to the byte after the last byte read. +pub fn readILEB128Mem(comptime T: type, ptr: *[]const u8) !T { + var buf = std.io.fixedBufferStream(ptr.*); const value = try readILEB128(T, buf.reader()); - ptr.* += @intCast(usize, try buf.getPos()); + ptr.*.ptr += buf.pos; return value; } +///Write a single signed LEB128 integer to the given memory as unsigned LEB128, +/// returning the number of bytes written. 
pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { const T = @TypeOf(int_value); var buf = std.io.fixedBufferStream(ptr); try writeILEB128(buf.writer(), int_value); - return try buf.getPos(); + return buf.pos; } //tests @@ -160,7 +173,7 @@ fn test_read_stream_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readILEB128(T, reader.reader()); - var in_ptr = encoded.ptr; + var in_ptr = encoded; const v2 = try readILEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -169,7 +182,7 @@ fn test_read_ileb128(comptime T: type, encoded: []const u8) !T { fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); const v1 = try readULEB128(T, reader.reader()); - var in_ptr = encoded.ptr; + var in_ptr = encoded; const v2 = try readULEB128Mem(T, &in_ptr); testing.expectEqual(v1, v2); return v1; @@ -177,7 +190,7 @@ fn test_read_uleb128(comptime T: type, encoded: []const u8) !T { fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded.ptr; + var in_ptr = encoded; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readILEB128(T, reader.reader()); @@ -188,7 +201,7 @@ fn test_read_ileb128_seq(comptime T: type, comptime N: usize, encoded: []const u fn test_read_uleb128_seq(comptime T: type, comptime N: usize, encoded: []const u8) !void { var reader = std.io.fixedBufferStream(encoded); - var in_ptr = encoded.ptr; + var in_ptr = encoded; var i: usize = 0; while (i < N) : (i += 1) { const v1 = try readULEB128(T, reader.reader()); @@ -285,8 +298,6 @@ test "deserialize unsigned LEB128" { fn test_write_leb128(value: var) !void { const T = @TypeOf(value); - if (T.bit_count == 0) std.debug.warn("{}\n", .{@typeName(T)}); - const writeStream = if (T.is_signed) writeILEB128 else 
writeULEB128; const writeMem = if (T.is_signed) writeILEB128Mem else writeULEB128Mem; const readStream = if (T.is_signed) readILEB128 else readULEB128; @@ -324,13 +335,13 @@ fn test_write_leb128(value: var) !void { //mem read var buf_ref: []u8 = buf[0..]; - const mr = try readMem(T, &buf_ref.ptr); + const mr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(mr == value); //bigger type mem read buf_ref = buf[0..]; - const bmr = try readMem(T, &buf_ref.ptr); + const bmr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(bmr == value); } From 38e69a9e6a6d69a48d130382dcb61ba2516863d0 Mon Sep 17 00:00:00 2001 From: tgschultz Date: Tue, 16 Jun 2020 16:11:39 +0000 Subject: [PATCH 7/7] Added test to ensure minimum number of bytes is emitted for writes --- lib/std/debug/leb128.zig | 55 ++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/lib/std/debug/leb128.zig b/lib/std/debug/leb128.zig index 76e76d8e4e..16a23da123 100644 --- a/lib/std/debug/leb128.zig +++ b/lib/std/debug/leb128.zig @@ -1,7 +1,7 @@ const std = @import("std"); const testing = std.testing; -///Read a single unsigned LEB128 value from the given reader as type T, +/// Read a single unsigned LEB128 value from the given reader as type T, /// or error.Overflow if the value cannot fit. pub fn readULEB128(comptime T: type, reader: var) !T { const U = if (T.bit_count < 8) u8 else T; @@ -24,7 +24,7 @@ pub fn readULEB128(comptime T: type, reader: var) !T { return error.Overflow; } - //only applies in the case that we extended to u8 + // only applies in the case that we extended to u8 if (U != T) { if (value > std.math.maxInt(T)) return error.Overflow; } @@ -32,7 +32,7 @@ pub fn readULEB128(comptime T: type, reader: var) !T { return @truncate(T, value); } -///Write a single unsigned integer as unsigned LEB128 to the given writer. 
+/// Write a single unsigned integer as unsigned LEB128 to the given writer. pub fn writeULEB128(writer: var, uint_value: var) !void { const T = @TypeOf(uint_value); const U = if (T.bit_count < 8) u8 else T; @@ -50,7 +50,7 @@ pub fn writeULEB128(writer: var, uint_value: var) !void { } } -///Read a single unsinged integer from the given memory as type T. +/// Read a single unsinged integer from the given memory as type T. /// The provided slice reference will be updated to point to the byte after the last byte read. pub fn readULEB128Mem(comptime T: type, ptr: *[]const u8) !T { var buf = std.io.fixedBufferStream(ptr.*); @@ -59,7 +59,7 @@ pub fn readULEB128Mem(comptime T: type, ptr: *[]const u8) !T { return value; } -///Write a single unsigned LEB128 integer to the given memory as unsigned LEB128, +/// Write a single unsigned LEB128 integer to the given memory as unsigned LEB128, /// returning the number of bytes written. pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { const T = @TypeOf(uint_value); @@ -69,7 +69,7 @@ pub fn writeULEB128Mem(ptr: []u8, uint_value: var) !usize { return buf.pos; } -///Read a single signed LEB128 value from the given reader as type T, +/// Read a single signed LEB128 value from the given reader as type T, /// or error.Overflow if the value cannot fit. 
pub fn readILEB128(comptime T: type, reader: var) !T { const S = if (T.bit_count < 8) i8 else T; @@ -87,11 +87,11 @@ pub fn readILEB128(comptime T: type, reader: var) !T { const shift = group * 7; if (@shlWithOverflow(U, temp, shift, &temp)) { - //Overflow is ok so long as the sign bit is set and this is the last byte + // Overflow is ok so long as the sign bit is set and this is the last byte if (byte & 0x80 != 0) return error.Overflow; if (@bitCast(S, temp) >= 0) return error.Overflow; - //and all the overflowed bits are 1 + // and all the overflowed bits are 1 const remaining_shift = @intCast(u3, U.bit_count - @as(u16, shift)); const remaining_bits = @bitCast(i8, byte | 0x80) >> remaining_shift; if (remaining_bits != -1) return error.Overflow; @@ -111,7 +111,7 @@ pub fn readILEB128(comptime T: type, reader: var) !T { } const result = @bitCast(S, value); - //Only applies if we extended to i8 + // Only applies if we extended to i8 if (S != T) { if (result > std.math.maxInt(T) or result < std.math.minInt(T)) return error.Overflow; } @@ -119,7 +119,7 @@ pub fn readILEB128(comptime T: type, reader: var) !T { return @truncate(T, result); } -///Write a single signed integer as signed LEB128 to the given writer. +/// Write a single signed integer as signed LEB128 to the given writer. pub fn writeILEB128(writer: var, int_value: var) !void { const T = @TypeOf(int_value); const S = if (T.bit_count < 8) i8 else T; @@ -141,7 +141,7 @@ pub fn writeILEB128(writer: var, int_value: var) !void { } } -///Read a single singed LEB128 integer from the given memory as type T. +/// Read a single singed LEB128 integer from the given memory as type T. /// The provided slice reference will be updated to point to the byte after the last byte read. 
pub fn readILEB128Mem(comptime T: type, ptr: *[]const u8) !T { var buf = std.io.fixedBufferStream(ptr.*); @@ -150,7 +150,7 @@ pub fn readILEB128Mem(comptime T: type, ptr: *[]const u8) !T { return value; } -///Write a single signed LEB128 integer to the given memory as unsigned LEB128, +/// Write a single signed LEB128 integer to the given memory as unsigned LEB128, /// returning the number of bytes written. pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { const T = @TypeOf(int_value); @@ -159,7 +159,7 @@ pub fn writeILEB128Mem(ptr: []u8, int_value: var) !usize { return buf.pos; } -//tests +// tests fn test_read_stream_ileb128(comptime T: type, encoded: []const u8) !T { var reader = std.io.fixedBufferStream(encoded); return try readILEB128(T, reader.reader()); @@ -303,43 +303,54 @@ fn test_write_leb128(value: var) !void { const readStream = if (T.is_signed) readILEB128 else readULEB128; const readMem = if (T.is_signed) readILEB128Mem else readULEB128Mem; - //decode to a larger bit size too, to ensure sign extension + // decode to a larger bit size too, to ensure sign extension // is working as expected const larger_type_bits = ((T.bit_count + 8) / 8) * 8; const B = std.meta.Int(T.is_signed, larger_type_bits); + + const bytes_needed = bn: { + const S = std.meta.Int(T.is_signed, @sizeOf(T) * 8); + if (T.bit_count <= 7) break :bn @as(u16, 1); + + const unused_bits = if (value < 0) @clz(T, ~value) else @clz(T, value); + const used_bits: u16 = (T.bit_count - unused_bits) + @boolToInt(T.is_signed); + if (used_bits <= 7) break :bn @as(u16, 1); + break :bn ((used_bits + 6) / 7); + }; + const max_groups = if (T.bit_count == 0) 1 else (T.bit_count + 6) / 7; var buf: [max_groups]u8 = undefined; var fbs = std.io.fixedBufferStream(&buf); - //stream write + // stream write try writeStream(fbs.writer(), value); const w1_pos = fbs.pos; - testing.expect(w1_pos > 0); + testing.expect(w1_pos == bytes_needed); - //stream read + // stream read fbs.pos = 0; const sr = try 
readStream(T, fbs.reader()); testing.expect(fbs.pos == w1_pos); testing.expect(sr == value); - //bigger type stream read + // bigger type stream read fbs.pos = 0; const bsr = try readStream(B, fbs.reader()); testing.expect(fbs.pos == w1_pos); testing.expect(bsr == value); - //mem write + // mem write const w2_pos = try writeMem(&buf, value); testing.expect(w2_pos == w1_pos); - //mem read + // mem read var buf_ref: []u8 = buf[0..]; const mr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); testing.expect(mr == value); - //bigger type mem read + // bigger type mem read buf_ref = buf[0..]; const bmr = try readMem(T, &buf_ref); testing.expect(@ptrToInt(buf_ref.ptr) - @ptrToInt(&buf) == w2_pos); @@ -361,7 +372,7 @@ test "serialize unsigned LEB128" { } test "serialize signed LEB128" { - //explicitly test i0 because starting `t` at 0 + // explicitly test i0 because starting `t` at 0 // will break the while loop try test_write_leb128(@as(i0, 0));