std.unicode: clean up allocations on error in allocating functions

Fixes leaks when `utf16leToUtf8Alloc`/`utf16leToUtf8AllocZ`/`utf8ToUtf16LeWithNull` return an error, and adds relevant test cases.
2026-02-19 15:58:50 +00:00 · 2021-09-15 17:31:40 -07:00 · 2021-09-15 17:31:40 -07:00 · db940a2c81
commit db940a2c81
parent e1bf350b4d
1 changed files with 21 additions and 3 deletions
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -553,8 +553,9 @@ fn testDecode(bytes: []const u8) !u21 {
 /// Caller must free returned memory.
 pub fn utf16leToUtf8Alloc(allocator: *mem.Allocator, utf16le: []const u16) ![]u8 {
    var result = std.ArrayList(u8).init(allocator);
+    errdefer result.deinit();
    // optimistically guess that it will all be ascii.
-    try result.ensureCapacity(utf16le.len);
+    try result.ensureTotalCapacity(utf16le.len);
    var out_index: usize = 0;
    var it = Utf16LeIterator.init(utf16le);
    while (try it.nextCodepoint()) |codepoint| {
@@ -569,9 +570,10 @@ pub fn utf16leToUtf8Alloc(allocator: *mem.Allocator, utf16le: []const u16) ![]u8

 /// Caller must free returned memory.
 pub fn utf16leToUtf8AllocZ(allocator: *mem.Allocator, utf16le: []const u16) ![:0]u8 {
-    var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
+    var result = std.ArrayList(u8).init(allocator);
+    errdefer result.deinit();
    // optimistically guess that it will all be ascii.
-    try result.ensureCapacity(utf16le.len);
+    try result.ensureTotalCapacity(utf16le.len);
    var out_index: usize = 0;
    var it = Utf16LeIterator.init(utf16le);
    while (try it.nextCodepoint()) |codepoint| {
@@ -653,10 +655,18 @@ test "utf16leToUtf8" {
        defer std.testing.allocator.free(utf8);
        try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80"));
    }
+
+    {
+        mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0xdcdc);
+        mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xdcdc);
+        const result = utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
+        try std.testing.expectError(error.UnexpectedSecondSurrogateHalf, result);
+    }
 }

 pub fn utf8ToUtf16LeWithNull(allocator: *mem.Allocator, utf8: []const u8) ![:0]u16 {
    var result = std.ArrayList(u16).init(allocator);
+    errdefer result.deinit();
    // optimistically guess that it will not require surrogate pairs
    try result.ensureCapacity(utf8.len + 1);

@@ -718,6 +728,10 @@ test "utf8ToUtf16Le" {
        try testing.expectEqual(@as(usize, 2), length);
        try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..]));
    }
+    {
+        const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80");
+        try testing.expectError(error.InvalidUtf8, result);
+    }
 }

 test "utf8ToUtf16LeWithNull" {
@@ -733,6 +747,10 @@ test "utf8ToUtf16LeWithNull" {
        try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));
        try testing.expect(utf16[2] == 0);
    }
+    {
+        const result = utf8ToUtf16LeWithNull(testing.allocator, "\xf4\x90\x80\x80");
+        try testing.expectError(error.InvalidUtf8, result);
+    }
 }

 /// Converts a UTF-8 string literal into a UTF-16LE string literal.