From aae5560712fb67cea2b1405c4e2b03e9be236678 Mon Sep 17 00:00:00 2001 From: Yefeng Li Date: Mon, 25 Aug 2025 18:07:07 +0100 Subject: [PATCH] Remove memcmp and memset from bundled musl and wasi --- lib/libc/musl/src/string/aarch64/memset.S | 115 ------------------ lib/libc/musl/src/string/arm/__aeabi_memset.s | 31 ----- lib/libc/musl/src/string/i386/memset.s | 76 ------------ lib/libc/musl/src/string/memcmp.c | 8 -- lib/libc/musl/src/string/memset.c | 90 -------------- lib/libc/musl/src/string/x86_64/memset.s | 72 ----------- .../libc-top-half/musl/src/string/memcmp.c | 43 ------- .../libc-top-half/musl/src/string/memset.c | 94 -------------- src/libs/musl.zig | 6 - src/libs/wasi_libc.zig | 2 - 10 files changed, 537 deletions(-) delete mode 100644 lib/libc/musl/src/string/aarch64/memset.S delete mode 100644 lib/libc/musl/src/string/arm/__aeabi_memset.s delete mode 100644 lib/libc/musl/src/string/i386/memset.s delete mode 100644 lib/libc/musl/src/string/memcmp.c delete mode 100644 lib/libc/musl/src/string/memset.c delete mode 100644 lib/libc/musl/src/string/x86_64/memset.s delete mode 100644 lib/libc/wasi/libc-top-half/musl/src/string/memcmp.c delete mode 100644 lib/libc/wasi/libc-top-half/musl/src/string/memset.c diff --git a/lib/libc/musl/src/string/aarch64/memset.S b/lib/libc/musl/src/string/aarch64/memset.S deleted file mode 100644 index f0d29b7fa3..0000000000 --- a/lib/libc/musl/src/string/aarch64/memset.S +++ /dev/null @@ -1,115 +0,0 @@ -/* - * memset - fill memory with a constant byte - * - * Copyright (c) 2012-2020, Arm Limited. - * SPDX-License-Identifier: MIT - */ - -/* Assumptions: - * - * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. 
- * - */ - -#define dstin x0 -#define val x1 -#define valw w1 -#define count x2 -#define dst x3 -#define dstend x4 -#define zva_val x5 - -.global memset -.type memset,%function -memset: - - dup v0.16B, valw - add dstend, dstin, count - - cmp count, 96 - b.hi .Lset_long - cmp count, 16 - b.hs .Lset_medium - mov val, v0.D[0] - - /* Set 0..15 bytes. */ - tbz count, 3, 1f - str val, [dstin] - str val, [dstend, -8] - ret - nop -1: tbz count, 2, 2f - str valw, [dstin] - str valw, [dstend, -4] - ret -2: cbz count, 3f - strb valw, [dstin] - tbz count, 1, 3f - strh valw, [dstend, -2] -3: ret - - /* Set 17..96 bytes. */ -.Lset_medium: - str q0, [dstin] - tbnz count, 6, .Lset96 - str q0, [dstend, -16] - tbz count, 5, 1f - str q0, [dstin, 16] - str q0, [dstend, -32] -1: ret - - .p2align 4 - /* Set 64..96 bytes. Write 64 bytes from the start and - 32 bytes from the end. */ -.Lset96: - str q0, [dstin, 16] - stp q0, q0, [dstin, 32] - stp q0, q0, [dstend, -32] - ret - - .p2align 4 -.Lset_long: - and valw, valw, 255 - bic dst, dstin, 15 - str q0, [dstin] - cmp count, 160 - ccmp valw, 0, 0, hs - b.ne .Lno_zva - -#ifndef SKIP_ZVA_CHECK - mrs zva_val, dczid_el0 - and zva_val, zva_val, 31 - cmp zva_val, 4 /* ZVA size is 64 bytes. */ - b.ne .Lno_zva -#endif - str q0, [dst, 16] - stp q0, q0, [dst, 32] - bic dst, dst, 63 - sub count, dstend, dst /* Count is now 64 too large. */ - sub count, count, 128 /* Adjust count and bias for loop. */ - - .p2align 4 -.Lzva_loop: - add dst, dst, 64 - dc zva, dst - subs count, count, 64 - b.hi .Lzva_loop - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret - -.Lno_zva: - sub count, dstend, dst /* Count is 16 too large. */ - sub dst, dst, 16 /* Dst is biased by -32. */ - sub count, count, 64 + 16 /* Adjust count and bias for loop. */ -.Lno_zva_loop: - stp q0, q0, [dst, 32] - stp q0, q0, [dst, 64]! 
- subs count, count, 64 - b.hi .Lno_zva_loop - stp q0, q0, [dstend, -64] - stp q0, q0, [dstend, -32] - ret - -.size memset,.-memset - diff --git a/lib/libc/musl/src/string/arm/__aeabi_memset.s b/lib/libc/musl/src/string/arm/__aeabi_memset.s deleted file mode 100644 index f9f605838b..0000000000 --- a/lib/libc/musl/src/string/arm/__aeabi_memset.s +++ /dev/null @@ -1,31 +0,0 @@ -.syntax unified - -.global __aeabi_memclr8 -.global __aeabi_memclr4 -.global __aeabi_memclr -.global __aeabi_memset8 -.global __aeabi_memset4 -.global __aeabi_memset - -.type __aeabi_memclr8,%function -.type __aeabi_memclr4,%function -.type __aeabi_memclr,%function -.type __aeabi_memset8,%function -.type __aeabi_memset4,%function -.type __aeabi_memset,%function - -__aeabi_memclr8: -__aeabi_memclr4: -__aeabi_memclr: - movs r2, #0 -__aeabi_memset8: -__aeabi_memset4: -__aeabi_memset: - cmp r1, #0 - beq 2f - adds r1, r0, r1 -1: strb r2, [r0] - adds r0, r0, #1 - cmp r1, r0 - bne 1b -2: bx lr diff --git a/lib/libc/musl/src/string/i386/memset.s b/lib/libc/musl/src/string/i386/memset.s deleted file mode 100644 index d00422c4ac..0000000000 --- a/lib/libc/musl/src/string/i386/memset.s +++ /dev/null @@ -1,76 +0,0 @@ -.global memset -.type memset,@function -memset: - mov 12(%esp),%ecx - cmp $62,%ecx - ja 2f - - mov 8(%esp),%dl - mov 4(%esp),%eax - test %ecx,%ecx - jz 1f - - mov %dl,%dh - - mov %dl,(%eax) - mov %dl,-1(%eax,%ecx) - cmp $2,%ecx - jbe 1f - - mov %dx,1(%eax) - mov %dx,(-1-2)(%eax,%ecx) - cmp $6,%ecx - jbe 1f - - shl $16,%edx - mov 8(%esp),%dl - mov 8(%esp),%dh - - mov %edx,(1+2)(%eax) - mov %edx,(-1-2-4)(%eax,%ecx) - cmp $14,%ecx - jbe 1f - - mov %edx,(1+2+4)(%eax) - mov %edx,(1+2+4+4)(%eax) - mov %edx,(-1-2-4-8)(%eax,%ecx) - mov %edx,(-1-2-4-4)(%eax,%ecx) - cmp $30,%ecx - jbe 1f - - mov %edx,(1+2+4+8)(%eax) - mov %edx,(1+2+4+8+4)(%eax) - mov %edx,(1+2+4+8+8)(%eax) - mov %edx,(1+2+4+8+12)(%eax) - mov %edx,(-1-2-4-8-16)(%eax,%ecx) - mov %edx,(-1-2-4-8-12)(%eax,%ecx) - mov 
%edx,(-1-2-4-8-8)(%eax,%ecx) - mov %edx,(-1-2-4-8-4)(%eax,%ecx) - -1: ret - -2: movzbl 8(%esp),%eax - mov %edi,12(%esp) - imul $0x1010101,%eax - mov 4(%esp),%edi - test $15,%edi - mov %eax,-4(%edi,%ecx) - jnz 2f - -1: shr $2, %ecx - rep - stosl - mov 4(%esp),%eax - mov 12(%esp),%edi - ret - -2: xor %edx,%edx - sub %edi,%edx - and $15,%edx - mov %eax,(%edi) - mov %eax,4(%edi) - mov %eax,8(%edi) - mov %eax,12(%edi) - sub %edx,%ecx - add %edx,%edi - jmp 1b diff --git a/lib/libc/musl/src/string/memcmp.c b/lib/libc/musl/src/string/memcmp.c deleted file mode 100644 index bdbce9f0f5..0000000000 --- a/lib/libc/musl/src/string/memcmp.c +++ /dev/null @@ -1,8 +0,0 @@ -#include <string.h> - -int memcmp(const void *vl, const void *vr, size_t n) -{ - const unsigned char *l=vl, *r=vr; - for (; n && *l == *r; n--, l++, r++); - return n ? *l-*r : 0; -} diff --git a/lib/libc/musl/src/string/memset.c b/lib/libc/musl/src/string/memset.c deleted file mode 100644 index 5613a1486e..0000000000 --- a/lib/libc/musl/src/string/memset.c +++ /dev/null @@ -1,90 +0,0 @@ -#include <string.h> -#include <stdint.h> - -void *memset(void *dest, int c, size_t n) -{ - unsigned char *s = dest; - size_t k; - - /* Fill head and tail with minimal branching. Each - * conditional ensures that all the subsequently used - * offsets are well-defined and in the dest region. */ - - if (!n) return dest; - s[0] = c; - s[n-1] = c; - if (n <= 2) return dest; - s[1] = c; - s[2] = c; - s[n-2] = c; - s[n-3] = c; - if (n <= 6) return dest; - s[3] = c; - s[n-4] = c; - if (n <= 8) return dest; - - /* Advance pointer to align it at a 4-byte boundary, - * and truncate n to a multiple of 4. The previous code - * already took care of any head/tail that get cut off - * by the alignment. 
*/ - - k = -(uintptr_t)s & 3; - s += k; - n -= k; - n &= -4; - -#ifdef __GNUC__ - typedef uint32_t __attribute__((__may_alias__)) u32; - typedef uint64_t __attribute__((__may_alias__)) u64; - - u32 c32 = ((u32)-1)/255 * (unsigned char)c; - - /* In preparation to copy 32 bytes at a time, aligned on - * an 8-byte bounary, fill head/tail up to 28 bytes each. - * As in the initial byte-based head/tail fill, each - * conditional below ensures that the subsequent offsets - * are valid (e.g. !(n<=24) implies n>=28). */ - - *(u32 *)(s+0) = c32; - *(u32 *)(s+n-4) = c32; - if (n <= 8) return dest; - *(u32 *)(s+4) = c32; - *(u32 *)(s+8) = c32; - *(u32 *)(s+n-12) = c32; - *(u32 *)(s+n-8) = c32; - if (n <= 24) return dest; - *(u32 *)(s+12) = c32; - *(u32 *)(s+16) = c32; - *(u32 *)(s+20) = c32; - *(u32 *)(s+24) = c32; - *(u32 *)(s+n-28) = c32; - *(u32 *)(s+n-24) = c32; - *(u32 *)(s+n-20) = c32; - *(u32 *)(s+n-16) = c32; - - /* Align to a multiple of 8 so we can fill 64 bits at a time, - * and avoid writing the same bytes twice as much as is - * practical without introducing additional branching. */ - - k = 24 + ((uintptr_t)s & 4); - s += k; - n -= k; - - /* If this loop is reached, 28 tail bytes have already been - * filled, so any remainder when n drops below 32 can be - * safely ignored. */ - - u64 c64 = c32 | ((u64)c32 << 32); - for (; n >= 32; n-=32, s+=32) { - *(u64 *)(s+0) = c64; - *(u64 *)(s+8) = c64; - *(u64 *)(s+16) = c64; - *(u64 *)(s+24) = c64; - } -#else - /* Pure C fallback with no aliasing violations. 
*/ - for (; n; n--, s++) *s = c; -#endif - - return dest; -} diff --git a/lib/libc/musl/src/string/x86_64/memset.s b/lib/libc/musl/src/string/x86_64/memset.s deleted file mode 100644 index 2d3f5e52b8..0000000000 --- a/lib/libc/musl/src/string/x86_64/memset.s +++ /dev/null @@ -1,72 +0,0 @@ -.global memset -.type memset,@function -memset: - movzbq %sil,%rax - mov $0x101010101010101,%r8 - imul %r8,%rax - - cmp $126,%rdx - ja 2f - - test %edx,%edx - jz 1f - - mov %sil,(%rdi) - mov %sil,-1(%rdi,%rdx) - cmp $2,%edx - jbe 1f - - mov %ax,1(%rdi) - mov %ax,(-1-2)(%rdi,%rdx) - cmp $6,%edx - jbe 1f - - mov %eax,(1+2)(%rdi) - mov %eax,(-1-2-4)(%rdi,%rdx) - cmp $14,%edx - jbe 1f - - mov %rax,(1+2+4)(%rdi) - mov %rax,(-1-2-4-8)(%rdi,%rdx) - cmp $30,%edx - jbe 1f - - mov %rax,(1+2+4+8)(%rdi) - mov %rax,(1+2+4+8+8)(%rdi) - mov %rax,(-1-2-4-8-16)(%rdi,%rdx) - mov %rax,(-1-2-4-8-8)(%rdi,%rdx) - cmp $62,%edx - jbe 1f - - mov %rax,(1+2+4+8+16)(%rdi) - mov %rax,(1+2+4+8+16+8)(%rdi) - mov %rax,(1+2+4+8+16+16)(%rdi) - mov %rax,(1+2+4+8+16+24)(%rdi) - mov %rax,(-1-2-4-8-16-32)(%rdi,%rdx) - mov %rax,(-1-2-4-8-16-24)(%rdi,%rdx) - mov %rax,(-1-2-4-8-16-16)(%rdi,%rdx) - mov %rax,(-1-2-4-8-16-8)(%rdi,%rdx) - -1: mov %rdi,%rax - ret - -2: test $15,%edi - mov %rdi,%r8 - mov %rax,-8(%rdi,%rdx) - mov %rdx,%rcx - jnz 2f - -1: shr $3,%rcx - rep - stosq - mov %r8,%rax - ret - -2: xor %edx,%edx - sub %edi,%edx - and $15,%edx - mov %rax,(%rdi) - mov %rax,8(%rdi) - sub %rdx,%rcx - add %rdx,%rdi - jmp 1b diff --git a/lib/libc/wasi/libc-top-half/musl/src/string/memcmp.c b/lib/libc/wasi/libc-top-half/musl/src/string/memcmp.c deleted file mode 100644 index ce313049a9..0000000000 --- a/lib/libc/wasi/libc-top-half/musl/src/string/memcmp.c +++ /dev/null @@ -1,43 +0,0 @@ -#include <string.h> - -#ifdef __wasm_simd128__ -#include <wasm_simd128.h> -#endif - -int memcmp(const void *vl, const void *vr, size_t n) -{ -#if defined(__wasm_simd128__) && defined(__wasilibc_simd_string) - if (n >= sizeof(v128_t)) { - // memcmp is allowed to read up 
to n bytes from each object. - // Find the first different character in the objects. - // Unaligned loads handle the case where the objects - // have mismatching alignments. - const v128_t *v1 = (v128_t *)vl; - const v128_t *v2 = (v128_t *)vr; - while (n) { - const v128_t cmp = wasm_i8x16_eq(wasm_v128_load(v1), wasm_v128_load(v2)); - // Bitmask is slow on AArch64, all_true is much faster. - if (!wasm_i8x16_all_true(cmp)) { - // Find the offset of the first zero bit (little-endian). - size_t ctz = __builtin_ctz(~wasm_i8x16_bitmask(cmp)); - const unsigned char *u1 = (unsigned char *)v1 + ctz; - const unsigned char *u2 = (unsigned char *)v2 + ctz; - // This may help the compiler if the function is inlined. - __builtin_assume(*u1 - *u2 != 0); - return *u1 - *u2; - } - // This makes n a multiple of sizeof(v128_t) - // for every iteration except the first. - size_t align = (n - 1) % sizeof(v128_t) + 1; - v1 = (v128_t *)((char *)v1 + align); - v2 = (v128_t *)((char *)v2 + align); - n -= align; - } - return 0; - } -#endif - - const unsigned char *l=vl, *r=vr; - for (; n && *l == *r; n--, l++, r++); - return n ? *l-*r : 0; -} diff --git a/lib/libc/wasi/libc-top-half/musl/src/string/memset.c b/lib/libc/wasi/libc-top-half/musl/src/string/memset.c deleted file mode 100644 index f64c9cf5ae..0000000000 --- a/lib/libc/wasi/libc-top-half/musl/src/string/memset.c +++ /dev/null @@ -1,94 +0,0 @@ -#include <string.h> -#include <stdint.h> - -void *memset(void *dest, int c, size_t n) -{ -#if defined(__wasm_bulk_memory__) - if (n > BULK_MEMORY_THRESHOLD) - return __builtin_memset(dest, c, n); -#endif - unsigned char *s = dest; - size_t k; - - /* Fill head and tail with minimal branching. Each - * conditional ensures that all the subsequently used - * offsets are well-defined and in the dest region. 
*/ - - if (!n) return dest; - s[0] = c; - s[n-1] = c; - if (n <= 2) return dest; - s[1] = c; - s[2] = c; - s[n-2] = c; - s[n-3] = c; - if (n <= 6) return dest; - s[3] = c; - s[n-4] = c; - if (n <= 8) return dest; - - /* Advance pointer to align it at a 4-byte boundary, - * and truncate n to a multiple of 4. The previous code - * already took care of any head/tail that get cut off - * by the alignment. */ - - k = -(uintptr_t)s & 3; - s += k; - n -= k; - n &= -4; - -#ifdef __GNUC__ - typedef uint32_t __attribute__((__may_alias__)) u32; - typedef uint64_t __attribute__((__may_alias__)) u64; - - u32 c32 = ((u32)-1)/255 * (unsigned char)c; - - /* In preparation to copy 32 bytes at a time, aligned on - * an 8-byte bounary, fill head/tail up to 28 bytes each. - * As in the initial byte-based head/tail fill, each - * conditional below ensures that the subsequent offsets - * are valid (e.g. !(n<=24) implies n>=28). */ - - *(u32 *)(s+0) = c32; - *(u32 *)(s+n-4) = c32; - if (n <= 8) return dest; - *(u32 *)(s+4) = c32; - *(u32 *)(s+8) = c32; - *(u32 *)(s+n-12) = c32; - *(u32 *)(s+n-8) = c32; - if (n <= 24) return dest; - *(u32 *)(s+12) = c32; - *(u32 *)(s+16) = c32; - *(u32 *)(s+20) = c32; - *(u32 *)(s+24) = c32; - *(u32 *)(s+n-28) = c32; - *(u32 *)(s+n-24) = c32; - *(u32 *)(s+n-20) = c32; - *(u32 *)(s+n-16) = c32; - - /* Align to a multiple of 8 so we can fill 64 bits at a time, - * and avoid writing the same bytes twice as much as is - * practical without introducing additional branching. */ - - k = 24 + ((uintptr_t)s & 4); - s += k; - n -= k; - - /* If this loop is reached, 28 tail bytes have already been - * filled, so any remainder when n drops below 32 can be - * safely ignored. */ - - u64 c64 = c32 | ((u64)c32 << 32); - for (; n >= 32; n-=32, s+=32) { - *(u64 *)(s+0) = c64; - *(u64 *)(s+8) = c64; - *(u64 *)(s+16) = c64; - *(u64 *)(s+24) = c64; - } -#else - /* Pure C fallback with no aliasing violations. 
*/ - for (; n; n--, s++) *s = c; -#endif - - return dest; -} diff --git a/src/libs/musl.zig b/src/libs/musl.zig index 750252aa56..32bf642262 100644 --- a/src/libs/musl.zig +++ b/src/libs/musl.zig @@ -1786,20 +1786,15 @@ const src_files = [_][]const u8{ "musl/src/stdlib/strtol.c", "musl/src/stdlib/wcstod.c", "musl/src/stdlib/wcstol.c", - "musl/src/string/aarch64/memset.S", - "musl/src/string/arm/__aeabi_memset.s", "musl/src/string/bcmp.c", "musl/src/string/bcopy.c", "musl/src/string/explicit_bzero.c", - "musl/src/string/i386/memset.s", "musl/src/string/index.c", "musl/src/string/memccpy.c", "musl/src/string/memchr.c", - "musl/src/string/memcmp.c", "musl/src/string/memmem.c", "musl/src/string/mempcpy.c", "musl/src/string/memrchr.c", - "musl/src/string/memset.c", "musl/src/string/rindex.c", "musl/src/string/stpcpy.c", "musl/src/string/stpncpy.c", @@ -1855,7 +1850,6 @@ const src_files = [_][]const u8{ "musl/src/string/wmemcpy.c", "musl/src/string/wmemmove.c", "musl/src/string/wmemset.c", - "musl/src/string/x86_64/memset.s", "musl/src/temp/mkdtemp.c", "musl/src/temp/mkostemp.c", "musl/src/temp/mkostemps.c", diff --git a/src/libs/wasi_libc.zig b/src/libs/wasi_libc.zig index 4af86f28f2..1c1d130a05 100644 --- a/src/libs/wasi_libc.zig +++ b/src/libs/wasi_libc.zig @@ -1221,9 +1221,7 @@ const libc_top_half_src_files = [_][]const u8{ "wasi/libc-top-half/musl/src/stdlib/wcstod.c", "wasi/libc-top-half/musl/src/stdlib/wcstol.c", "wasi/libc-top-half/musl/src/string/memchr.c", - "wasi/libc-top-half/musl/src/string/memcmp.c", "wasi/libc-top-half/musl/src/string/memrchr.c", - "wasi/libc-top-half/musl/src/string/memset.c", "wasi/libc-top-half/musl/src/string/strchrnul.c", "wasi/libc-top-half/musl/src/thread/pthread_attr_get.c", "wasi/libc-top-half/musl/src/thread/pthread_attr_setguardsize.c",