translate-c: Add support for pointer subtraction

When two pointers are subtracted, both shall point to elements of the same array object, or one past the last element of the array object; the result is the difference of the subscripts of the two array elements. The size of the result is implementation-defined, and its type (a signed integer type) is ptrdiff_t defined in the <stddef.h> header. If the result is not representable in an object of that type, the behavior is undefined. See C Standard, §6.5.6 [ISO/IEC 9899:2011]. Fixes #7216.
2025-12-27 00:23:22 +00:00 · 2021-02-22 13:09:51 -08:00 · 2021-02-22 13:09:51 -08:00 · 4f11a88b9f
commit 4f11a88b9f
parent 53cc63f0c9
3 changed files with 143 additions and 2 deletions
--- a/src/translate_c.zig
+++ b/src/translate_c.zig
@ -1083,6 +1083,7 @@ fn transBinaryOperator(
 ) TransError!Node {
    const op = stmt.getOpcode();
    const qt = stmt.getType();
+    const isPointerDiffExpr = cIsPointerDiffExpr(c, stmt);
    switch (op) {
        .Assign => return try transCreateNodeAssign(c, scope, result_used, stmt.getLHS(), stmt.getRHS()),
        .Comma => {
@ -1143,7 +1144,7 @@ fn transBinaryOperator(
            }
        },
        .Sub => {
-            if (cIsUnsignedInteger(qt)) {
+            if (cIsUnsignedInteger(qt) or isPointerDiffExpr) {
                op_id = .sub_wrap;
            } else {
                op_id = .sub;
@ -1199,15 +1200,40 @@ fn transBinaryOperator(

    const lhs = if (isBoolRes(lhs_uncasted))
        try Tag.bool_to_int.create(c.arena, lhs_uncasted)
+    else if (isPointerDiffExpr)
+        try Tag.ptr_to_int.create(c.arena, lhs_uncasted)
    else
        lhs_uncasted;

    const rhs = if (isBoolRes(rhs_uncasted))
        try Tag.bool_to_int.create(c.arena, rhs_uncasted)
+    else if (isPointerDiffExpr)
+        try Tag.ptr_to_int.create(c.arena, rhs_uncasted)
    else
        rhs_uncasted;

-    return transCreateNodeInfixOp(c, scope, op_id, lhs, rhs, result_used);
+    const infixOpNode = try transCreateNodeInfixOp(c, scope, op_id, lhs, rhs, result_used);
+    if (isPointerDiffExpr) {
+        // @divExact(@bitCast(<platform-ptrdiff_t>, @ptrToInt(lhs) -% @ptrToInt(rhs)), @sizeOf(<lhs target type>))
+        const ptrdiff_type = try transQualTypeIntWidthOf(c, qt, true);
+
+        // C standard requires that pointer subtraction operands are of the same type,
+        // otherwise it is undefined behavior. So we can assume the left and right
+        // sides are the same QualType and arbitrarily choose left.
+        const lhs_expr = stmt.getLHS();
+        const lhs_qt = getExprQualType(c, lhs_expr);
+        const lhs_qt_translated = try transQualType(c, scope, lhs_qt, lhs_expr.getBeginLoc());
+        const elem_type = lhs_qt_translated.castTag(.c_pointer).?.data.elem_type;
+        const sizeof = try Tag.sizeof.create(c.arena, elem_type);
+
+        const bitcast = try Tag.bit_cast.create(c.arena, .{ .lhs = ptrdiff_type, .rhs = infixOpNode });
+
+        return Tag.div_exact.create(c.arena, .{
+            .lhs = bitcast,
+            .rhs = sizeof,
+        });
+    }
+    return infixOpNode;
 }

 fn transCompoundStmtInline(
@ -1683,6 +1709,17 @@ fn transStringLiteralAsArray(
    });
 }

+/// Report whether `stmt` is a pointer subtraction: a `.Sub` binary operator whose
+/// left and right operand types both satisfy `qualTypeIsPtr`. C additionally requires
+/// the operands to point into the same array object; that is not verified here.
+fn cIsPointerDiffExpr(c: *Context, stmt: *const clang.BinaryOperator) bool {
+    if (stmt.getOpcode() != .Sub) return false;
+    // Only the operand types are inspected; nothing is translated at this point.
+    const lhs_qt = stmt.getLHS().getType();
+    const rhs_qt = stmt.getRHS().getType();
+    return qualTypeIsPtr(lhs_qt) and qualTypeIsPtr(rhs_qt);
+}
+
 fn cIsEnum(qt: clang.QualType) bool {
    return qt.getCanonicalType().getTypeClass() == .Enum;
 }
--- a/src/translate_c/ast.zig
+++ b/src/translate_c/ast.zig
@ -146,6 +146,8 @@ pub const Node = extern union {
        align_cast,
        /// @ptrCast(lhs, rhs)
        ptr_cast,
+        /// @divExact(lhs, rhs)
+        div_exact,

        negate,
        negate_wrap,
@ -300,6 +302,7 @@ pub const Node = extern union {
                .array_access,
                .std_mem_zeroinit,
                .ptr_cast,
+                .div_exact,
                => Payload.BinOp,

                .integer_literal,
@ -1128,6 +1131,10 @@ fn renderNode(c: *Context, node: Node) Allocator.Error!NodeIndex {
            const payload = node.castTag(.ptr_cast).?.data;
            return renderBuiltinCall(c, "@ptrCast", &.{ payload.lhs, payload.rhs });
        },
+        .div_exact => {
+            const payload = node.castTag(.div_exact).?.data;
+            return renderBuiltinCall(c, "@divExact", &.{ payload.lhs, payload.rhs });
+        },
        .sizeof => {
            const payload = node.castTag(.sizeof).?.data;
            return renderBuiltinCall(c, "@sizeOf", &.{payload});
@ -1993,6 +2000,7 @@ fn renderNodeGrouped(c: *Context, node: Node) !NodeIndex {
        .call,
        .array_type,
        .bool_to_int,
+        .div_exact,
        => {
            // no grouping needed
            return renderNode(c, node);
--- a/test/run_translated_c.zig
+++ b/test/run_translated_c.zig
@ -958,4 +958,100 @@ pub fn addCases(cases: *tests.RunTranslatedCContext) void {
        \\    return 0;
        \\}
    , "");
+
+    cases.add("pointer difference: scalar array w/ size truncation or negative result. Issue #7216",
+        \\#include <stdlib.h>
+        \\#include <stddef.h>
+        \\#define SIZE 10
+        \\int main() {
+        \\    int foo[SIZE];
+        \\    int *start = &foo[0];
+        \\    int *one_past_end = start + SIZE;
+        \\    ptrdiff_t diff = one_past_end - start;
+        \\    char diff_char = one_past_end - start;
+        \\    if (diff != SIZE || diff_char != SIZE) abort();
+        \\    diff = start - one_past_end;
+        \\    if (diff != -SIZE) abort();
+        \\    if (one_past_end - foo != SIZE) abort();
+        \\    if ((one_past_end - 1) - foo != SIZE - 1) abort();
+        \\    if ((start + 1) - foo != 1) abort();
+        \\    return 0;
+        \\}
+    , "");
+
+    // C standard: if the expression P points either to an element of an array object or one
+    // past the last element of an array object, and the expression Q points to the last
+    // element of the same array object, the expression ((Q)+1)-(P) has the same value as
+    // ((Q)-(P))+1 and as -((P)-((Q)+1)), and has the value zero if the expression P points
+    // one past the last element of the array object, even though the expression (Q)+1
+    // does not point to an element of the array object
+    cases.add("pointer difference: C standard edge case",
+        \\#include <stdlib.h>
+        \\#include <stddef.h>
+        \\#define SIZE 10
+        \\int main() {
+        \\    int foo[SIZE];
+        \\    int *start = &foo[0];
+        \\    int *P = start + SIZE;
+        \\    int *Q = &foo[SIZE - 1];
+        \\    if ((Q + 1) - P != 0) abort();
+        \\    if ((Q + 1) - P != (Q - P) + 1) abort();
+        \\    if ((Q + 1) - P != -(P - (Q + 1))) abort();
+        \\    return 0;
+        \\}
+    , "");
+
+    cases.add("pointer difference: unary operators",
+        \\#include <stdlib.h>
+        \\int main() {
+        \\    int foo[10];
+        \\    int *x = &foo[1];
+        \\    const int *y = &foo[5];
+        \\    if (y - x++ != 4) abort();
+        \\    if (y - x != 3) abort();
+        \\    if (y - ++x != 2) abort();
+        \\    if (y - x-- != 2) abort();
+        \\    if (y - x != 3) abort();
+        \\    if (y - --x != 4) abort();
+        \\    if (y - &foo[0] != 5) abort();
+        \\    return 0;
+        \\}
+    , "");
+
+    cases.add("pointer difference: struct array with padding",
+        \\#include <stdlib.h>
+        \\#include <stddef.h>
+        \\#define SIZE 10
+        \\typedef struct my_struct {
+        \\    int x;
+        \\    char c;
+        \\    int y;
+        \\} my_struct_t;
+        \\int main() {
+        \\    my_struct_t foo[SIZE];
+        \\    my_struct_t *start = &foo[0];
+        \\    my_struct_t *one_past_end = start + SIZE;
+        \\    ptrdiff_t diff = one_past_end - start;
+        \\    int diff_int = one_past_end - start;
+        \\    if (diff != SIZE || diff_int != SIZE) abort();
+        \\    diff = start - one_past_end;
+        \\    if (diff != -SIZE) abort();
+        \\    return 0;
+        \\}
+    , "");
+
+    cases.add("pointer difference: array of function pointers",
+        \\#include <stdlib.h>
+        \\int a(void) { return 1;}
+        \\int b(void) { return 2;}
+        \\int c(void) { return 3;}
+        \\typedef int (*myfunc)(void);
+        \\int main() {
+        \\    myfunc arr[] = {a, b, c, a, b, c};
+        \\    myfunc *f1 = &arr[1];
+        \\    myfunc *f4 = &arr[4];
+        \\    if (f4 - f1 != 3) abort();
+        \\    return 0;
+        \\}
+    , "");
 }