Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions src/IROperator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,12 +609,23 @@ Expr lossless_negate(const Expr &x) {
} else if (const FloatImm *f = x.as<FloatImm>()) {
return FloatImm::make(f->type, -f->value);
} else if (const Cast *c = x.as<Cast>()) {
Expr value = lossless_negate(c->value);
if (value.defined()) {
// This logic is only sound if we know the cast can't overflow.
value = lossless_cast(c->type, value);
// Unsigned inner types wrap modularly (-uint8(65) = 191), and signed
// integer inner types wrap at INT_TYPE_MIN (-int8(-128) = -128), so both
// make cast(outer, -inner) != -cast(outer, inner). Floats are exact.
// For signed integers, only proceed when bounds exclude INT_TYPE_MIN.
bool inner_negation_safe = c->value.type().is_float();
if (!inner_negation_safe && c->value.type().is_int()) {
ConstantInterval ci = constant_integer_bounds(c->value);
inner_negation_safe = ci.min_defined && !c->value.type().is_min(ci.min);
}
if (inner_negation_safe) {
Expr value = lossless_negate(c->value);
if (value.defined()) {
return value;
// This logic is only sound if we know the cast can't overflow.
value = lossless_cast(c->type, value);
if (value.defined()) {
return value;
}
}
}
} else if (const Ramp *r = x.as<Ramp>()) {
Expand Down
171 changes: 171 additions & 0 deletions test/correctness/lossless_cast.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,177 @@ int main(int argc, char **argv) {
e = make_reduce(UInt(8), VectorReduce::Max);
found_error |= check_lossless_cast(Bool(), e, make_reduce(Bool(), VectorReduce::Or));

// Runtime test: verify that lossless_cast of a widening_sub expression
// evaluates correctly when vectorized. This is a regression test for a bug
// in lossless_negate where it incorrectly negated through an unsigned-to-signed
// cast, causing FindIntrinsics to generate wrong code for the vectorized case.
{
Var x("x");
Buffer<uint8_t> buf(1024, "buf");
for (int i = 0; i < 1024; i++) {
buf(i) = (uint8_t)i;
}

// a = int8(-16) + int8(32) / int8(uint8(buf(x)))
Expr a = cast(Int(8), -16) + cast(Int(8), 32) / cast(Int(8), cast(UInt(8), buf(x)));
// inner = (a / -33_i8) * -33_i8 (in int8 Euclidean arithmetic)
Expr inner = (a / cast(Int(8), -33)) * cast(Int(8), -33);
// b = 223_u8 * uint8(inner)
Expr b = cast(UInt(8), 223) * cast(UInt(8), inner);

// e1: (int64)widening_sub(int32(int16(a)), int32(uint16(b)))
Expr e1 = cast(Int(64), widening_sub(cast(Int(32), cast(Int(16), a)),
cast(Int(32), cast(UInt(16), b))));

// lossless_cast to int16 - the returned expression should evaluate
// identically to e1 when vectorized.
Expr e2 = lossless_cast(Int(16), e1);
if (!e2.defined()) {
std::cerr << "Runtime regression test: lossless_cast unexpectedly returned undefined\n";
found_error = true;
} else {
Func f;
f(x) = {e1, cast(Int(64), e2)};
f.vectorize(x, 4, TailStrategy::RoundUp);

Buffer<int64_t> out1(1024), out2(1024);
Pipeline p(f);
p.realize({out1, out2});

for (int i = 0; i < 1024; i++) {
if (out1(i) != out2(i)) {
std::cerr << "Runtime regression test: mismatch at x=" << i
<< ": original=" << out1(i)
<< " lossless_cast=" << out2(i) << "\n";
found_error = true;
break;
}
}
}
}

// Static tests for the Cast case in lossless_negate with signed integer inner types.
// We use Cast::make (not the cast() function) to prevent constant folding of integer
// literals, so that lossless_negate actually sees a Cast node.
//
// The invariant: lossless_negate(Cast(outer, inner)) must return Expr() when the
// inner expression's bounds include INT_TYPE_MIN, because:
// cast(outer, -int8(-128)) = cast(outer, int8(-128)) = -128 [the negation wraps in int8]
// ≠ -cast(outer, int8(-128)) = 128 [exact when outer is wider, e.g. int16]
{
// Inner = Int(8)(-128) = INT8_MIN. Pushing negation through would give
// cast(int16, -int8(-128)) = cast(int16, -128) = -128, not the correct 128.
Expr neg_i8_min = lossless_negate(Cast::make(Int(16), IntImm::make(Int(8), -128)));
if (neg_i8_min.defined()) {
std::cerr << "Int(8) INT_MIN cast-negate test: expected Expr(), got " << neg_i8_min << "\n";
found_error = true;
}

// Inner = Int(16)(-32768) = INT16_MIN. Same reasoning.
Expr neg_i16_min = lossless_negate(Cast::make(Int(32), IntImm::make(Int(16), -32768)));
if (neg_i16_min.defined()) {
std::cerr << "Int(16) INT_MIN cast-negate test: expected Expr(), got " << neg_i16_min << "\n";
found_error = true;
}

// Non-INT_MIN: lossless_negate(Cast(Int(16), Int(8)(-127))) must return
// 127_i16 since -127 ≠ INT8_MIN so negation is exact.
// lossless_negate(int8(-127)) = int8(127); lossless_cast(int16, int8(127))
// constant-folds to the literal 127_i16.
Expr neg_i8_ok = lossless_negate(Cast::make(Int(16), IntImm::make(Int(8), -127)));
Expr expected_ok = IntImm::make(Int(16), 127);
if (!neg_i8_ok.defined() || !equal(neg_i8_ok, expected_ok)) {
std::cerr << "Int(8) non-INT_MIN cast-negate test: expected " << expected_ok
<< ", got " << neg_i8_ok << "\n";
found_error = true;
}
}

// Runtime tests: verify that widening_sub expressions containing values that
// include INT8_MIN / INT16_MIN evaluate correctly when lossless_cast narrows
// them. The Cast case in lossless_negate must reject the inner signed int when
// its bounds include INT_TYPE_MIN and let the code fall back to widening_sub.
{
Var x("x");
Buffer<uint8_t> buf(16, "buf");
for (int i = 0; i < 16; i++) {
buf(i) = (uint8_t)(i * 16); // buf(8)=128 → cast(int8, 128) = -128 = INT8_MIN
}

Expr inner_i8 = cast(Int(8), cast(UInt(8), buf(x)));
Expr a_i16 = cast(Int(16), inner_i8);
Expr b_i16 = cast(Int(16), inner_i8);

// e1: widening_sub as the reference — correct regardless of INT_MIN
Expr e1 = widening_sub(a_i16, b_i16);

// e2: lossless_cast(Int(16), e1) = a_i16 - b_i16. When vectorized,
// FindIntrinsics calls lossless_negate(cast(Int(16), inner_i8)) on the
// subtrahend. Because inner_i8 can be INT8_MIN, the Cast case must reject
// it and fall through to widening_sub.
Expr e2 = lossless_cast(Int(16), e1);
if (!e2.defined()) {
std::cerr << "Signed Int(8) runtime test: lossless_cast unexpectedly returned undefined\n";
found_error = true;
} else {
Func f;
f(x) = {e1, cast(Int(32), e2)};
f.vectorize(x, 4, TailStrategy::RoundUp);

Buffer<int32_t> out1(16), out2(16);
Pipeline p(f);
p.realize({out1, out2});

for (int i = 0; i < 16; i++) {
if (out1(i) != out2(i)) {
std::cerr << "Signed Int(8) runtime test: mismatch at x=" << i
<< ": widening_sub=" << out1(i)
<< " lossless_cast=" << out2(i) << "\n";
found_error = true;
break;
}
}
}
}

// Same test for Int(16), where INT16_MIN = -32768.
{
Var x("x");
Buffer<int16_t> buf(16, "buf");
for (int i = 0; i < 16; i++) {
buf(i) = (int16_t)(i * 4096 - 32768); // -32768, -28672, ..., 28672 (includes INT16_MIN)
}

Expr inner_i16 = cast(Int(16), buf(x));
Expr a_i32 = cast(Int(32), inner_i16);
Expr b_i32 = cast(Int(32), inner_i16);

Expr e1 = widening_sub(a_i32, b_i32);
Expr e2 = lossless_cast(Int(32), e1);
if (!e2.defined()) {
std::cerr << "Signed Int(16) runtime test: lossless_cast unexpectedly returned undefined\n";
found_error = true;
} else {
Func f;
f(x) = {e1, cast(Int(64), e2)};
f.vectorize(x, 4, TailStrategy::RoundUp);

Buffer<int64_t> out1(16), out2(16);
Pipeline p(f);
p.realize({out1, out2});

for (int i = 0; i < 16; i++) {
if (out1(i) != out2(i)) {
std::cerr << "Signed Int(16) runtime test: mismatch at x=" << i
<< ": widening_sub=" << out1(i)
<< " lossless_cast=" << out2(i) << "\n";
found_error = true;
break;
}
}
}
}

if (found_error) {
return 1;
}
Expand Down
Loading