From 6639b8f12ec72c183753b0350f99b69779d53c82 Mon Sep 17 00:00:00 2001 From: Nitin Jain Date: Wed, 6 May 2026 08:39:51 -0700 Subject: [PATCH] Add a16w8 reduce_sum FVP coverage for Ethos-U85 (#19319) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Adds an a16w8 (int16 IO + int8 weights) sweep for `aten.sum.dim_IntList` reducing the last dim with `keepdim=True`. The new tests `test_sum_dim_intlist_a16w8_{u55,u85}_INT` run on the standard Corstone-300 / Corstone-320 FVP harness. The U85 case surfaces a known numerics issue in the Vela `regor` lowering at int16 IO precision (silent zero output), tracked upstream at https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/issues/23. The Ethos-U55 path uses a different accumulator and is correct on the same OFM rescale. This diff is **additive only**: the `Sum` / `SumDefault` test classes and existing test functions are not modified, except for `skips=` annotations on the two pre-existing `dim_None` parametrize ids (four test cases across the U55 / U85 `_INT_1_0` tests) that are not bundled-program-serializable and surface only because this diff is the first to register `ops/test_sum.py` in the buck test target list. Test design: - Standard `pipeline.run()` with the same a16w8 kwargs other arm a16w8 tests use (e.g. `test_native_layer_norm_16a8w_u85_INT` in `test_layer_norm.py`): `a16w8_quantization=True, symmetric_io_quantization=True, qtol=128, epsilon=2**-16`. - Numerical comparison is the standard `atol`/`rtol` check from `pipeline.run()` — no SQNR helpers. - The U85 test is marked with a function-level `@pytest.mark.xfail(reason=..., strict=False)`. `strict=False` keeps the test target green both on stock Vela 5.0 (cases XFAIL) and once the upstream Vela fix is in tree (cases XPASS allowed). - `XfailIfNoCorstone320` is kept on the new a16w8 U85 test; the function-level mark is used instead of the per-id `xfails=` argument of `common.parametrize` because stacking `XfailIfNoCorstone320` with per-id `xfails=` makes the per-id marks not fire (verified empirically in this buck test target).
A code comment in the file documents this constraint. Differential Revision: D103667823 --- backends/arm/test/ops/test_sum.py | 83 ++++++++++++++++++++++++++++++- backends/arm/test/targets.bzl | 1 + 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/backends/arm/test/ops/test_sum.py b/backends/arm/test/ops/test_sum.py index 1075055c4f0..91334290975 100644 --- a/backends/arm/test/ops/test_sum.py +++ b/backends/arm/test/ops/test_sum.py @@ -5,6 +5,8 @@ from typing import Callable, Tuple +import pytest + import torch from executorch.backends.arm.test import common @@ -96,7 +98,27 @@ def test_sum_dim_intlist_tosa_INT(test_data: input_t1): pipeline.run() -@common.parametrize("test_data", Sum.test_parameters) +# Pre-existing failure that only surfaced once ops/test_sum.py was +# registered in targets.bzl (Meta CI never ran these tests before): +# bundled_program (used by the FVP _INT_1_0 tests) cannot serialize None +# as a model input -- the input flatten/sanity check in +# executorch.devtools.bundled_program.config rejects NoneType. dim=None is +# already covered by the SumDefault class below. Marked skip rather than +# xfail because the per-id xfails= argument does not compose with the +# XfailIfNoCorstone* decorator (verified empirically); common.parametrize +# explicitly supports skips= for "fail markers don't work in buck CI". 
+_DIM_NONE_SKIP_REASON = (  # reason text shared by both dim=None ids + "bundled_program cannot serialize None as a model input " + "(pre-existing failure -- only surfaced after ops/test_sum.py was " + "registered in targets.bzl)" +) +_dim_none_skips = {  # parametrize id -> skip reason (passed via skips=) + "dim_None": _DIM_NONE_SKIP_REASON, + "dim_None_4d_tensor": _DIM_NONE_SKIP_REASON, +} + + +@common.parametrize("test_data", Sum.test_parameters, skips=_dim_none_skips) @common.XfailIfNoCorstone300 def test_sum_u55_INT_1_0(test_data: Tuple): pipeline = EthosU55PipelineINT[input_t1]( @@ -108,7 +130,7 @@ def test_sum_u55_INT_1_0(test_data: Tuple): pipeline.run() -@common.parametrize("test_data", Sum.test_parameters) +@common.parametrize("test_data", Sum.test_parameters, skips=_dim_none_skips) @common.XfailIfNoCorstone320 def test_sum_u85_INT_1_0(test_data: Tuple): pipeline = EthosU85PipelineINT[input_t1]( @@ -220,3 +242,60 @@ def test_sum_tosa_FP(test_data: Callable[[], input_t2]): def test_sum_tosa_INT(test_data: Callable[[], input_t2]): pipeline = TosaPipelineINT[input_t1](SumDefault(), test_data(), SumDefault.aten_op) pipeline.run() + + +# a16w8 (int16 IO + int8 weights) coverage for sum.dim_IntList. Surfaces the +# Ethos-U85 int16 ReduceSum silent-zero issue tracked upstream at +# https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/issues/23. 
+ + +class SumLastDim(torch.nn.Module): + """Module under test: sum over the last dim (dim=-1) with keepdim=True.""" + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.sum(dim=-1, keepdim=True) + + +a16w8_sum_test_parameters = {  # id -> factory returning the (x,) input tuple + "rank1_16": lambda: (torch.rand(16),), + "rank3_8x1x16": lambda: (torch.rand(8, 1, 16),), + "rank3_4x4x16": lambda: (torch.rand(4, 4, 16),), +} + + +@common.parametrize("test_data", a16w8_sum_test_parameters) +@common.XfailIfNoCorstone300 +def test_sum_dim_intlist_a16w8_u55_INT(test_data: Callable[[], input_t1]): + pipeline = EthosU55PipelineINT[input_t1]( + SumLastDim(), + test_data(), + aten_op, + exir_ops=[], + a16w8_quantization=True, + symmetric_io_quantization=True, + qtol=128, + epsilon=2**-16, + ) + pipeline.run() + + +# All cases hit upstream Vela issue #23 (linked above). strict=False so the +# test target stays green both on stock Vela 5.0 (cases XFAIL) and once the +# Vela fix is in tree (cases XPASS). +@common.parametrize("test_data", a16w8_sum_test_parameters) +@common.XfailIfNoCorstone320 +@pytest.mark.xfail( + reason="Ethos-U85 int16 ReduceSum returns zero (vela#23)", strict=False +) +def test_sum_dim_intlist_a16w8_u85_INT(test_data: Callable[[], input_t1]): + pipeline = EthosU85PipelineINT[input_t1]( + SumLastDim(), + test_data(), + aten_op, + exir_ops=[], + a16w8_quantization=True, + symmetric_io_quantization=True, + qtol=128, + epsilon=2**-16, + ) + pipeline.run() diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl index 6e2539cf2dc..bad8a3eac76 100644 --- a/backends/arm/test/targets.bzl +++ b/backends/arm/test/targets.bzl @@ -30,6 +30,7 @@ def define_arm_tests(): "ops/test_slice.py", "ops/test_sigmoid.py", "ops/test_sub.py", + "ops/test_sum.py", "ops/test_tanh.py", "ops/test_view.py", "ops/test_cos.py",