From 3c389368506a78f504e10490fff78eb0a05e57e8 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Sun, 3 May 2026 16:24:09 +0200 Subject: [PATCH 1/2] NXP backend: Test `max_pool2d` with new Neutron flow. --- backends/nxp/backend/edge_helper.py | 4 +- .../max_pool2d_with_indices_converter.py | 75 ++++++++----- .../test_max_pool_2d_converter.py | 105 +++++++++++++++++- 3 files changed, 156 insertions(+), 28 deletions(-) diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py index 23924d364f4..0de0a5b0679 100644 --- a/backends/nxp/backend/edge_helper.py +++ b/backends/nxp/backend/edge_helper.py @@ -441,8 +441,10 @@ def output_quantization_type( │ """ users = list(node.users) - if len(users) == 1: + if output_index is None: + # Basic QDQ case (without getitem nodes). if not _is_quantize(quantize_node := users[0]): + # Broken QDQ schema. return None else: # Multiple users diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py index d8b3cdb3707..e300d6bbe9f 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py @@ -6,6 +6,7 @@ import operator import numpy as np +import torch from executorch.backends.nxp.backend.edge_helper import try_get_arg from executorch.backends.nxp.backend.ir.converter.conversion import ( @@ -73,32 +74,54 @@ def _is_supported_on_target( MaxPool2DWithIndicesConverter._get_node_args(node) ) - output_shape = node.meta["val"][0].shape # Shape of the main output (index 0) - if output_shape[0] != 1: - # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 - return False - - # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. 
- stride_h = stride[0] - if stride_h not in (1, 2): - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 - return False - - channels = output_shape[1] - if channels % neutron_target_spec.get_num_macs() != 0: - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 - return False - - if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 - - # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the - # effective kernel size, which is an even stricter requirement than what Neutron imposes. - # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 - return False + if custom_delegation_options.use_new_flow_neutron_c: + # Requirements specified by the new Neutron flow documentation. + + supported_types = [torch.int8, torch.uint8] + if not NodeConverter.uses_quantization_type_for_io( + node, supported_types, [0], [0] + ): + return False + + maximum_supported_kernel_size = 4096 + # If there is no padding, Neutron allows maximum stride of 4096. Otherwise, it's 32. But the converter + # always inserts a `Pad` operator to add the padding, so the `MaxPool` never pads its input itself, so + # 4096 is always the limit. And similarly, the `MaxPool` input padding limitation does not apply either. 
+ maximum_supported_stride = 4096 + + if any(k > maximum_supported_kernel_size for k in kernel_size): + return False + if any(s > maximum_supported_stride for s in stride): + return False + + else: + # Shape of the main output (index 0) + output_shape = node.meta["val"][0].shape + if output_shape[0] != 1: + # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 + return False + + # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. + stride_h = stride[0] + if stride_h not in (1, 2): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 + return False + + channels = output_shape[1] + if channels % neutron_target_spec.get_num_macs() != 0: + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 + return False + + if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 + + # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the + # effective kernel size, which is an even stricter requirement than what Neutron imposes. 
+ # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 + return False return True diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 6bb1000b38b..eb532c1db42 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -19,6 +19,8 @@ ToChannelFirstPreprocess, ToChannelLastPreprocess, ) +from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare from executorch.backends.nxp.tests.use_qat import * # noqa F403 # noinspection PyProtectedMember @@ -47,7 +49,7 @@ def forward(self, x): class MaxPool2dModule(torch.nn.Module): - def __init__(self, kernel_size=3, **kwargs): + def __init__(self, kernel_size: int | tuple[int, ...] = 3, **kwargs): super().__init__() self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs) @@ -250,3 +252,104 @@ def test_max_pool_2d__from_1d(self, mocker): tflite_input_preprocess=ToChannelLastPreprocess(), tflite_output_preprocess=ToChannelFirstPreprocess(), ) + + +class TestMaxPool2DNewNeutronFlow: + # noinspection PyMethodMayBeStatic + def assert_delegated(self, model, input_shape): + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, # Delegated MaxPool. + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + ) + + # noinspection PyMethodMayBeStatic + def assert_not_delegated(self, model, input_shape): + delegated_ep = to_quantized_edge_program( + model, input_shape, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `max_pool2d` was NOT delegated. 
+ assert not graph_contains_any_of_ops( + delegated_ep.graph, [ExecutorchDelegateCall] + ) + assert graph_contains_any_of_ops(delegated_ep.graph, [MaxPool2D]) + + def test__basic_nsys_inference(self): + input_shape = (2, 4, 6, 7) # The old flow limited the batch size to 1. + model = MaxPool2dModule() + self.assert_delegated(model, input_shape) + + def test__kernel_size_limit(self): + kernel_size = (1, 4096) + input_shape = (1, 4) + kernel_size + model = MaxPool2dModule(kernel_size) + self.assert_delegated(model, input_shape) + + def test__kernel_size_limit_exceeded(self): + kernel_size = (1, 4097) # Exceeds the kernel size limit. + input_shape = (1, 4) + kernel_size + model = MaxPool2dModule(kernel_size) + self.assert_not_delegated(model, input_shape) + + def test__stride_limit__no_padding(self): + stride = 4096 + input_shape = (1, 4, 1, 4096) + model = MaxPool2dModule(1, stride=stride) + self.assert_delegated(model, input_shape) + + def test__stride_limit_exceeded__no_padding(self): + stride = 4097 # Exceeds the stride limit. + input_shape = (1, 4, 1, 4096) + model = MaxPool2dModule(1, stride=stride) + self.assert_not_delegated(model, input_shape) + + def test__stride_limit__padding(self): + padding = 1 + stride = 4096 + input_shape = (1, 2, 3, stride) + model = MaxPool2dModule(3, stride=stride, padding=padding) + self.assert_delegated(model, input_shape) + + def test__stride_limit_exceeded__padding(self): + padding = 1 + stride = 4097 # Exceeds the stride limit. + input_shape = (1, 2, 3, stride) + model = MaxPool2dModule(3, stride=stride, padding=padding) + self.assert_not_delegated(model, input_shape) + + @pytest.mark.skip( + reason="Large padding requires large kernel size which results in an extremely slow test." + ) + def test__padding_limit(self): + # As the padding is added via a `Pad` operator (not the `MaxPool` arguments), there is no limit to the padded + # value. 
But as padding can be at most half of the kernel size (PyTorch requirement) and kernel size is limited + # to 4096, padding of 2048 is the limit. + padding = 2048 + kernel_size = padding * 2 + input_shape = (1, 1, 2, 3) + model = MaxPool2dModule(kernel_size, padding=padding) + self.assert_delegated(model, input_shape) + + def test__padding__max_pool_limit_exceeded(self): + # NeutronIR `MaxPool` padding is limited to 32. But as it is added by the `Pad` operator instead, there is no + # limit. This test ensures the `MaxPool` padding limit is not a problem. + padding = 33 + kernel_size = padding * 2 + input_shape = (1, 2, 3, 4) + model = MaxPool2dModule(kernel_size, padding=padding) + self.assert_delegated(model, input_shape) + + def test__padding_to_kernel_ratio_exceeded(self): + # Both PyTorch and Neutron require the padding to be at most half of the kernel size. + kernel_size = 3 + padding = 2 # More than half of the kernel size. + input_shape = (1, 2, 3, 4) + model = MaxPool2dModule(kernel_size, padding=padding) + with pytest.raises( + RuntimeError, match="pad should be at most half of effective kernel size" + ): + to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True) From 7096412872e155f9b193fa4d8de4dce4f479f4b8 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Mon, 4 May 2026 09:34:32 +0200 Subject: [PATCH 2/2] NXP backend: Test avg_pool1d and max_pool1d with new Neutron flow. 
--- .../test_avg_pool2d_converter.py | 25 ++++++++++++++++- .../test_max_pool_2d_converter.py | 27 +++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py index e1766c3aabd..00c68376bd3 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py @@ -28,7 +28,10 @@ ToNCHWPreprocess, ToNHWCPreprocess, ) -from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.graph_verifier import ( + BaseGraphVerifier, + NonDelegatedNode, +) from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule from executorch.backends.nxp.tests.nsys_testing import lower_run_compare @@ -370,3 +373,23 @@ def test__stride_limit_exceeded(self): delegated_ep.graph, [ExecutorchDelegateCall] ) assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D]) + + +class TestAvgPool1DNewNeutronFlow: + + # Just a basic test to verify that the operator gets extended to the 2D variant correctly. + def test__basic_nsys_inference__view_not_delegated(self): + input_shape = (2, 4, 6) # The old flow limited the batch size to 1. + model = AvgPool1DModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, # Delegated AvgPool. + exp_non_delegated_nodes=[ + NonDelegatedNode( + "aten_view_copy_default", 2 + ) # Non delegated due to shape requirements. 
+ ], + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index eb532c1db42..88efb6b7825 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -6,7 +6,6 @@ import operator import numpy as np -import pytest import torch from executorch.backends.nxp.backend.edge_program_converter import ( @@ -19,9 +18,13 @@ ToChannelFirstPreprocess, ToChannelLastPreprocess, ) -from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.graph_verifier import ( + BaseGraphVerifier, + NonDelegatedNode, +) from executorch.backends.nxp.tests.nsys_testing import lower_run_compare from executorch.backends.nxp.tests.use_qat import * # noqa F403 +import pytest # noinspection PyProtectedMember from executorch.exir.dialects._ops import ops as exir_ops @@ -353,3 +356,23 @@ def test__padding_to_kernel_ratio_exceeded(self): RuntimeError, match="pad should be at most half of effective kernel size" ): to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True) + + +class TestMaxPool1DNewNeutronFlow: + + # Just a basic test to verify that the operator gets extended to the 2D variant correctly. + def test__basic_nsys_inference__view_not_delegated(self): + input_shape = (2, 4, 6) # The old flow limited the batch size to 1. + model = MaxPool1DModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, # Delegated MaxPool. + exp_non_delegated_nodes=[ + NonDelegatedNode( + "aten_view_copy_default", 2 + ) # Non delegated due to shape requirements. + ], + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + )