From 3c389368506a78f504e10490fff78eb0a05e57e8 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Sun, 3 May 2026 16:24:09 +0200 Subject: [PATCH 1/2] NXP backend: Test `max_pool2d` with new Neutron flow. --- backends/nxp/backend/edge_helper.py | 4 +- .../max_pool2d_with_indices_converter.py | 75 ++++++++----- .../test_max_pool_2d_converter.py | 105 +++++++++++++++++- 3 files changed, 156 insertions(+), 28 deletions(-) diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py index 23924d364f4..0de0a5b0679 100644 --- a/backends/nxp/backend/edge_helper.py +++ b/backends/nxp/backend/edge_helper.py @@ -441,8 +441,10 @@ def output_quantization_type( │ """ users = list(node.users) - if len(users) == 1: + if output_index is None: + # Basic QDQ case (without getitem nodes). if not _is_quantize(quantize_node := users[0]): + # Broken QDQ schema. return None else: # Multiple users diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py index d8b3cdb3707..e300d6bbe9f 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py @@ -6,6 +6,7 @@ import operator import numpy as np +import torch from executorch.backends.nxp.backend.edge_helper import try_get_arg from executorch.backends.nxp.backend.ir.converter.conversion import ( @@ -73,32 +74,54 @@ def _is_supported_on_target( MaxPool2DWithIndicesConverter._get_node_args(node) ) - output_shape = node.meta["val"][0].shape # Shape of the main output (index 0) - if output_shape[0] != 1: - # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 - return False - - # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. 
- stride_h = stride[0] - if stride_h not in (1, 2): - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 - return False - - channels = output_shape[1] - if channels % neutron_target_spec.get_num_macs() != 0: - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 - return False - - if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 - # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 - - # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the - # effective kernel size, which is an even stricter requirement than what Neutron imposes. - # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 - return False + if custom_delegation_options.use_new_flow_neutron_c: + # Requirements specified by the new Neutron flow documentation. + + supported_types = [torch.int8, torch.uint8] + if not NodeConverter.uses_quantization_type_for_io( + node, supported_types, [0], [0] + ): + return False + + maximum_supported_kernel_size = 4096 + # If there is no padding, Neutron allows maximum stride of 4096. Otherwise, it's 32. But the converter + # always inserts a `Pad` operator to add the padding, so the `MaxPool` never pads its input itself, so + # 4096 is always the limit. And similarly, the `MaxPool` input padding limitation does not apply either. 
+ maximum_supported_stride = 4096 + + if any(k > maximum_supported_kernel_size for k in kernel_size): + return False + if any(s > maximum_supported_stride for s in stride): + return False + + else: + # Shape of the main output (index 0) + output_shape = node.meta["val"][0].shape + if output_shape[0] != 1: + # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 + return False + + # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. + stride_h = stride[0] + if stride_h not in (1, 2): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 + return False + + channels = output_shape[1] + if channels % neutron_target_spec.get_num_macs() != 0: + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 + return False + + if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 + + # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the + # effective kernel size, which is an even stricter requirement than what Neutron imposes. 
+ # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 + return False return True diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 6bb1000b38b..eb532c1db42 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -19,6 +19,8 @@ ToChannelFirstPreprocess, ToChannelLastPreprocess, ) +from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.nsys_testing import lower_run_compare from executorch.backends.nxp.tests.use_qat import * # noqa F403 # noinspection PyProtectedMember @@ -47,7 +49,7 @@ def forward(self, x): class MaxPool2dModule(torch.nn.Module): - def __init__(self, kernel_size=3, **kwargs): + def __init__(self, kernel_size: int | tuple[int, ...] = 3, **kwargs): super().__init__() self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs) @@ -250,3 +252,104 @@ def test_max_pool_2d__from_1d(self, mocker): tflite_input_preprocess=ToChannelLastPreprocess(), tflite_output_preprocess=ToChannelFirstPreprocess(), ) + + +class TestMaxPool2DNewNeutronFlow: + # noinspection PyMethodMayBeStatic + def assert_delegated(self, model, input_shape): + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, # Delegated MaxPool. + exp_non_delegated_nodes=[], + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + ) + + # noinspection PyMethodMayBeStatic + def assert_not_delegated(self, model, input_shape): + delegated_ep = to_quantized_edge_program( + model, input_shape, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `max_pool2d` was NOT delegated. 
+ assert not graph_contains_any_of_ops( + delegated_ep.graph, [ExecutorchDelegateCall] + ) + assert graph_contains_any_of_ops(delegated_ep.graph, [MaxPool2D]) + + def test__basic_nsys_inference(self): + input_shape = (2, 4, 6, 7) # The old flow limited the batch size to 1. + model = MaxPool2dModule() + self.assert_delegated(model, input_shape) + + def test__kernel_size_limit(self): + kernel_size = (1, 4096) + input_shape = (1, 4) + kernel_size + model = MaxPool2dModule(kernel_size) + self.assert_delegated(model, input_shape) + + def test__kernel_size_limit_exceeded(self): + kernel_size = (1, 4097) # Exceeds the kernel size limit. + input_shape = (1, 4) + kernel_size + model = MaxPool2dModule(kernel_size) + self.assert_not_delegated(model, input_shape) + + def test__stride_limit__no_padding(self): + stride = 4096 + input_shape = (1, 4, 1, 4096) + model = MaxPool2dModule(1, stride=stride) + self.assert_delegated(model, input_shape) + + def test__stride_limit_exceeded__no_padding(self): + stride = 4097 # Exceeds the stride limit. + input_shape = (1, 4, 1, 4096) + model = MaxPool2dModule(1, stride=stride) + self.assert_not_delegated(model, input_shape) + + def test__stride_limit__padding(self): + padding = 1 + stride = 4096 + input_shape = (1, 2, 3, stride) + model = MaxPool2dModule(3, stride=stride, padding=padding) + self.assert_delegated(model, input_shape) + + def test__stride_limit_exceeded__padding(self): + padding = 1 + stride = 4097 # Exceeds the stride limit. + input_shape = (1, 2, 3, stride) + model = MaxPool2dModule(3, stride=stride, padding=padding) + self.assert_not_delegated(model, input_shape) + + @pytest.mark.skip( + reason="Large padding requires large kernel size which results in an extremely slow test." + ) + def test__padding_limit(self): + # As the padding is added via a `Pad` operator (not the `MaxPool` arguments), there is no limit to the padded + # value. 
But as padding can be at most half of the kernel size (PyTorch requirement) and kernel size is limited + # to 4096, padding of 2048 is the limit. + padding = 2048 + kernel_size = padding * 2 + input_shape = (1, 1, 2, 3) + model = MaxPool2dModule(kernel_size, padding=padding) + self.assert_delegated(model, input_shape) + + def test__padding__max_pool_limit_exceeded(self): + # NeutronIR `MaxPool` padding is limited to 32. But as it is added by the `Pad` operator instead, there is no + # limit. This test ensures the `MaxPool` padding limit is not a problem. + padding = 33 + kernel_size = padding * 2 + input_shape = (1, 2, 3, 4) + model = MaxPool2dModule(kernel_size, padding=padding) + self.assert_delegated(model, input_shape) + + def test__padding_to_kernel_ratio_exceeded(self): + # Both PyTorch and Neutron require the padding to be at most half of the kernel size. + kernel_size = 3 + padding = 2 # More than half of the kernel size. + input_shape = (1, 2, 3, 4) + model = MaxPool2dModule(kernel_size, padding=padding) + with pytest.raises( + RuntimeError, match="pad should be at most half of effective kernel size" + ): + to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True) From 7096412872e155f9b193fa4d8de4dce4f479f4b8 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Mon, 4 May 2026 09:34:32 +0200 Subject: [PATCH 2/2] NXP backend: Test avg_pool1d and max_pool1d with new Neutron flow. 
--- .../test_avg_pool2d_converter.py | 25 ++++++++++++++++- .../test_max_pool_2d_converter.py | 27 +++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py index e1766c3aabd..00c68376bd3 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py @@ -28,7 +28,10 @@ ToNCHWPreprocess, ToNHWCPreprocess, ) -from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.graph_verifier import ( + BaseGraphVerifier, + NonDelegatedNode, +) from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule from executorch.backends.nxp.tests.nsys_testing import lower_run_compare @@ -370,3 +373,23 @@ def test__stride_limit_exceeded(self): delegated_ep.graph, [ExecutorchDelegateCall] ) assert graph_contains_any_of_ops(delegated_ep.graph, [AvgPool2D]) + + +class TestAvgPool1DNewNeutronFlow: + + # Just a basic test to verify that the operator gets extended to the 2D variant correctly. + def test__basic_nsys_inference__view_not_delegated(self): + input_shape = (2, 4, 6) # The old flow limited the batch size to 1. + model = AvgPool1DModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, # Delegated AvgPool. + exp_non_delegated_nodes=[ + NonDelegatedNode( + "aten_view_copy_default", 2 + ) # Non delegated due to shape requirements. 
+ ], + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index eb532c1db42..88efb6b7825 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -6,7 +6,6 @@ import operator import numpy as np -import pytest import torch from executorch.backends.nxp.backend.edge_program_converter import ( @@ -19,9 +18,13 @@ ToChannelFirstPreprocess, ToChannelLastPreprocess, ) -from executorch.backends.nxp.tests.graph_verifier import BaseGraphVerifier +from executorch.backends.nxp.tests.graph_verifier import ( + BaseGraphVerifier, + NonDelegatedNode, +) from executorch.backends.nxp.tests.nsys_testing import lower_run_compare from executorch.backends.nxp.tests.use_qat import * # noqa F403 +import pytest # noinspection PyProtectedMember from executorch.exir.dialects._ops import ops as exir_ops @@ -353,3 +356,23 @@ def test__padding_to_kernel_ratio_exceeded(self): RuntimeError, match="pad should be at most half of effective kernel size" ): to_quantized_edge_program(model, input_shape, use_new_flow_neutron_c=True) + + +class TestMaxPool1DNewNeutronFlow: + + # Just a basic test to verify that the operator gets extended to the 2D variant correctly. + def test__basic_nsys_inference__view_not_delegated(self): + input_shape = (2, 4, 6) # The old flow limited the batch size to 1. + model = MaxPool1DModule() + graph_verifier = BaseGraphVerifier( + exp_num_delegate_call_nodes=1, # Delegated MaxPool. + exp_non_delegated_nodes=[ + NonDelegatedNode( + "aten_view_copy_default", 2 + ) # Non delegated due to shape requirements. + ], + ) + + lower_run_compare( + model, input_shape, graph_verifier, use_new_flow_neutron_c=True + )