Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch

from executorch.backends.nxp.backend.data_format import NXP_NODE_FORMAT
from executorch.backends.nxp.backend.ir.converter.node_converter import (
CustomDelegationOptions,
NodeConverter,
Expand All @@ -23,11 +26,33 @@ def _is_supported_on_target(
parameters_mapping: dict[str, Parameter],
custom_delegation_options: CustomDelegationOptions,
) -> bool:
if NodeConverter.uses_shape_broadcasting(node):
# Shape broadcasting may require the addition of `Transpose` ops during conversion.
return False
if custom_delegation_options.use_new_flow_neutron_c:
if not NodeConverter.at_least_one_input_shape_matches_the_output_shape(
node
):
return False

return True
# If one input is in channel first and ranks of input tensors are not equal, we need to add Transposes
# Transpose is currently not supported for new flow
if any(
input_node.meta[NXP_NODE_FORMAT].is_channels_first()
for input_node in node.all_input_nodes
) and NodeConverter._node_inputs_ranks_not_equal(node):
return False

supported_types = [torch.int8, torch.uint8]
if not NodeConverter.uses_quantization_type_for_io(
node, supported_types, [0, 1], [0]
):
return False

return True
else:
if NodeConverter.uses_shape_broadcasting(node):
# Shape broadcasting may require the addition of `Transpose` ops during conversion.
return False

return True

@staticmethod
def _is_supported_in_IR(
Expand All @@ -45,9 +70,12 @@ def _is_supported_in_IR(

return True

# sub.Tensor Node format: (Tensor self, Tensor other, *, Scalar alpha=1)
def convert(self, node: Node):
"""Convert 'sub_tensor' operator to NeutronIR 'Sub'."""
"""Convert 'sub_tensor' operator to NeutronIR 'Sub'.
The ExecuTorch schema is:
sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1)
"""

self.assert_convertible(node)

t_op = self._create_tflite_op_with_io_tensors(node)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,39 @@
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import pytest
import torch

from executorch.backends.nxp.backend.edge_program_converter import (
EdgeProgramToIRConverter,
)
from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
from executorch.backends.nxp.tests.executorch_pipeline import (
ModelInputSpec,
to_quantized_edge_program,
)
from executorch.backends.nxp.tests.executors import (
convert_run_compare,
graph_contains_any_of_ops,
ToChannelFirstPreprocess,
ToChannelLastPreprocess,
)
from executorch.backends.nxp.tests.graph_verifier import DetailedGraphVerifier
from executorch.backends.nxp.tests.model_output_comparator import (
NumericalStatsOutputComparator,
)
from executorch.backends.nxp.tests.models import (
SubTensorConvModule,
SubTensorModule,
SubTensorOneInputModule,
)
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.backends.nxp.tests.nsys_testing import lower_run_compare
from executorch.backends.nxp.tests.ops_aliases import (
Convolution,
ExecutorchDelegateCall,
SubTensor,
)
from torch.export import ExportedProgram
from executorch.backends.nxp.tests.use_qat import * # noqa F403

Expand Down Expand Up @@ -63,7 +77,7 @@ def test_sub_tensor_quant_conversion(mocker, input_shape, use_qat):
input_data = {0: input_data_1, 1: input_data_2}

nodes = list(exported_program.graph.nodes)
assert nodes[4].target == exir_ops.edge.aten.sub.Tensor
assert nodes[4].target == SubTensor

convert_run_compare(
exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data
Expand Down Expand Up @@ -96,7 +110,7 @@ def test_sub_tensor_one_input_quant_conversion(mocker, input_shape, use_qat):
input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)

nodes = list(exported_program.graph.nodes)
assert nodes[2].target == exir_ops.edge.aten.sub.Tensor
assert nodes[2].target == SubTensor

convert_run_compare(
exported_program, tfl_model=tflite_flatbuffers_model, input_data=input_data
Expand Down Expand Up @@ -141,7 +155,7 @@ def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape, use_qat):
input_data = {0: input_data_1, 1: input_data_2}

nodes = list(exported_program.graph.nodes)
assert nodes[15].target == exir_ops.edge.aten.sub.Tensor
assert nodes[15].target == SubTensor

convert_run_compare(
exported_program,
Expand Down Expand Up @@ -176,6 +190,164 @@ def test_sub_tensor_broadcasting_unsupported_quant_conversion(
nodes = list(edge_program.graph.nodes)

# Broadcast is not supported, node is not converted
assert (
nodes[6].target == exir_ops.edge.aten.sub.Tensor
) # Sub Tensor is not delegated.
assert nodes[6].target == SubTensor # Sub Tensor is not delegated.


class TestSubTensorNewNeutronFlow:
@pytest.mark.skip("AIR-14602: incorrect results")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The AIR-14602 refers to the Add operator. Are you sure this is correct?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using @pytest.mark.xfail(strict=True) would be better here, as once Neutron is updated and the bug is fixed, we will be alerted and will update the test.
With skip it's possible that we will forget about it.

This applies to other tests too, not just this one.

@pytest.mark.parametrize(
"x_input_shape",
[
pytest.param((6, 8), id="2D."),
pytest.param((1, 4, 8), id="3D."),
pytest.param((1, 4, 8, 8), id="4D."),
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't use multiples of 8 in the shapes, to make sure there is no num_macs-related restriction.
This applies to all tests, not just this one.

],
)
def test__basic_nsys_inference(self, x_input_shape, mocker):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please separate out the passing and failing cases (I assume at least some cases produce correct outputs. If not, please try to find some :D). Right now everything is skipped. It's desirable to have at least some passing tests.

x_input_spec = ModelInputSpec(x_input_shape)
model = SubTensorModule()
graph_verifier = DetailedGraphVerifier(
mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={}
)

lower_run_compare(
model,
[x_input_spec, x_input_spec],
graph_verifier,
use_new_flow_neutron_c=True,
)

@pytest.mark.xfail(strict=True, reason="AIR-14602: incorrect results")
# NOTE(review): xfail(strict=True) instead of skip, so the suite alerts us the
# moment Neutron is fixed and this test starts passing (a strict xfail that
# unexpectedly passes fails the run), instead of silently staying disabled.
@pytest.mark.parametrize(
    "x_input_shape",
    [
        # NOTE(review): all dims here are multiples of 8 — consider adding
        # non-multiple shapes to rule out a num_macs-related restriction.
        pytest.param((6, 8), id="2D."),
        pytest.param((1, 4, 8), id="3D."),
        pytest.param((1, 4, 8, 8), id="4D."),
    ],
)
def test__basic_nsys_inference_qat(self, x_input_shape, mocker):
    """Lower a QAT-quantized `SubTensorModule` through the new Neutron flow.

    Verifies that exactly one `sub.Tensor` op is delegated (and nothing is
    left undelegated), then runs the lowered program and compares outputs
    via `NumericalStatsOutputComparator`.
    """
    x_input_spec = ModelInputSpec(x_input_shape)
    model = SubTensorModule()
    # Both inputs share the same shape, so no broadcasting is involved here.
    comparator = NumericalStatsOutputComparator()
    graph_verifier = DetailedGraphVerifier(
        mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={}
    )

    lower_run_compare(
        model,
        [x_input_spec, x_input_spec],
        graph_verifier,
        output_comparator=comparator,
        use_new_flow_neutron_c=True,
        use_qat=True,
    )

@pytest.mark.skip("AIR-14602: incorrect results")
@pytest.mark.parametrize(
"input_spec",
[
pytest.param(
[ModelInputSpec((4, 6)), ModelInputSpec((1, 6))], id="2 inputs 2D."
),
pytest.param(
[ModelInputSpec((5, 3, 4)), ModelInputSpec((1, 3, 1))],
id="2 inputs 3D.",
),
pytest.param(
[ModelInputSpec((4,)), ModelInputSpec((4, 4))], id="2 inputs 2D+3D."
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The id seems incorrect. Looks like 1D - 2D to me.

),
],
)
def test__correct_broadcast(self, input_spec, mocker):
model = SubTensorModule()
graph_verifier = DetailedGraphVerifier(
mocker, expected_delegated_ops={SubTensor: 1}, expected_non_delegated_ops={}
)

lower_run_compare(
model, input_spec, graph_verifier, use_new_flow_neutron_c=True
)

@pytest.mark.parametrize(
    "input_spec",
    [
        pytest.param(
            [ModelInputSpec((4, 1)), ModelInputSpec((1, 6))], id="2 inputs 2D."
        ),
        pytest.param(
            [ModelInputSpec((1, 3, 4)), ModelInputSpec((5, 3, 1))],
            id="2 inputs 3D.",
        ),
        pytest.param(
            [ModelInputSpec((6, 4)), ModelInputSpec((6, 6, 1))],
            id="2 inputs 2D+3D.",
        ),
    ],
)
def test__incorrect_broadcast(self, input_spec):
    """Unsupported broadcasting must prevent delegation of `sub.Tensor`.

    In every parametrized case neither input shape equals the output shape,
    which the new Neutron flow does not support, so nothing may be delegated.
    """
    # Broadcasting where no input shape matches the output shape is not supported.
    model = SubTensorModule()

    delegated_ep = to_quantized_edge_program(
        model, input_spec, use_new_flow_neutron_c=True
    ).exported_program()

    # Make sure the `sub.Tensor` was NOT delegated: no delegate call at all,
    # and the original edge op is still present in the graph.
    assert not graph_contains_any_of_ops(
        delegated_ep.graph, [ExecutorchDelegateCall]
    )
    assert graph_contains_any_of_ops(delegated_ep.graph, [SubTensor])

@pytest.mark.skip("AIR-14602: incorrect results")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to find a working example so we can have a passing test?

@pytest.mark.parametrize(
"x_input_shape",
[
pytest.param(
(1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."
),
],
)
def test__w_conv(self, x_input_shape, mocker):
model = SubTensorConvModule()

n, c, h, w = x_input_shape
y_input_spec = ModelInputSpec((n, 8, h, w))
x_input_spec = ModelInputSpec(x_input_shape)

graph_verifier = DetailedGraphVerifier(
mocker,
expected_delegated_ops={SubTensor: 1, Convolution: 1},
expected_non_delegated_ops={},
)

lower_run_compare(
model,
[x_input_spec, y_input_spec],
graph_verifier,
use_new_flow_neutron_c=True,
)

@pytest.mark.parametrize(
"input_spec",
[
pytest.param(
[ModelInputSpec((1, 4, 5, 5)), ModelInputSpec((1, 5))],
id="2 inputs 4D + 2D.",
),
pytest.param(
[ModelInputSpec((1, 4, 4, 10)), ModelInputSpec((1, 4, 1))],
id="2 inputs last + 3D.",
),
],
)
def test__w_conv_unsupported(self, input_spec):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also add a test for the supported channels first broadcasting case.

model = SubTensorConvModule()

delegated_ep = to_quantized_edge_program(
model, input_spec, use_new_flow_neutron_c=True
).exported_program()

# Make sure the `add.Tensor` was NOT delegated.
assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall])
assert graph_contains_any_of_ops(delegated_ep.graph, [SubTensor])
1 change: 1 addition & 0 deletions backends/nxp/tests/ops_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
Squeeze = exir_ops.edge.aten.squeeze.default
SqueezeDim = exir_ops.edge.aten.squeeze.dim
SqueezeDims = exir_ops.edge.aten.squeeze.dims
SubTensor = exir_ops.edge.aten.sub.Tensor
Unsqueeze = exir_ops.edge.aten.unsqueeze.default
UpsampleBilinear2D = exir_ops.edge.aten.upsample_bilinear2d.vec
UpsampleNearest2D = exir_ops.edge.aten.upsample_nearest2d.vec
Expand Down
Loading