diff --git a/backends/nxp/backend/custom_delegation_options.py b/backends/nxp/backend/custom_delegation_options.py index 6f669604226..18eadc0bbbf 100644 --- a/backends/nxp/backend/custom_delegation_options.py +++ b/backends/nxp/backend/custom_delegation_options.py @@ -22,7 +22,3 @@ class CustomDelegationOptions: # not create any NeutronGraph that can be called. This is done by the partitioner itself, and is not handled by # the individual node converters. allow_no_op_partitions: bool = False - - # The new neutron converter flow has different constraints for supported operators. These need to be addressed when - # deciding is operator is delegated or not in _is_supported_on_target(). - use_new_flow_neutron_c: bool = False diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py index e3052ee1205..cb3a360f604 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/abs_converter.py @@ -5,7 +5,6 @@ import torch - from executorch.backends.nxp.backend.ir.converter.node_converter import ( CustomDelegationOptions, NeutronTargetSpec, @@ -36,7 +35,7 @@ def _is_supported_on_target( custom_delegation_options: CustomDelegationOptions, ) -> bool: - if custom_delegation_options.use_new_flow_neutron_c: + if neutron_target_spec.use_new_flow_neutron_c: # Requirements specified by the new Neutron flow documentation. 
supported_types = [torch.int8, torch.uint8] diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py index b8ad7211a56..02cf73016b6 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/avg_pool_2d_converter.py @@ -5,7 +5,6 @@ import numpy as np import torch - from executorch.backends.nxp.backend.ir.converter.conversion import ( aten_translator, common, @@ -22,7 +21,6 @@ from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( average_pool_2d_options, ) - from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from torch.fx import Node from torch.nn import Parameter @@ -66,7 +64,7 @@ def _is_supported_on_target( kernel = node.args[1] stride = node.args[2] - if custom_delegation_options.use_new_flow_neutron_c: + if neutron_target_spec.use_new_flow_neutron_c: # Requirements specified by the new Neutron flow documentation. supported_types = [torch.int8, torch.uint8] diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py index 0917c03038c..4f4ba348d01 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/clamp_converter.py @@ -3,8 +3,12 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+from copy import copy + +import numpy as np from executorch.backends.nxp.backend.edge_helper import try_get_arg from executorch.backends.nxp.backend.ir.converter.node_converter import ( + _is_dequant_node, CustomDelegationOptions, is_not_qdq_node, NodeConverter, @@ -12,6 +16,11 @@ from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import ( BuiltinOperator, ) +from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model +from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( + maximum_options, + minimum_options, +) from executorch.backends.nxp.backend.neutron_operator_support import ( activation_supported_on_target, ) @@ -21,6 +30,16 @@ from torch.nn import Parameter +def _is_convertible_to_relu(node): + bounds = ClampConverter._get_clamp_bounds(node) + + # Only some specific bounds are supported on the target hardware. + if bounds not in ClampConverter.SUPPORTED_BOUNDS.values(): + return False + + return True + + class ClampConverter(NodeConverter): SUPPORTED_BOUNDS = { "ReluN1To1": (-1, 1), @@ -48,7 +67,7 @@ def _get_clamp_bounds(clamp_node: Node) -> tuple[float | None, float | None]: def _is_supported_in_IR( node: Node, parameters_mapping: dict[str, Parameter], - custom_delegation_options: CustomDelegationOptions, + _: CustomDelegationOptions, ) -> bool: # No NeutronIR-specific restrictions. return True @@ -58,22 +77,19 @@ def _is_supported_on_target( node: Node, neutron_target_spec: NeutronTargetSpec, parameters_mapping: dict[str, Parameter], - custom_delegation_options: CustomDelegationOptions, + _: CustomDelegationOptions, ) -> bool: - bounds = ClampConverter._get_clamp_bounds(node) - - # Only some specific bounds are supported on the target hardware. 
- if bounds not in ClampConverter.SUPPORTED_BOUNDS.values(): - return False + if neutron_target_spec.use_new_flow_neutron_c: + return True - return True + return _is_convertible_to_relu(node) @classmethod def supports_partitioning_result( cls, node: Node, partition_list: list[Partition], - custom_delegation_options: CustomDelegationOptions, + _: CustomDelegationOptions, neutron_target_spec: NeutronTargetSpec, parameters_mapping: dict[str, Parameter], ) -> bool: @@ -91,6 +107,15 @@ def supports_partitioning_result( return True + @staticmethod + def propagate_quantization(from_node, to_node): + to_node.quantization = copy(from_node.quantization) + + @staticmethod + def _quantize_value(value, zp, scale, quant_min, quant_max): + rescaled_value = round(value / scale) + zp + return np.clip(rescaled_value, quant_min, quant_max) + def convert(self, node: Node): """Convert the `aten.clamp.default` operator to Neutron IR `Relu*` operators. The schema is: @@ -101,13 +126,57 @@ def convert(self, node: Node): ) -> Tensor """ self.assert_convertible(node) + to_relu = _is_convertible_to_relu(node) bounds = self._get_clamp_bounds(node) - t_op = self._create_tflite_op_with_io_tensors(node) - # noinspection PyTypeChecker,PyUnboundLocalVariable - t_op.opcode_index = self.builder.op_code_index_for_op_type( - self.BOUNDS_TO_NEUTRON_IR_OP[bounds] + if not self.neutron_target_spec.use_new_flow_neutron_c or to_relu: + # noinspection PyTypeChecker,PyUnboundLocalVariable + t_op.opcode_index = self.builder.op_code_index_for_op_type( + self.BOUNDS_TO_NEUTRON_IR_OP[bounds] + ) + self.builder.append_operators([t_op]) + return + + q_node = node.args[0] + assert _is_dequant_node(q_node) + _, scale, zp, quant_min, quant_max, _ = q_node.args + + x = t_op.tmp_inputs[0] + y = t_op.tmp_outputs[0] + + if x.quantization is not None and y.quantization is None: + self.propagate_quantization(x, y) + + if x.quantization != y.quantization: + raise AssertionError( + "Input and output quantization should be 
same in order to convert to max/min." + ) + + max_y = self.builder.duplicate_tensor(x) + + min_value, max_value = bounds + min_value = self._quantize_value(min_value, zp, scale, quant_min, quant_max) + max_value = self._quantize_value(max_value, zp, scale, quant_min, quant_max) + + min_tensor = self.builder.create_tensor_for_data( + np.array([min_value], np.int8), "min" + ) + self.propagate_quantization(x, min_tensor) + max_tensor = self.builder.create_tensor_for_data( + np.array([max_value], np.int8), "max" ) - self.builder.append_operators([t_op]) + self.propagate_quantization(x, max_tensor) + + max_op = tflite_model.Operator(builtin_options=maximum_options.Maximum()) + max_op.tmp_inputs = [x, max_tensor] + max_op.tmp_outputs = [max_y] + + min_op = tflite_model.Operator(builtin_options=minimum_options.Minimum()) + min_op.tmp_inputs = [max_y, min_tensor] + min_op.tmp_outputs = [y] + + self.propagate_quantization(x, max_y) + + self.builder.append_operators([max_op, min_op]) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py index e300d6bbe9f..73de100e8b2 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py @@ -7,7 +7,6 @@ import numpy as np import torch - from executorch.backends.nxp.backend.edge_helper import try_get_arg from executorch.backends.nxp.backend.ir.converter.conversion import ( aten_translator, @@ -74,7 +73,7 @@ def _is_supported_on_target( MaxPool2DWithIndicesConverter._get_node_args(node) ) - if custom_delegation_options.use_new_flow_neutron_c: + if neutron_target_spec.use_new_flow_neutron_c: # Requirements specified by the new Neutron flow documentation. 
supported_types = [torch.int8, torch.uint8] diff --git a/backends/nxp/backend/neutron_target_spec.py b/backends/nxp/backend/neutron_target_spec.py index a1d71cabddb..563bd4759a2 100644 --- a/backends/nxp/backend/neutron_target_spec.py +++ b/backends/nxp/backend/neutron_target_spec.py @@ -8,12 +8,10 @@ from enum import Enum import torch - from executorch.backends.nxp.backend.neutron_converter_manager import ( NeutronConverterManager, ) from executorch.exir.dialects._ops import ops as exir_ops - from torch.fx import Node @@ -98,13 +96,17 @@ class NeutronTargetSpec: The functionality for probing the properties of Neutron Target. """ - def __init__(self, target: str): + def __init__(self, target: str, use_new_flow_neutron_c: bool = False): converter_manager = NeutronConverterManager() converter_manager.verify_target(target) neutron_converter = converter_manager.get_converter() self.neutron_target = neutron_converter.getNeutronTarget(target) + # The new neutron converter flow has different constraints for supported operators. These need to be addressed when + # deciding if an operator is delegated or not in _is_supported_on_target(). + self.use_new_flow_neutron_c = use_new_flow_neutron_c + if self.is_subsystem(): raise ValueError( f"Target `{target}` is not a neutron-C target. Only MCU targets are supported at the moment."
diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py index f5e89823ee2..5c3b056bf72 100644 --- a/backends/nxp/nxp_backend.py +++ b/backends/nxp/nxp_backend.py @@ -14,7 +14,6 @@ import numpy as np import torch - from executorch.backends.nxp.backend.custom_delegation_options import ( CustomDelegationOptions, ) @@ -86,7 +85,9 @@ def neutron_compile_spec( :return: self for method chaining """ - self.config = NeutronTargetSpec(config) + self.config = NeutronTargetSpec( + config, use_new_flow_neutron_c=use_new_flow_neutron_c + ) assert ( self.output_format is None @@ -230,11 +231,11 @@ def preprocess( # noqa C901 ) tflite_model, io_formats = EdgeProgramToIRConverter().convert_program( edge_program, - neutron_target_spec=NeutronTargetSpec(target), - conversion_config=conversion_config, - custom_delegation_options=CustomDelegationOptions( - use_new_flow_neutron_c=use_new_flow_neutron_c + neutron_target_spec=NeutronTargetSpec( + target, use_new_flow_neutron_c=use_new_flow_neutron_c ), + conversion_config=conversion_config, + custom_delegation_options=CustomDelegationOptions(), ) neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert( diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index 0c46678b25a..bc2cc395002 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -9,7 +9,6 @@ _get_default_passes, NeutronAtenPassManager, ) - from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.quantizer.patterns import ( AbsPattern, @@ -255,53 +254,63 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec, is_qat: bool = False) OpQuantizer = NeutronAtenQuantizer super().__init__( [ - OpQuantizer(AbsPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(AdaptiveAvgPoolPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(AddTensorPattern(is_qat=is_qat), static_qconfig), + 
OpQuantizer(AbsPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer( + AdaptiveAvgPoolPattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer(AddTensorPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(AddmmPattern(self, is_qat=is_qat), static_fc_qconfig), - OpQuantizer(AvgPool1DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(AvgPool2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(BatchNormPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(BMMPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(CatPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ClampPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AvgPool1DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(AvgPool2DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(BatchNormPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(BMMPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(CatPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ClampPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(Conv2dPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer( ConvTranspose2dPattern(self, is_qat=is_qat), static_qconfig ), - OpQuantizer(DropoutPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(FlattenPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(HardTanhPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(HardTanhInPlacePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(LeakyReluPattern(is_qat=is_qat), static_fc_qconfig), - OpQuantizer(LeakyReluInPlacePattern(is_qat=is_qat), static_fc_qconfig), + OpQuantizer(DropoutPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(FlattenPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(HardTanhPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer( + HardTanhInPlacePattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer(LeakyReluPattern(self, is_qat=is_qat), static_fc_qconfig), + OpQuantizer( + LeakyReluInPlacePattern(self, 
is_qat=is_qat), static_fc_qconfig + ), OpQuantizer(LinearPattern(self, is_qat=is_qat), static_fc_qconfig), - OpQuantizer(MaxPool1DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(MaxPool2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(MeanDimPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool1DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool2DPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(MeanDimPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(MmPattern(self, is_qat=is_qat), static_qconfig), - OpQuantizer(MulTensorPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(NegPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(PadPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(PermutePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(PReLUPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ReluPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ReluInPlacePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ReshapePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SigmoidPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SliceTensorPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SoftMaxPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SqueezeDimPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SqueezeDimsPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SqueezePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(SubTensorPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(TanhPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(TanhInPlacePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(TransposeIntPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(UnsqueezePattern(is_qat=is_qat), static_qconfig), - OpQuantizer(UpsampleBilinear2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(UpsampleNearest2DPattern(is_qat=is_qat), static_qconfig), - OpQuantizer(ViewPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MulTensorPattern(self, 
is_qat=is_qat), static_qconfig), + OpQuantizer(NegPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(PadPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(PermutePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(PReLUPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ReluPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ReluInPlacePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ReshapePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SigmoidPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SliceTensorPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SoftMaxPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SqueezeDimPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SqueezeDimsPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SqueezePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(SubTensorPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(TanhPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(TanhInPlacePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(TransposeIntPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(UnsqueezePattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer( + UpsampleBilinear2DPattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer( + UpsampleNearest2DPattern(self, is_qat=is_qat), static_qconfig + ), + OpQuantizer(ViewPattern(self, is_qat=is_qat), static_qconfig), ] ) diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index bda554e0cce..12282adb872 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -10,7 +10,9 @@ from functools import partial import torch - +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.clamp_converter import ( + _is_convertible_to_relu, +) from executorch.backends.nxp.quantizer.utils import ( get_bias_qparams, 
get_bias_qparams_transp_conv, @@ -86,7 +88,8 @@ class PartitionAnchors: class QuantizationPattern(ABC): - def __init__(self, is_qat: bool = False): + def __init__(self, neutron_quantizer, is_qat: bool = False): + self.neutron_quantizer = neutron_quantizer self.is_qat = is_qat @abstractmethod @@ -157,9 +160,6 @@ def get_anchors( class BatchNormPattern(QuantizationPattern): - def __init__(self, is_qat: bool): - super().__init__(is_qat=is_qat) - def partition_types(self) -> list[OpOverload]: # BatchNorm quantization is needed only when in QAT mode return [torch.ops.aten.batch_norm.default] if self.is_qat else [] @@ -227,9 +227,8 @@ def partition_types(self): class AddmmPattern(QuantizationPattern): def __init__(self, neutron_quantizer, is_qat: bool): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -412,12 +411,48 @@ def get_anchors( ) -class ClampPattern(SingleInputBasicPattern): +class ClampPattern(QuantizationPattern): """Quantizer for the `aten.clamp.default` operator.""" def partition_types(self): return [torch.ops.aten.clamp.default] + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + node = fused_partition[0].nodes[-1] + + if ( + self.neutron_quantizer.neutron_target_spec.use_new_flow_neutron_c + and not _is_convertible_to_relu(node) + ): + # Shared spec pattern + assert len(fused_partition[0].input_nodes) == 1 + prev_node = fused_partition[0].input_nodes[0] + + # Previous node was not quantized => we are not able to share q-params + if Q_ANNOTATION_KEY not in prev_node.meta: + return None + + qspec = SharedQuantizationSpec(prev_node) + + return PartitionAnchors( + inputs=[(node, NodeArgsIdx(0))], + weights=[], + biases=[], + output=[ + (node, qspec), + ], + ) + else: + # Single input pattern + return PartitionAnchors( 
+ inputs=[(node, NodeArgsIdx(0))], + weights=[], + biases=[], + output=[(node,)], + ) + def _is_batch_norm(node_: Node) -> bool: return node_.op == "call_function" and node_.target in [ @@ -488,9 +523,8 @@ def get_anchors( class Conv2dPattern(ConvPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -582,7 +616,7 @@ def get_anchors( class ConvTranspose2dPattern(QuantizationPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( @@ -745,9 +779,8 @@ def partition_types(self): class LinearPattern(QuantizationPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -836,9 +869,8 @@ def partition_types(self): class MmPattern(QuantizationPattern): def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) @@ -1172,9 +1204,8 @@ class ActivationsConcatClusterPattern(QuantizationPattern): """ def __init__(self, neutron_quantizer, is_qat: bool = False): - super().__init__(is_qat=is_qat) + super().__init__(neutron_quantizer, is_qat=is_qat) - self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info ) diff --git 
a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index 8f588be621d..69a1a246b1a 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -13,7 +13,6 @@ import eiq_neutron_sdk import numpy as np import torch - from executorch import exir from executorch.backends.nxp.backend.custom_delegation_options import ( CustomDelegationOptions, @@ -98,7 +97,7 @@ def _get_default_quantizer(target_spec: NeutronTargetSpec, use_qat: bool) -> Qua def to_model_input_spec( - input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]] + input_spec: Iterable[ModelInputSpec] | tuple[int, ...] | list[tuple[int, ...]], ) -> tuple[ModelInputSpec, ...]: match input_spec: case _ if isinstance(input_spec, Iterable) and all( @@ -122,7 +121,7 @@ def to_model_input_spec( def get_calibration_inputs_fn_from_dataset_dir(dataset_dir) -> GetCalibrationInputsFn: def _nested( - input_spec: tuple[ModelInputSpec, ...] + input_spec: tuple[ModelInputSpec, ...], ) -> Iterable[tuple[torch.Tensor, ...]]: data = sorted(os.listdir(dataset_dir)) inputs_needed = len(input_spec) @@ -156,7 +155,7 @@ def _nested( def _get_example_input( - input_spec: tuple[ModelInputSpec, ...] 
+ input_spec: tuple[ModelInputSpec, ...], ) -> tuple[torch.Tensor, ...]: example_input = [] for spec in input_spec: @@ -193,8 +192,9 @@ def to_quantized_edge_program( use_new_flow_neutron_c: bool = False, delegate_to_npu=True, ) -> EdgeProgramManager: - _neutron_target_spec = NeutronTargetSpec(target) - custom_delegation_options.use_new_flow_neutron_c = use_new_flow_neutron_c + _neutron_target_spec = NeutronTargetSpec( + target, use_new_flow_neutron_c=use_new_flow_neutron_c + ) if get_quantizer_fn is None: get_quantizer_fn = partial( _get_default_quantizer, _neutron_target_spec, use_qat diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py index 8ba3c97d19f..7890ebbe5b5 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_clamp_converter.py @@ -6,7 +6,6 @@ import numpy as np import pytest import torch - from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) @@ -180,3 +179,48 @@ def test_convert_clamp__no_delegation__unsupported_bounds(min, max): # Make sure the `clamp` was NOT delegated. 
assert graph_contains_any_of_ops(delegated_ep.graph, [Clamp]) + + +@pytest.mark.parametrize( + "min, max", + [ + pytest.param(10, 17, id="min = 10, max = 17 (Max/Min)"), + pytest.param(0, 1, id="min = 0, max = 1 (Relu0To1)"), + pytest.param(-1, 1, id="min = -1, max = 1 (ReluN1To1)"), + pytest.param(0, None, id="min = 0, max = None (Relu)"), + # Float bounds + pytest.param(10.0, 17.0, id="min = 10, max = 17 (Max/Min)"), + pytest.param(0.0, 1.0, id="min = 0, max = 1 (Relu0To1)"), + pytest.param(-1.0, 1.0, id="min = -1, max = 1 (ReluN1To1)"), + pytest.param(0.0, None, id="min = 0, max = None (Relu)"), + ], +) +def test_convert_clamp__new_neutron_c_flow(mocker, min, max): + input_shape = (23,) + model = AddClampModule(min, max) + + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + delegated_ep = to_quantized_edge_program( + model, input_shape, use_new_flow_neutron_c=True + ).exported_program() + + # Make sure the `clamp` was delegated. + assert graph_contains_any_of_ops(delegated_ep.graph, [ExecutorchDelegateCall]) + assert not graph_contains_any_of_ops(delegated_ep.graph, [Clamp]) + + # Verify correct behavior of the converted NeutronIR model. + intermediate_ep = converter_spy.call_args.args[1] + neutron_ir_model, _ = converter_spy.spy_return + + input_data = ( + np.random.random(input_shape).astype(np.float32) * 256.0 - 128.0 + ).astype(np.int8) + + # Make sure the tested program contains the `clamp`. 
+ assert graph_contains_any_of_ops(intermediate_ep.graph, [Clamp]) + + convert_run_compare( + intermediate_ep, + tfl_model=neutron_ir_model, + input_data=input_data, + ) diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index dda223c5650..b64c8463d29 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -12,7 +12,6 @@ import executorch.extension.pybindings.portable_lib import executorch.kernels.quantized # noqa F401 - import torch from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import ( @@ -253,7 +252,9 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool): if args.debug: logging.basicConfig(level=logging.DEBUG, format=FORMAT, force=True) - neutron_target_spec = NeutronTargetSpec(target=args.target) + neutron_target_spec = NeutronTargetSpec( + target=args.target, use_new_flow_neutron_c=args.use_new_flow_neutron_c + ) # 1. pick model from one of the supported lists model, example_inputs, calibration_inputs = get_model_and_inputs_from_name(