From 0c32fe82e3c641cb81dcd71b08c89d6d987e4c2b Mon Sep 17 00:00:00 2001
From: Github Executorch
Date: Sat, 8 Nov 2025 23:25:06 -0800
Subject: [PATCH] Add CortexM Tests to CI

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .github/workflows/trunk.yml                   |  30 ++
 .../test/test_quantize_op_fusion_pass.py      | 369 ------
 2 files changed, 30 insertions(+), 369 deletions(-)
 delete mode 100644 backends/cortex_m/test/test_quantize_op_fusion_pass.py

diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 03a13e3717b..e07750b744e 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -1066,3 +1066,33 @@ jobs:
       .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
     }"
 
+
+  test-mcu-cortex-m-backend:
+    name: test-mcu-cortex-m-backend
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge.memory
+      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      script: |
+        # The generic Linux job chooses to use the base env, not the one set up by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        source .ci/scripts/utils.sh
+        install_executorch "--use-pt-pinned-commit"
+
+        # Install Arm dependencies
+        .ci/scripts/setup-arm-baremetal-tools.sh
+        source examples/arm/ethos-u-scratch/setup_path.sh
+
+        # Build the cortex-m test runner
+        backends/cortex_m/test/build_test_runner.sh
+
+        # Run the cortex_m tests
+        pytest --config-file=backends/arm/test/pytest.ini backends/cortex_m/test
diff --git a/backends/cortex_m/test/test_quantize_op_fusion_pass.py b/backends/cortex_m/test/test_quantize_op_fusion_pass.py
deleted file mode 100644
index 95845597947..00000000000
--- a/backends/cortex_m/test/test_quantize_op_fusion_pass.py
+++ /dev/null
@@ -1,369 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import unittest
-
-import executorch
-import executorch.backends.cortex_m.ops.operators  # noqa
-
-import torch
-
-from executorch.backends.cortex_m.passes.quantized_op_fusion_pass import (
-    QuantizedOpFusionPass,
-)
-from executorch.backends.cortex_m.passes.replace_quant_nodes_pass import (
-    ReplaceQuantNodesPass,
-)
-from executorch.backends.cortex_m.test.test_helpers_passes_utils import (
-    AddQuantizer,
-    check_count,
-    get_node_args,
-)
-from executorch.exir.dialects._ops import ops as exir_ops
-from torch.export import export
-from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
-
-
-class TestQuantizedOpFusionPass(unittest.TestCase):
-    """
-    Test suite for the QuantizedOpFusionPass, which fuses dequantize->add->quantize
-    patterns into a single quantized_add operation with AoT-computed parameters.
-    """
-
-    def setUp(self):
-        """Set up common test fixtures"""
-        self.example_inputs = (torch.randn(4, 8), torch.randn(4, 8))
-
-    def _prepare_quantized_model(self, model_class):
-        """Helper to prepare a quantized model for testing"""
-        model = model_class()
-
-        # Export and quantize
-        exported_model = export(model.eval(), self.example_inputs, strict=True).module()
-        prepared_model = prepare_pt2e(exported_model, AddQuantizer())
-        quantized_model = convert_pt2e(prepared_model)
-
-        # Export to EXIR Edge
-        exported = export(quantized_model, self.example_inputs, strict=True)
-        edge_program = executorch.exir.to_edge(
-            exported,
-            compile_config=executorch.exir.EdgeCompileConfig(_check_ir_validity=False),
-        )
-        return edge_program
-
-    def _apply_passes(self, edge_program):
-        """Apply both ReplaceQuantNodesPass and QuantizedOpFusionPass"""
-        passes = [QuantizedOpFusionPass(), ReplaceQuantNodesPass()]
-        final_program = edge_program.transform(passes)
-        return final_program
-
-    def test_single_add_fusion(self):
-        """Single add with full Q/DQ pattern should fuse into one quantized_add node"""
-
-        class SingleAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                return x + y
-
-        # Prepare model
-        edge_program = self._prepare_quantized_model(SingleAddModel)
-        edge_graph = edge_program.exported_program().graph_module
-
-        # Get reference output
-        reference_output = edge_graph(*self.example_inputs)
-
-        # Apply passes
-        transformed_program = self._apply_passes(edge_program)
-        transformed_graph = transformed_program.exported_program().graph_module
-
-        # Verify fusion occurred
-        check_count(
-            transformed_graph,
-            exir_ops.edge.cortex_m.quantized_add.default,
-            1,  # Should have exactly 1 fused quantized_add
-        )
-
-        # Verify the following:
-        # Before fusion:
-        #   x --> quantize_per_tensor --> dequantize_per_tensor --> add --> quantize_per_tensor --> dequantize_per_tensor --> output
-        #   y --> quantize_per_tensor --> dequantize_per_tensor --^
-        # After fusion:
-        #   x --> quantize_per_tensor --> quantized_add --> dequantize_per_tensor --> output
-        #   y --> quantize_per_tensor --^
-        check_count(
-            transformed_graph, exir_ops.edge.cortex_m.quantize_per_tensor.default, 2
-        )
-        check_count(
-            transformed_graph, exir_ops.edge.cortex_m.dequantize_per_tensor.default, 1
-        )
-        check_count(transformed_graph, exir_ops.edge.cortex_m.quantized_add.default, 1)
-
-        # Verify numerical equivalence
-        fused_output = transformed_graph(*self.example_inputs)
-        torch.testing.assert_close(reference_output, fused_output, rtol=1e-3, atol=1e-3)
-
-    def test_multiple_add_fusion(self):
-        """Multiple independent adds should create multiple quantized_add nodes"""
-
-        class MultipleAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                z1 = x + y  # First add
-                z2 = x + z1  # Second add
-                return z2
-
-        # Prepare model
-        edge_program = self._prepare_quantized_model(MultipleAddModel)
-        edge_graph = edge_program.exported_program().graph_module
-
-        # Get reference output
-        reference_output = edge_graph(*self.example_inputs)
-
-        # Apply passes
-        transformed_program = self._apply_passes(edge_program)
-        transformed_graph = transformed_program.exported_program().graph_module
-
-        # Verify multiple fusions occurred
-        check_count(
-            transformed_graph,
-            exir_ops.edge.cortex_m.quantized_add.default,
-            2,  # Should have 2 fused quantized_add nodes
-        )
-
-        # Verify numerical equivalence
-        fused_output = transformed_graph(*self.example_inputs)
-        torch.testing.assert_close(reference_output, fused_output, rtol=1e-3, atol=1e-3)
-
-    def test_no_fusion_without_pattern(self):
-        """Add without proper Q/DQ pattern should not be fused"""
-
-        class NonQuantizedAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                return torch.relu(x + y)  # ReLU breaks the pattern
-
-        # Export without quantization, so the graph contains an add with no
-        # surrounding Q/DQ nodes and the fusion pass has nothing to match.
-        model = NonQuantizedAddModel()
-        exported = export(model, self.example_inputs, strict=True)
-        edge_program = executorch.exir.to_edge(
-            exported,
-            compile_config=executorch.exir.EdgeCompileConfig(_check_ir_validity=False),
-        )
-        # Apply passes
-        transformed_program = self._apply_passes(edge_program)
-        transformed_graph = transformed_program.exported_program().graph_module
-
-        # Verify no fusion occurred
-        check_count(
-            transformed_graph,
-            exir_ops.edge.cortex_m.quantized_add.default,
-            0,  # Should have no fused quantized_add nodes
-        )
-
-    def test_precomputed_parameters(self):
-        """Fused node should have precomputed multipliers/shifts instead of scales"""
-
-        class SingleAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                return x + y
-
-        # Prepare model
-        edge_program = self._prepare_quantized_model(SingleAddModel)
-
-        # Apply passes
-        transformed_program = self._apply_passes(edge_program)
-        transformed_graph = transformed_program.exported_program().graph_module
-
-        # Get arguments of the fused quantized_add node
-        quantized_add_args = get_node_args(
-            transformed_graph, exir_ops.edge.cortex_m.quantized_add.default
-        )
-
-        # Should have exactly one quantized_add node
-        self.assertEqual(len(quantized_add_args), 1)
-        args = quantized_add_args[0]
-
-        # Verify argument structure:
-        # (tensor1, zp1, mult1, shift1, tensor2, zp2, mult2, shift2, out_zp, out_mult, out_shift)
-        self.assertEqual(len(args), 11, "quantized_add should have 11 arguments")
-
-        # Check that multipliers and shifts are integers (not floats/scales)
-        # args[2], args[3] = input1 multiplier, shift
-        # args[6], args[7] = input2 multiplier, shift
-        # args[9], args[10] = output multiplier, shift
-        for i in [2, 3, 6, 7, 9, 10]:  # multiplier and shift positions
-            self.assertIsInstance(
-                args[i], int, f"Argument {i} should be an integer (precomputed)"
-            )
-
-    def test_mixed_fusion_pattern(self):
-        """Mixed pattern (some fusable, some not) should partially fuse"""
-
-        class MixedModel(torch.nn.Module):
-            def forward(self, x, y):
-                z1 = x + y  # This should fuse
-                z2 = torch.relu(z1)  # ReLU breaks next fusion
-                z3 = z2 + x  # This won't have full Q/DQ pattern
-                return z3
-
-        # Prepare model
-        edge_program = self._prepare_quantized_model(MixedModel)
-
-        # Apply passes
-        transformed_program = self._apply_passes(edge_program)
-        transformed_graph = transformed_program.exported_program().graph_module
-
-        # Should have partial fusion (at least 1, but not necessarily all adds)
-        quantized_add_count = sum(
-            1
-            for node in transformed_graph.graph.nodes
-            if node.op == "call_function"
-            and node.target == exir_ops.edge.cortex_m.quantized_add.default
-        )
-
-        self.assertGreaterEqual(
-            quantized_add_count, 1, "Should have at least 1 fused operation"
-        )
-
-    def test_different_tensor_shapes(self):
-        """Different tensor shapes should still fuse correctly"""
-
-        class SingleAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                return x + y
-
-        # Test with different input shapes
-        for shape in [(2, 3), (10, 20, 30), (1,)]:
-            with self.subTest(shape=shape):
-                inputs = (torch.randn(shape), torch.randn(shape))
-
-                model = SingleAddModel()
-                exported_model = export(model.eval(), inputs, strict=True).module()
-                prepared_model = prepare_pt2e(exported_model, AddQuantizer())
-                quantized_model = convert_pt2e(prepared_model)
-
-                exported = export(quantized_model, inputs, strict=True)
-                edge_program = executorch.exir.to_edge(
-                    exported,
-                    compile_config=executorch.exir.EdgeCompileConfig(
-                        _check_ir_validity=False
-                    ),
-                )
-
-                # Apply passes
-                transformed_program = self._apply_passes(edge_program)
-                transformed_graph = transformed_program.exported_program().graph_module
-
-                # Verify fusion occurred regardless of shape
-                check_count(
-                    transformed_graph, exir_ops.edge.cortex_m.quantized_add.default, 1
-                )
-
-    def test_aot_parameter_computation_accuracy(self):
-        """Verify that AoT-computed parameters match runtime computation"""
-
-        class SingleAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                return x + y
-
-        # Prepare model
-        edge_program = self._prepare_quantized_model(SingleAddModel)
-
-        # Apply passes
-        transformed_program = self._apply_passes(edge_program)
-        transformed_graph = transformed_program.exported_program().graph_module
-
-        # Get the fused node arguments
-        quantized_add_args = get_node_args(
-            transformed_graph, exir_ops.edge.cortex_m.quantized_add.default
-        )[0]
-
-        # Extract the computed multipliers and shifts
-        input1_mult, input1_shift = quantized_add_args[2], quantized_add_args[3]
-        input2_mult, input2_shift = quantized_add_args[6], quantized_add_args[7]
-        output_mult, output_shift = quantized_add_args[9], quantized_add_args[10]
-
-        # Verify they are reasonable values
-        # Multipliers should be in int32 range
-        self.assertTrue(-(2**31) <= input1_mult < 2**31)
-        self.assertTrue(-(2**31) <= input2_mult < 2**31)
-        self.assertTrue(-(2**31) <= output_mult < 2**31)
-
-        # Shifts should be reasonable (typically -31 to 31)
-        self.assertTrue(-50 <= input1_shift <= 50)
-        self.assertTrue(-50 <= input2_shift <= 50)
-        self.assertTrue(-50 <= output_shift <= 50)
-
-        # Output multiplier should be close to 2^30 (for 1.0 scale)
-        self.assertAlmostEqual(output_mult, 2**30, delta=1000)
-        self.assertEqual(output_shift, -18)
-
-    def test_executorch_program_generation(self):
-        """Verify ExecuTorch program generation with fused ops"""
-
-        class SingleAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                return x + y
-
-        # Prepare model
-        edge_program = self._prepare_quantized_model(SingleAddModel)
-
-        # Apply passes
-        transformed_program = self._apply_passes(edge_program)
-
-        # Generate ExecuTorch program
-        executorch_program = transformed_program.to_executorch()
-
-        # Verify the program contains the expected fused operator
-        operator_names = [
-            op.name
-            for op in executorch_program.executorch_program.execution_plan[0].operators
-        ]
-
-        self.assertIn("cortex_m::quantized_add", operator_names)
-        self.assertIn("cortex_m::quantize_per_tensor", operator_names)
-        self.assertIn("cortex_m::dequantize_per_tensor", operator_names)
-        # Original graph:
-        #   quantize_per_tensor --> dequantize_per_tensor --> add --> quantize_per_tensor --> dequantize_per_tensor
-        #   (input quant)           (dequant)                 (fp32 add) (re-quant)           (dequant)
-        # The fusion pass collapses the inner dequantize --> add --> quantize into:
-        #   quantize_per_tensor --> quantized_add (fused node) --> dequantize_per_tensor
-
-    def test_broadcastable_shapes(self):
-        """Verify that broadcastable shapes are supported"""
-
-        class BroadcastAddModel(torch.nn.Module):
-            def forward(self, x, y):
-                return x + y
-
-        # Broadcastable input shapes
-        inputs = (torch.randn(4, 1), torch.randn(4, 8))
-
-        # Prepare quantized model
-        edge_program = self._prepare_quantized_model(BroadcastAddModel)
-
-        # Get unfused output
-        unfused_graph = edge_program.exported_program().graph_module
-        unfused_output = unfused_graph(*inputs)
-        if isinstance(unfused_output, tuple):
-            unfused_output = unfused_output[0]
-
-        # Apply fusion pass
-        fused_program = self._apply_passes(edge_program)
-        fused_graph = fused_program.exported_program().graph_module
-        fused_output = fused_graph(*inputs)
-        if isinstance(fused_output, tuple):
-            fused_output = fused_output[0]
-
-        # Check fusion occurred
-        check_count(fused_graph, exir_ops.edge.cortex_m.quantized_add.default, 1)
-
-        # Compare fused vs unfused (both quantized)
-        torch.testing.assert_close(fused_output, unfused_output, rtol=1e-3, atol=1e-3)
-
-
-if __name__ == "__main__":
-    unittest.main()
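
Note on the AoT-computed parameters exercised by test_precomputed_parameters and
test_aot_parameter_computation_accuracy above: the fusion pass re-expresses each
float requantization scale as an integer (multiplier, shift) pair so the kernel can
requantize with integer arithmetic only. The following is a minimal sketch of the
standard Q31 decomposition, assuming a frexp-based split; quantize_multiplier is a
hypothetical name, not the pass's actual helper, and the real pass may fold extra
normalization into the shift (e.g. an elementwise-add left-shift), which is why the
test pins output_shift to -18 instead of re-deriving it.

import math

def quantize_multiplier(scale: float) -> tuple[int, int]:
    # Hypothetical helper: decompose scale so that
    #   scale ~= multiplier * 2**(shift - 31)
    # with multiplier normalized into [2**30, 2**31), i.e. Q31 fixed point.
    if scale == 0.0:
        return 0, 0
    mantissa, shift = math.frexp(scale)  # scale = mantissa * 2**shift, 0.5 <= mantissa < 1
    multiplier = round(mantissa * (1 << 31))  # Q31 encoding of the mantissa
    if multiplier == (1 << 31):  # rounding bumped the mantissa up to 1.0
        multiplier //= 2
        shift += 1
    return multiplier, shift

# A scale of 1.0 decomposes to multiplier == 2**30 (with shift == 1), which is
# consistent with the test's expectation that output_mult is close to 2**30.
print(quantize_multiplier(1.0))  # (1073741824, 1)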