From 3879789caeb75ee4eb075cc36cb04b39b234ba98 Mon Sep 17 00:00:00 2001
From: Christine Long <christinelong@meta.com>
Date: Fri, 15 May 2026 19:08:25 -0700
Subject: [PATCH 1/5] Add a16w8 per-op test for var (#19596)

Summary:

Add int16 activation / int8 weight (a16w8) quantization tests for `aten.var` on Ethos-U55 and Ethos-U85.

## Changes
- Add `test_parameters_ethosu` class attribute to `Var` with 2 test configurations (4D tensors with correction=0 and correction=1)
- Switch existing `test_var_dim_u55_INT_no_dim` and `test_var_dim_u85_INT_no_dim` from `Var.test_parameters` to `Var.test_parameters_ethosu` for Ethos-U compatible tensor shapes
- Add `test_var_a16w8_u55_INT` using `EthosU55PipelineINT` with `a16w8_quantization=True, symmetric_io_quantization=True`
- Add `test_var_a16w8_u85_INT` using `EthosU85PipelineINT` with same kwargs
- Register `ops/test_var.py` in `backends/arm/test/targets.bzl` (`fbcode/` and `xplat/`)

Differential Revision: D104532362
---
 backends/arm/test/ops/test_var.py | 39 +++++++++++++++++++++++++++++--
 backends/arm/test/targets.bzl     |  1 +
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py
index a7943bfc19b..23bf5f4cf37 100644
--- a/backends/arm/test/ops/test_var.py
+++ b/backends/arm/test/ops/test_var.py
@@ -32,6 +32,11 @@ class Var(torch.nn.Module):
         ),
     }
 
+    test_parameters_ethosu = {
+        "var_4d_keep_dim_0_correction": lambda: (torch.randn(1, 50, 10, 20), True, 0),
+        "var_4d_keep_dim_1_correction": lambda: (torch.randn(1, 30, 15, 20), True, 1),
+    }
+
     def __init__(self, keepdim: bool = True, correction: int = 0):
         super().__init__()
         self.keepdim = keepdim
@@ -170,7 +175,7 @@ def test_var_dim_tosa_INT_no_dim(test_data: Tuple):
     pipeline.run()
 
 
-@common.parametrize("test_data", Var.test_parameters)
+@common.parametrize("test_data", Var.test_parameters_ethosu)
 @common.XfailIfNoCorstone300
 def test_var_dim_u55_INT_no_dim(test_data: Tuple):
     test_data, keepdim, correction = test_data()
@@ -183,7 +188,7 @@ def test_var_dim_u55_INT_no_dim(test_data: Tuple):
     pipeline.run()
 
 
-@common.parametrize("test_data", Var.test_parameters)
+@common.parametrize("test_data", Var.test_parameters_ethosu)
 @common.XfailIfNoCorstone320
 def test_var_dim_u85_INT_no_dim(test_data: Tuple):
     test_data, keepdim, correction = test_data()
@@ -224,6 +229,36 @@ def test_var_dim_vgf_quant_no_dim(test_data: Tuple):
     pipeline.run()
 
 
+@common.parametrize("test_data", Var.test_parameters_ethosu)
+@common.XfailIfNoCorstone300
+def test_var_a16w8_u55_INT(test_data: Tuple):
+    test_data, keepdim, correction = test_data()
+    pipeline = EthosU55PipelineINT[input_t1](
+        Var(keepdim, correction),
+        (test_data,),
+        aten_ops=[],
+        exir_ops=[],
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Var.test_parameters_ethosu)
+@common.XfailIfNoCorstone320
+def test_var_a16w8_u85_INT(test_data: Tuple):
+    test_data, keepdim, correction = test_data()
+    pipeline = EthosU85PipelineINT[input_t1](
+        Var(keepdim, correction),
+        (test_data,),
+        aten_ops=[],
+        exir_ops=[],
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
+
+
 #############
 ## VarDim ###
 #############
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index b8030ae7ba8..30fa348414f 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -39,6 +39,7 @@ def define_arm_tests():
         "ops/test_exp.py",
         "ops/test_reciprocal.py",
         "ops/test_mean_dim.py",
+        "ops/test_var.py",
     ]
 
     # Quantization

From 09b0ebf7ebed3001651774070645787139d7d175 Mon Sep 17 00:00:00 2001
From: Christine Long <christinelong@meta.com>
Date: Fri, 15 May 2026 19:08:25 -0700
Subject: [PATCH 2/5] Add a16w8 per-op test for conv1d (#19597)

Summary:

Add int16 activation / int8 weight (a16w8) quantization tests for `aten.conv1d` on Ethos-U55 and Ethos-U85.

## Changes
- Add `test_conv1d_a16w8_u55_INT` using `EthosU55PipelineINT` with `a16w8_quantization=True, symmetric_io_quantization=True`, reusing existing `test_data_INT` parameters
- Add `test_conv1d_a16w8_u85_INT` using `EthosU85PipelineINT` with same kwargs
- Register `ops/test_conv1d.py` in `backends/arm/test/targets.bzl` (`fbcode/` and `xplat/`)

Reviewed By: Ninja91

Differential Revision: D104532360
---
 backends/arm/test/ops/test_conv1d.py | 34 ++++++++++++++++++++++++++++
 backends/arm/test/targets.bzl        |  1 +
 2 files changed, 35 insertions(+)

diff --git a/backends/arm/test/ops/test_conv1d.py b/backends/arm/test/ops/test_conv1d.py
index 486b6b3ce7c..0e75eab621f 100644
--- a/backends/arm/test/ops/test_conv1d.py
+++ b/backends/arm/test/ops/test_conv1d.py
@@ -399,3 +399,37 @@ def test_convolution_1d_vgf_quant_a8w4(test_data):
         get_symmetric_a8w4_quantization_config(is_per_channel=per_channel_quantization)
     )
     pipeline.run()
+
+
+@common.parametrize("test_data", test_data_INT)
+@common.XfailIfNoCorstone300
+def test_conv1d_a16w8_u55_INT(test_data):
+    """Run the Ethos-U55 INT pipeline on conv1d with a16w8 quantization."""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU55PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_op,
+        exir_op,
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_INT)
+@common.XfailIfNoCorstone320
+def test_conv1d_a16w8_u85_INT(test_data):
+    """Run the Ethos-U85 INT pipeline on conv1d with a16w8 quantization."""
+    model, per_channel_quantization = test_data()
+    pipeline = EthosU85PipelineINT[input_t](
+        model,
+        model.get_inputs(),
+        aten_op,
+        exir_op,
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+        per_channel_quantization=per_channel_quantization,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 30fa348414f..1b2dfa8e2c6 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -40,6 +40,7 @@ def define_arm_tests():
         "ops/test_reciprocal.py",
         "ops/test_mean_dim.py",
         "ops/test_var.py",
+        "ops/test_conv1d.py",
     ]
 
     # Quantization

From f281d17e65a451cedfdb327e6adfc6ab1acaf29f Mon Sep 17 00:00:00 2001
From: Christine Long <christinelong@meta.com>
Date: Fri, 15 May 2026 19:08:25 -0700
Subject: [PATCH 3/5] Add a16w8 per-op test for gelu (#19598)

Summary:

Add int16 activation / int8 weight (a16w8) quantization tests for `aten.gelu` on Ethos-U55 and Ethos-U85.

## Changes
- Add `test_gelu_a16w8_u55_INT` using `EthosU55PipelineINT` with `a16w8_quantization=True, symmetric_io_quantization=True, qtol=128, epsilon=2**-16`, reusing existing `Gelu.test_data` parameters (12 test configurations covering both `none` and `tanh` approximation modes)
- Add `test_gelu_a16w8_u85_INT` using `EthosU85PipelineINT` with same kwargs
- Register `ops/test_gelu.py` in `backends/arm/test/targets.bzl` (`fbcode/` and `xplat/`)

bypass-pytorch-oss-checks

Differential Revision: D104532359
---
 backends/arm/test/ops/test_gelu.py | 35 ++++++++++++++++++++++++++++++
 backends/arm/test/targets.bzl      |  1 +
 2 files changed, 36 insertions(+)

diff --git a/backends/arm/test/ops/test_gelu.py b/backends/arm/test/ops/test_gelu.py
index 82e3cb1e53e..84626c70b4d 100644
--- a/backends/arm/test/ops/test_gelu.py
+++ b/backends/arm/test/ops/test_gelu.py
@@ -6,6 +6,7 @@
 from typing import Tuple
 
 import torch
+
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -176,3 +177,37 @@ def test_gelu_vgf_quant(test_data: input_t1):
         quantize=True,
     )
     pipeline.run()
+
+
+@common.parametrize("test_data", Gelu.test_data)
+@common.XfailIfNoCorstone300
+def test_gelu_a16w8_u55_INT(test_data: input_t1):
+    approximate, data = test_data()
+    pipeline = EthosU55PipelineINT[input_t1](
+        Gelu(approximate),
+        (data,),
+        Gelu.aten_op,
+        Gelu.exir_op,
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+        qtol=128,
+        epsilon=2**-16,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Gelu.test_data)
+@common.XfailIfNoCorstone320
+def test_gelu_a16w8_u85_INT(test_data: input_t1):
+    approximate, data = test_data()
+    pipeline = EthosU85PipelineINT[input_t1](
+        Gelu(approximate),
+        (data,),
+        Gelu.aten_op,
+        Gelu.exir_op,
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+        qtol=128,
+        epsilon=2**-16,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 1b2dfa8e2c6..16a554da3b3 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -41,6 +41,7 @@ def define_arm_tests():
         "ops/test_mean_dim.py",
         "ops/test_var.py",
         "ops/test_conv1d.py",
+        "ops/test_gelu.py",
     ]
 
     # Quantization

From 2381b0fd57441aaf6fbc4dc9e87babeafa01b3e8 Mon Sep 17 00:00:00 2001
From: Christine Long <christinelong@meta.com>
Date: Fri, 15 May 2026 19:08:25 -0700
Subject: [PATCH 4/5] Add a16w8 per-op test for bmm (#19599)

Summary:

Add int16 activation / int8 weight (a16w8) quantization tests for `aten.bmm` on Ethos-U55 and Ethos-U85.

## Changes
- Add `a16w8_bmm_test_parameters` dict with 5 test configurations covering same-shape, different-shape, rectangular, batch-10, and negative-value tensors
- Add `test_bmm_a16w8_u55_INT` using `OpNotSupportedPipeline` to verify that bmm with int16 inputs is rejected on U55, which does not support int16 bmm: the test expects zero delegates and one non-delegated bmm op
- Add `test_bmm_a16w8_u85_INT` using `EthosU85PipelineINT` with `a16w8_quantization=True, symmetric_io_quantization=True`
- Remove unused `aten_op_mm` and `exir_op_mm` variables
- Register `ops/test_bmm.py` in `backends/arm/test/targets.bzl` (`fbcode/` and `xplat/`)

Differential Revision: D104532363
---
 backends/arm/test/ops/test_bmm.py | 50 ++++++++++++++++++++++++++++---
 backends/arm/test/targets.bzl     |  1 +
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/backends/arm/test/ops/test_bmm.py b/backends/arm/test/ops/test_bmm.py
index d21b33cfec4..f67a6902ef2 100644
--- a/backends/arm/test/ops/test_bmm.py
+++ b/backends/arm/test/ops/test_bmm.py
@@ -1,4 +1,4 @@
-# Copyright 2024-2025 Arm Limited and/or its affiliates.
+# Copyright 2024-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -12,9 +12,11 @@
 
 from executorch.backends.arm.test import common
 
+from executorch.backends.arm.quantizer import get_symmetric_a16w8_quantization_config
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
     EthosU85PipelineINT,
+    OpNotSupportedPipeline,
     TosaPipelineFP,
     TosaPipelineINT,
     VgfPipeline,
@@ -23,9 +25,6 @@
 aten_op_bmm = "torch.ops.aten.bmm.default"
 exir_op_bmm = "executorch_exir_dialects_edge__ops_aten_bmm_default"
 
-aten_op_mm = "torch.ops.aten.matmul.default"
-exir_op_mm = "executorch_exir_dialects_edge__ops_aten_matmul_default"
-
 input_t1 = Tuple[torch.Tensor, torch.Tensor]  # Input x
 
 
@@ -191,3 +190,46 @@ def test_bmm_vgf_quant_single_input(test_data: input_t1):
         quantize=True,
     )
     pipeline.run()
+
+
+a16w8_bmm_test_parameters = {
+    "rand_same": lambda: (torch.rand(2, 1, 1), torch.rand(2, 1, 1)),
+    "rand_diff": lambda: (torch.rand(5, 3, 5), torch.rand(5, 5, 2)),
+    "rand_rect": lambda: (torch.rand(1, 55, 3), torch.rand(1, 3, 44)),
+    "rand_batch10": lambda: (torch.rand(10, 1, 10), torch.rand(10, 10, 5)),
+    "rand_neg": lambda: (
+        -10 * torch.randn(2, 32, 64),
+        5 + 5 * torch.randn(2, 64, 32),
+    ),
+}
+
+
+@common.parametrize("test_data", a16w8_bmm_test_parameters)
+@common.XfailIfNoCorstone300
+def test_bmm_a16w8_u55_INT(test_data: input_t1):
+    """U55 does not support bmm with INT16 inputs. Verify bmm is rejected."""
+    pipeline = OpNotSupportedPipeline[input_t1](
+        BMM(),
+        test_data(),
+        non_delegated_ops={exir_op_bmm: 1},
+        n_expected_delegates=0,
+        u55_subset=True,
+        quantize=True,
+        tosa_extensions=["int16"],
+    )
+    pipeline.quantizer.set_global(get_symmetric_a16w8_quantization_config())
+    pipeline.run()
+
+
+@common.parametrize("test_data", a16w8_bmm_test_parameters)
+@common.XfailIfNoCorstone320
+def test_bmm_a16w8_u85_INT(test_data: input_t1):
+    pipeline = EthosU85PipelineINT[input_t1](
+        BMM(),
+        test_data(),
+        aten_op_bmm,
+        exir_op_bmm,
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 16a554da3b3..20e122055d9 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -42,6 +42,7 @@ def define_arm_tests():
         "ops/test_var.py",
         "ops/test_conv1d.py",
         "ops/test_gelu.py",
+        "ops/test_bmm.py",
     ]
 
     # Quantization

From efae0d5e94eacba5f2e24e0062c5df42e8b1dc53 Mon Sep 17 00:00:00 2001
From: Christine Long <christinelong@meta.com>
Date: Fri, 15 May 2026 19:08:25 -0700
Subject: [PATCH 5/5] Add a16w8 per-op test for split

Summary:
Add int16 activation / int8 weight (a16w8) quantization tests for `aten.split` on Ethos-U55 and Ethos-U85.

## Changes
- Add `a16w8_split_test_parameters` dict with 3 test configurations covering 1D, 2D, and 3D splits along different axes
- Add `test_split_a16w8_u55_INT` using `EthosU55PipelineINT` with `a16w8_quantization=True, symmetric_io_quantization=True`
- Add `test_split_a16w8_u85_INT` using `EthosU85PipelineINT` with same kwargs
- Register `ops/test_split.py` in `backends/arm/test/targets.bzl` (`fbcode/` and `xplat/`)

Differential Revision: D104533281
---
 backends/arm/test/ops/test_split.py | 35 +++++++++++++++++++++++++++++
 backends/arm/test/targets.bzl       |  1 +
 2 files changed, 36 insertions(+)

diff --git a/backends/arm/test/ops/test_split.py b/backends/arm/test/ops/test_split.py
index 6af3362de7a..f655785410c 100644
--- a/backends/arm/test/ops/test_split.py
+++ b/backends/arm/test/ops/test_split.py
@@ -310,3 +310,38 @@ def test_split_tensor_vgf_quant(test_data: Tuple):
         quantize=True,
     )
     pipeline.run()
+
+
+a16w8_split_test_parameters = {
+    "a16w8_1d_split_2": lambda: (torch.rand(10), 2, 0),
+    "a16w8_2d_split_4": lambda: (torch.rand(8, 4), 4, 0),
+    "a16w8_3d_split_4": lambda: (torch.rand(4, 4, 8), 4, 2),
+}
+
+
+@common.parametrize("test_data", a16w8_split_test_parameters)
+@common.XfailIfNoCorstone300
+def test_split_a16w8_u55_INT(test_data: input_t1):
+    pipeline = EthosU55PipelineINT[input_t1](
+        Split(),
+        test_data(),
+        aten_ops=[],
+        exir_ops=exir_op,
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", a16w8_split_test_parameters)
+@common.XfailIfNoCorstone320
+def test_split_a16w8_u85_INT(test_data: input_t1):
+    pipeline = EthosU85PipelineINT[input_t1](
+        Split(),
+        test_data(),
+        aten_ops=[],
+        exir_ops=exir_op,
+        a16w8_quantization=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index 20e122055d9..ab2cad7516e 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -43,6 +43,7 @@ def define_arm_tests():
         "ops/test_conv1d.py",
         "ops/test_gelu.py",
         "ops/test_bmm.py",
+        "ops/test_split.py",
     ]
 
     # Quantization