From 58cd50c6bea9413a46712c9c21d3d545210ae75f Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 12 May 2026 01:12:00 +0200
Subject: [PATCH 1/7] Add XNNPACK backend testing for riscv64

XNNPACK requires GCC 14+ to compile for riscv64.
---
 .ci/scripts/test_riscv_qemu.sh         | 26 +++++++++++++++++++++++++-
 .github/workflows/_test_riscv.yml      | 18 ++++++++++++++++--
 .github/workflows/riscv64.yml          | 10 ++++++++++
 examples/riscv/aot_riscv.py            | 23 ++++++++++++++++++++++-
 examples/riscv/run.sh                  | 14 +++++++++++++-
 examples/riscv/setup.sh                |  9 +++++++--
 tools/cmake/preset/riscv64_linux.cmake | 14 ++++++++++++++
 7 files changed, 107 insertions(+), 7 deletions(-)

diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh
index 27ab57f3b09..658c562ccf2 100755
--- a/.ci/scripts/test_riscv_qemu.sh
+++ b/.ci/scripts/test_riscv_qemu.sh
@@ -14,5 +14,29 @@ set -eu
 script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
 et_root_dir=$(realpath "${script_dir}/../..")
 
+xnnpack=false
+
+usage() {
+    cat <<EOF
+Usage: $(basename "$0") [options]
+Options:
+  --xnnpack       Enable the XNNPACK backend (AOT partitioner + runtime)
+  -h, --help      Show this help
+EOF
+}
+
+for arg in "$@"; do
+    case $arg in
+        --xnnpack) xnnpack=true ;;
+        -h|--help) usage; exit 0 ;;
+        *) echo "Unknown option: $arg" >&2; usage; exit 1 ;;
+    esac
+done
+
+run_extra_args=()
+if ${xnnpack}; then
+    run_extra_args+=(--xnnpack)
+fi
+
 bash "${et_root_dir}/examples/riscv/setup.sh"
-bash "${et_root_dir}/examples/riscv/run.sh"
+bash "${et_root_dir}/examples/riscv/run.sh" "${run_extra_args[@]}"
diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml
index 79eec6cbb4c..ad919578348 100644
--- a/.github/workflows/_test_riscv.yml
+++ b/.github/workflows/_test_riscv.yml
@@ -12,13 +12,26 @@ on:
         required: false
         type: number
         default: 30
+      xnnpack:
+        description: 'Whether to enable XNNPACK'
+        required: false
+        type: boolean
+        default: false
+      gcc-version:
+        description: 'The version of GCC to use'
+        required: false
+        type: number
+      docker-image:
+        description: 'The docker image to use for this job'
+        required: false
+        type: string
 
 jobs:
   run:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-gcc11
+      docker-image: ${{ inputs.docker-image || 'ci-image:executorch-ubuntu-22.04-gcc11' }}
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: ${{ inputs.timeout }}
@@ -29,4 +42,5 @@ jobs:
         source .ci/scripts/utils.sh
         install_executorch "--use-pt-pinned-commit"
 
-        bash .ci/scripts/test_riscv_qemu.sh
+        export GCC_VERSION=${{ inputs.gcc-version }}
+        bash .ci/scripts/test_riscv_qemu.sh ${{ inputs.xnnpack && '--xnnpack' }}
diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index aa970dc6270..bb935d90b6c 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -25,6 +25,16 @@ jobs:
   test-riscv:
     name: test-riscv
     uses: ./.github/workflows/_test_riscv.yml
+    strategy:
+      matrix:
+        include:
+          - { model: add, xnnpack: false }
+          - { model: add, xnnpack: true }
     permissions:
       id-token: write
       contents: read
+    with:
+      xnnpack: ${{ matrix.xnnpack }}
+      # XNNPACK requires GCC 14+
+      gcc-version: ${{ matrix.xnnpack && 14 }}
+      docker-image: ${{ matrix.xnnpack && 'ci-image:executorch-ubuntu-24.04-gcc14' }}
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 8076f056ba2..f4df2dbb83a 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -36,13 +36,34 @@ def main() -> None:
         default=Path("add_riscv.bpte"),
         help="Output .bpte path",
     )
+    parser.add_argument(
+        "--xnnpack",
+        action="store_true",
+        help="Lower through the XNNPACK partitioner",
+    )
     args = parser.parse_args()
 
     model = AddModule().eval()
     example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0))
 
     exported = export(model, example_inputs)
-    et_program = to_edge_transform_and_lower(exported).to_executorch()
+    partitioners = []
+    if args.xnnpack:
+        from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
+            XnnpackPartitioner,
+        )
+
+        partitioners.append(XnnpackPartitioner())
+
+    edge = to_edge_transform_and_lower(exported, partitioner=partitioners)
+    delegated = sum(
+        1
+        for n in edge.exported_program().graph.nodes
+        if n.op == "call_function" and "call_delegate" in str(n.target)
+    )
+    print(f"[aot_riscv] xnnpack={args.xnnpack} delegated_nodes={delegated}")
+
+    et_program = edge.to_executorch()
 
     test_inputs = [
         (torch.ones(1, 4), torch.full((1, 4), 2.0)),
diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh
index 7c05edcbc8c..d5deb016fcc 100755
--- a/examples/riscv/run.sh
+++ b/examples/riscv/run.sh
@@ -20,11 +20,13 @@ build_dir="${et_root_dir}/cmake-out-riscv"
 output_dir="${et_root_dir}/riscv_test"
 qemu="qemu-riscv64-static"
 qemu_timeout="600"
+xnnpack=false
 
 usage() {
     cat <<EOF
 Usage: $(basename "$0") [options]
 Options:
+  --xnnpack               Enable the XNNPACK backend (AOT partitioner + runtime)
   --build_only            Only export and cross-compile; do not invoke QEMU
   --build_dir=<DIR>       CMake build directory (default: ${build_dir})
   --output_dir=<DIR>      Directory for the exported .bpte (default: ${output_dir})
@@ -36,6 +38,7 @@ EOF
 
 for arg in "$@"; do
     case $arg in
+        --xnnpack) xnnpack=true ;;
         --build_only) build_only=true ;;
         --build_dir=*) build_dir="${arg#*=}" ;;
         --output_dir=*) output_dir="${arg#*=}" ;;
@@ -50,11 +53,20 @@ mkdir -p "${output_dir}"
 bpte_path="${output_dir}/add_riscv.bpte"
 
 echo "[run.sh] Step 1/3: AOT export on host"
-python "${script_dir}/aot_riscv.py" --output "${bpte_path}"
+aot_extra_args=()
+if ${xnnpack}; then
+    aot_extra_args+=(--xnnpack)
+fi
+python "${script_dir}/aot_riscv.py" "${aot_extra_args[@]}" --output "${bpte_path}"
 
 echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux"
+cmake_extra_args=()
+if ${xnnpack}; then
+    cmake_extra_args+=(-DEXECUTORCH_BUILD_XNNPACK=ON)
+fi
 cmake -S "${et_root_dir}" -B "${build_dir}" \
     --preset riscv64-linux \
+    "${cmake_extra_args[@]}" \
     -DCMAKE_BUILD_TYPE=Release
 cmake --build "${build_dir}" -j"$(nproc)" --target executor_runner
 
diff --git a/examples/riscv/setup.sh b/examples/riscv/setup.sh
index c1342c60d5e..6fba1c25bdb 100755
--- a/examples/riscv/setup.sh
+++ b/examples/riscv/setup.sh
@@ -23,8 +23,8 @@ fi
 ${SUDO} apt-get update
 ${SUDO} apt-get install -y --no-install-recommends \
     build-essential \
-    gcc-riscv64-linux-gnu \
-    g++-riscv64-linux-gnu \
+    gcc${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
+    g++${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
     binutils-riscv64-linux-gnu \
     libc6-riscv64-cross \
     libc6-dev-riscv64-cross \
@@ -32,5 +32,10 @@ ${SUDO} apt-get install -y --no-install-recommends \
     file \
     qemu-user-static
 
+if [[ -n "${GCC_VERSION+x}" ]]; then
+    ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-gcc riscv64-linux-gnu-gcc /usr/bin/riscv64-linux-gnu-gcc${GCC_VERSION:+-${GCC_VERSION}} 100
+    ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-g++ riscv64-linux-gnu-g++ /usr/bin/riscv64-linux-gnu-g++${GCC_VERSION:+-${GCC_VERSION}} 100
+fi
+
 riscv64-linux-gnu-gcc --version | head -n1
 qemu-riscv64-static --version | head -n1
diff --git a/tools/cmake/preset/riscv64_linux.cmake b/tools/cmake/preset/riscv64_linux.cmake
index 32b891cd743..c094534b594 100644
--- a/tools/cmake/preset/riscv64_linux.cmake
+++ b/tools/cmake/preset/riscv64_linux.cmake
@@ -9,3 +9,17 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON)
 set_overridable_option(EXECUTORCH_BUILD_DEVTOOLS ON)
 set_overridable_option(EXECUTORCH_ENABLE_BUNDLE_IO ON)
 set_overridable_option(EXECUTORCH_ENABLE_LOGGING ON)
+
+if(EXECUTORCH_BUILD_XNNPACK)
+  if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS 14)
+    message(FATAL_ERROR "XNNPACK requires GCC 14+ on riscv64")
+  endif()
+elseif(NOT DEFINED EXECUTORCH_BUILD_XNNPACK)
+  if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL
+                                 14
+  )
+    set(EXECUTORCH_BUILD_XNNPACK ON)
+  else()
+    message(NOTICE "XNNPACK requires GCC 14+ on riscv64")
+  endif()
+endif()

From eb5d50da53d5134ff33c806d617da28f5d56ac79 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 12 May 2026 02:21:06 +0200
Subject: [PATCH 2/7] Add MobileNetV2 testing on riscv64 (fp32 + quantize)

---
 .ci/scripts/test_riscv_qemu.sh    | 13 ++++-
 .github/workflows/_test_riscv.yml | 12 ++++-
 .github/workflows/riscv64.yml     | 10 +++-
 examples/riscv/aot_riscv.py       | 86 +++++++++++++++++++++++++------
 examples/riscv/requirements.txt   |  1 +
 examples/riscv/run.sh             | 19 ++++++-
 examples/riscv/setup.sh           |  6 +++
 7 files changed, 124 insertions(+), 23 deletions(-)
 create mode 100644 examples/riscv/requirements.txt

diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh
index 658c562ccf2..88d7308bc5e 100755
--- a/.ci/scripts/test_riscv_qemu.sh
+++ b/.ci/scripts/test_riscv_qemu.sh
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 # CI wrapper: install RISC-V cross-compile + qemu-user tooling, then run the
-# RISC-V Phase 1 smoke test (export, cross-compile, qemu-user execution) via
+# RISC-V smoke test (export, cross-compile, qemu-user execution) via
 # examples/riscv/run.sh. The bundled-IO comparison and Test_result: PASS
 # check are done by run.sh.
 
@@ -14,20 +14,26 @@ set -eu
 script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
 et_root_dir=$(realpath "${script_dir}/../..")
 
+model="add"
 xnnpack=false
+quantize=false
 
 usage() {
     cat <<EOF
 Usage: $(basename "$0") [options]
 Options:
+  --model=<NAME>  Which model to export and run (default: add)
   --xnnpack       Enable the XNNPACK backend (AOT partitioner + runtime)
+  --quantize      Produce an 8-bit quantized model
   -h, --help      Show this help
 EOF
 }
 
 for arg in "$@"; do
     case $arg in
+        --model=*) model="${arg#*=}" ;;
         --xnnpack) xnnpack=true ;;
+        --quantize) quantize=true ;;
         -h|--help) usage; exit 0 ;;
         *) echo "Unknown option: $arg" >&2; usage; exit 1 ;;
     esac
@@ -37,6 +43,9 @@ run_extra_args=()
 if ${xnnpack}; then
     run_extra_args+=(--xnnpack)
 fi
+if ${quantize}; then
+    run_extra_args+=(--quantize)
+fi
 
 bash "${et_root_dir}/examples/riscv/setup.sh"
-bash "${et_root_dir}/examples/riscv/run.sh" "${run_extra_args[@]}"
+bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}"
diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml
index ad919578348..892df2219ae 100644
--- a/.github/workflows/_test_riscv.yml
+++ b/.github/workflows/_test_riscv.yml
@@ -12,11 +12,21 @@ on:
         required: false
         type: number
         default: 30
+      model:
+        description: 'Which model to export and run (a key of MODELS in examples/riscv/aot_riscv.py, e.g. add or mv2)'
+        required: false
+        type: string
+        default: 'add'
       xnnpack:
         description: 'Whether to enable XNNPACK'
         required: false
         type: boolean
         default: false
+      quantize:
+        description: 'Produce an 8-bit quantized model'
+        required: false
+        type: boolean
+        default: false
       gcc-version:
         description: 'The version of GCC to use'
         required: false
@@ -43,4 +53,4 @@ jobs:
         install_executorch "--use-pt-pinned-commit"
 
         export GCC_VERSION=${{ inputs.gcc-version }}
-        bash .ci/scripts/test_riscv_qemu.sh ${{ inputs.xnnpack && '--xnnpack' }}
+        bash .ci/scripts/test_riscv_qemu.sh --model="${{ inputs.model }}" ${{ inputs.xnnpack && '--xnnpack' || '' }} ${{ inputs.quantize && '--quantize' || '' }}
diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index bb935d90b6c..ef070d267d2 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -28,13 +28,19 @@ jobs:
     strategy:
       matrix:
         include:
-          - { model: add, xnnpack: false }
-          - { model: add, xnnpack: true }
+          - { model: add, xnnpack: false, quantize: false }
+          - { model: add, xnnpack: true,  quantize: false }
+          - { model: mv2, xnnpack: false, quantize: false }
+          - { model: mv2, xnnpack: false, quantize: true }
+          - { model: mv2, xnnpack: true,  quantize: false }
+          - { model: mv2, xnnpack: true,  quantize: true }
     permissions:
       id-token: write
       contents: read
     with:
+      model: ${{ matrix.model }}
       xnnpack: ${{ matrix.xnnpack }}
+      quantize: ${{ matrix.quantize }}
       # XNNPACK requires GCC 14+
       gcc-version: ${{ matrix.xnnpack && 14 }}
       docker-image: ${{ matrix.xnnpack && 'ci-image:executorch-ubuntu-24.04-gcc14' }}
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index f4df2dbb83a..7b8bd365ac2 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -3,11 +3,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-"""AOT export for the RISC-V Phase 1.0 smoke test.
+"""AOT export for the RISC-V smoke test.
 
-Exports a trivial ``torch.add`` module to a BundledProgram (.bpte) that the
-portable executor_runner can load on a riscv64 target and verify against the
-embedded reference output, emitting ``Test_result: PASS`` on success.
+Exports a small model to a BundledProgram (.bpte) that the portable
+executor_runner can load on a riscv64 target and verify against the embedded
+reference output, emitting ``Test_result: PASS`` on success.
 """
 
 import argparse
@@ -28,47 +28,101 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         return x + y
 
 
+def build_add():
+    model = AddModule().eval()
+    example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0))
+    test_inputs = [
+        (torch.ones(1, 4), torch.full((1, 4), 2.0)),
+        (torch.full((1, 4), 3.0), torch.full((1, 4), 4.0)),
+    ]
+    return model, example_inputs, test_inputs, True
+
+
+def build_mv2():
+    from torchvision.models import mobilenet_v2, MobileNet_V2_Weights
+
+    model = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
+    torch.manual_seed(0)
+    example_inputs = (torch.randn(1, 3, 224, 224),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
+MODELS = {"add": build_add, "mv2": build_mv2}
+
+
 def main() -> None:
     parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--model",
+        choices=sorted(MODELS),
+        default="add",
+        help="Which model to export",
+    )
     parser.add_argument(
         "--output",
         type=Path,
-        default=Path("add_riscv.bpte"),
-        help="Output .bpte path",
+        default=None,
+        help="Output .bpte path (default: <model>_riscv.bpte)",
     )
     parser.add_argument(
         "--xnnpack",
         action="store_true",
         help="Lower through the XNNPACK partitioner",
     )
+    parser.add_argument(
+        "--quantize",
+        action="store_true",
+        help="Produce an 8-bit quantized model",
+    )
     args = parser.parse_args()
 
-    model = AddModule().eval()
-    example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0))
+    if args.output is None:
+        args.output = Path(f"{args.model}_riscv.bpte")
 
-    exported = export(model, example_inputs)
+    model, example_inputs, test_inputs, strict = MODELS[args.model]()
+
+    if args.quantize:
+        from executorch.examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType
+        from executorch.examples.xnnpack.quantization.utils import quantize
+
+        if args.model not in MODEL_NAME_TO_OPTIONS:
+            parser.error(f"No XNNPACK quantization recipe for model {args.model!r}")
+        quant_type = MODEL_NAME_TO_OPTIONS[args.model].quantization
+        if quant_type == QuantType.NONE:
+            parser.error(f"Quantization recipe for {args.model!r} is NONE")
+        ep = export(model, example_inputs, strict=strict)
+        model = quantize(ep.module(), example_inputs, quant_type)
+
+    exported = export(model, example_inputs, strict=strict)
     partitioners = []
     if args.xnnpack:
         from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
             XnnpackPartitioner,
         )
-
         partitioners.append(XnnpackPartitioner())
 
-    edge = to_edge_transform_and_lower(exported, partitioner=partitioners)
+    compile_config = None
+    if args.quantize:
+        from executorch.exir import EdgeCompileConfig
+
+        compile_config = EdgeCompileConfig(_check_ir_validity=False)
+
+    edge = to_edge_transform_and_lower(
+        exported, partitioner=partitioners, compile_config=compile_config
+    )
     delegated = sum(
         1
         for n in edge.exported_program().graph.nodes
         if n.op == "call_function" and "call_delegate" in str(n.target)
     )
-    print(f"[aot_riscv] xnnpack={args.xnnpack} delegated_nodes={delegated}")
+    print(
+        f"[aot_riscv] model={args.model} xnnpack={args.xnnpack} "
+        f"quantize={args.quantize} delegated_nodes={delegated}"
+    )
 
     et_program = edge.to_executorch()
 
-    test_inputs = [
-        (torch.ones(1, 4), torch.full((1, 4), 2.0)),
-        (torch.full((1, 4), 3.0), torch.full((1, 4), 4.0)),
-    ]
     test_suite = MethodTestSuite(
         method_name="forward",
         test_cases=[
diff --git a/examples/riscv/requirements.txt b/examples/riscv/requirements.txt
new file mode 100644
index 00000000000..e35531e566f
--- /dev/null
+++ b/examples/riscv/requirements.txt
@@ -0,0 +1 @@
+torchvision
diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh
index d5deb016fcc..9b2f950d8b8 100755
--- a/examples/riscv/run.sh
+++ b/examples/riscv/run.sh
@@ -20,13 +20,17 @@ build_dir="${et_root_dir}/cmake-out-riscv"
 output_dir="${et_root_dir}/riscv_test"
 qemu="qemu-riscv64-static"
 qemu_timeout="600"
+model="add"
 xnnpack=false
+quantize=false
 
 usage() {
     cat <<EOF
 Usage: $(basename "$0") [options]
 Options:
+  --model=<NAME>          Which model to export and run (default: ${model})
   --xnnpack               Enable the XNNPACK backend (AOT partitioner + runtime)
+  --quantize              Produce an 8-bit quantized model
   --build_only            Only export and cross-compile; do not invoke QEMU
   --build_dir=<DIR>       CMake build directory (default: ${build_dir})
   --output_dir=<DIR>      Directory for the exported .bpte (default: ${output_dir})
@@ -38,7 +42,9 @@ EOF
 
 for arg in "$@"; do
     case $arg in
+        --model=*) model="${arg#*=}" ;;
         --xnnpack) xnnpack=true ;;
+        --quantize) quantize=true ;;
         --build_only) build_only=true ;;
         --build_dir=*) build_dir="${arg#*=}" ;;
         --output_dir=*) output_dir="${arg#*=}" ;;
@@ -50,14 +56,17 @@ for arg in "$@"; do
 done
 
 mkdir -p "${output_dir}"
-bpte_path="${output_dir}/add_riscv.bpte"
+bpte_path="${output_dir}/${model}_riscv.bpte"
 
 echo "[run.sh] Step 1/3: AOT export on host"
 aot_extra_args=()
 if ${xnnpack}; then
     aot_extra_args+=(--xnnpack)
 fi
-python "${script_dir}/aot_riscv.py" "${aot_extra_args[@]}" --output "${bpte_path}"
+if ${quantize}; then
+    aot_extra_args+=(--quantize)
+fi
+python "${script_dir}/aot_riscv.py" --model "${model}" "${aot_extra_args[@]}" --output "${bpte_path}"
 
 echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux"
 cmake_extra_args=()
@@ -99,9 +108,15 @@ export QEMU_LD_PREFIX="${QEMU_LD_PREFIX:-/usr/riscv64-linux-gnu}"
 log_file=$(mktemp)
 trap 'rm -f "${log_file}"' EXIT
 
+runner_extra_args=()
+if ${quantize}; then
+    runner_extra_args+=(--bundleio_rtol=0.1 --bundleio_atol=0.25)
+fi
+
 set +e
 timeout --signal=KILL "${qemu_timeout}" "${qemu}" "${runner}" \
     --model_path="${bpte_path}" \
+    "${runner_extra_args[@]}" \
     2>&1 | tee "${log_file}"
 qemu_status=${PIPESTATUS[0]}
 set -e
diff --git a/examples/riscv/setup.sh b/examples/riscv/setup.sh
index 6fba1c25bdb..955c8ca3386 100755
--- a/examples/riscv/setup.sh
+++ b/examples/riscv/setup.sh
@@ -10,6 +10,8 @@
 
 set -eu
 
+script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
+
 if ! command -v apt-get >/dev/null 2>&1; then
     echo "[$(basename "$0")] this setup script targets Debian/Ubuntu (apt-get not found)" >&2
     exit 1
@@ -30,6 +32,7 @@ ${SUDO} apt-get install -y --no-install-recommends \
     libc6-dev-riscv64-cross \
     cmake \
     file \
+    ca-certificates \
     qemu-user-static
 
 if [[ -n "${GCC_VERSION+x}" ]]; then
@@ -39,3 +42,6 @@ fi
 
 riscv64-linux-gnu-gcc --version | head -n1
 qemu-riscv64-static --version | head -n1
+
+# Some python packages also need to be installed
+pip install -r "${script_dir}/requirements.txt"

From 190c6f011d0b516f622fb6c3367c48ede09424bb Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 12 May 2026 03:07:26 +0200
Subject: [PATCH 3/7] Add --verbose flag for riscv64 scripts

---
 .ci/scripts/test_riscv_qemu.sh |  5 +++++
 examples/riscv/aot_riscv.py    | 17 ++++++++++++++++-
 examples/riscv/run.sh          |  6 ++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh
index 88d7308bc5e..0d8b2815f74 100755
--- a/.ci/scripts/test_riscv_qemu.sh
+++ b/.ci/scripts/test_riscv_qemu.sh
@@ -17,6 +17,7 @@ et_root_dir=$(realpath "${script_dir}/../..")
 model="add"
 xnnpack=false
 quantize=false
+verbose=false
 
 usage() {
     cat <<EOF
@@ -34,6 +35,7 @@ for arg in "$@"; do
         --model=*) model="${arg#*=}" ;;
         --xnnpack) xnnpack=true ;;
         --quantize) quantize=true ;;
+        --verbose) verbose=true ;;
         -h|--help) usage; exit 0 ;;
         *) echo "Unknown option: $arg" >&2; usage; exit 1 ;;
     esac
@@ -46,6 +48,9 @@ fi
 if ${quantize}; then
     run_extra_args+=(--quantize)
 fi
+if ${verbose}; then
+    run_extra_args+=(--verbose)
+fi
 
 bash "${et_root_dir}/examples/riscv/setup.sh"
 bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}"
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 7b8bd365ac2..3c504ed7201 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -11,6 +11,7 @@
 """
 
 import argparse
+import logging
 from pathlib import Path
 
 import torch
@@ -75,8 +76,16 @@ def main() -> None:
         action="store_true",
         help="Produce an 8-bit quantized model",
     )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Enable XNNPACK partitioner DEBUG logging and dump the lowered graph",
+    )
     args = parser.parse_args()
 
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+
     if args.output is None:
         args.output = Path(f"{args.model}_riscv.bpte")
 
@@ -100,7 +109,8 @@ def main() -> None:
         from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
             XnnpackPartitioner,
         )
-        partitioners.append(XnnpackPartitioner())
+
+        partitioners.append(XnnpackPartitioner(verbose=args.verbose))
 
     compile_config = None
     if args.quantize:
@@ -121,6 +131,11 @@ def main() -> None:
         f"quantize={args.quantize} delegated_nodes={delegated}"
     )
 
+    if args.verbose:
+        from executorch.exir.backend.utils import print_delegated_graph
+
+        print_delegated_graph(edge.exported_program().graph_module)
+
     et_program = edge.to_executorch()
 
     test_suite = MethodTestSuite(
diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh
index 9b2f950d8b8..644944ab8a4 100755
--- a/examples/riscv/run.sh
+++ b/examples/riscv/run.sh
@@ -23,6 +23,7 @@ qemu_timeout="600"
 model="add"
 xnnpack=false
 quantize=false
+verbose=false
 
 usage() {
     cat <<EOF
@@ -31,6 +32,7 @@ Options:
   --model=<NAME>          Which model to export and run (default: ${model})
   --xnnpack               Enable the XNNPACK backend (AOT partitioner + runtime)
   --quantize              Produce an 8-bit quantized model
+  --verbose               Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
   --build_only            Only export and cross-compile; do not invoke QEMU
   --build_dir=<DIR>       CMake build directory (default: ${build_dir})
   --output_dir=<DIR>      Directory for the exported .bpte (default: ${output_dir})
@@ -45,6 +47,7 @@ for arg in "$@"; do
         --model=*) model="${arg#*=}" ;;
         --xnnpack) xnnpack=true ;;
         --quantize) quantize=true ;;
+        --verbose) verbose=true ;;
         --build_only) build_only=true ;;
         --build_dir=*) build_dir="${arg#*=}" ;;
         --output_dir=*) output_dir="${arg#*=}" ;;
@@ -66,6 +69,9 @@ fi
 if ${quantize}; then
     aot_extra_args+=(--quantize)
 fi
+if ${verbose}; then
+    aot_extra_args+=(--verbose)
+fi
 python "${script_dir}/aot_riscv.py" --model "${model}" "${aot_extra_args[@]}" --output "${bpte_path}"
 
 echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux"

From 9d6221c063754e39f8ec7a424c7ccdb4f1d36bf4 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Tue, 12 May 2026 18:20:04 +0200
Subject: [PATCH 4/7] Add MobileBERT testing on riscv64 (fp32 + quantize)

---
 .github/workflows/riscv64.yml   | 14 ++++++++------
 examples/riscv/aot_riscv.py     | 33 ++++++++++++++++++++++++++++++++-
 examples/riscv/requirements.txt |  1 +
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index ef070d267d2..2e38126c25a 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -28,12 +28,14 @@ jobs:
     strategy:
       matrix:
         include:
-          - { model: add, xnnpack: false, quantize: false }
-          - { model: add, xnnpack: true,  quantize: false }
-          - { model: mv2, xnnpack: false, quantize: false }
-          - { model: mv2, xnnpack: false, quantize: true }
-          - { model: mv2, xnnpack: true,  quantize: false }
-          - { model: mv2, xnnpack: true,  quantize: true }
+          - { model: add,        xnnpack: false, quantize: false }
+          - { model: add,        xnnpack: true,  quantize: false }
+          - { model: mv2,        xnnpack: false, quantize: false }
+          - { model: mv2,        xnnpack: true,  quantize: false }
+          - { model: mv2,        xnnpack: true,  quantize: true }
+          - { model: mobilebert, xnnpack: false, quantize: false }
+          - { model: mobilebert, xnnpack: true,  quantize: false }
+          - { model: mobilebert, xnnpack: true,  quantize: true }
     permissions:
       id-token: write
       contents: read
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 3c504ed7201..88e239860b0 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -49,7 +49,38 @@ def build_mv2():
     return model, example_inputs, test_inputs, False
 
 
-MODELS = {"add": build_add, "mv2": build_mv2}
+def build_mobilebert():
+    from transformers import MobileBertConfig, MobileBertModel
+
+    config = MobileBertConfig(
+        vocab_size=1024,
+        hidden_size=128,
+        embedding_size=64,
+        num_hidden_layers=2,
+        num_attention_heads=2,
+        intermediate_size=128,
+        intra_bottleneck_size=32,
+    )
+
+    class Wrapper(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.model = MobileBertModel(config).eval()
+
+        def forward(self, input_ids):
+            return self.model(input_ids).last_hidden_state
+
+    model = Wrapper().eval()
+    example_inputs = (torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8]]),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
+MODELS = {
+    "add": build_add,
+    "mv2": build_mv2,
+    "mobilebert": build_mobilebert,
+}
 
 
 def main() -> None:
diff --git a/examples/riscv/requirements.txt b/examples/riscv/requirements.txt
index e35531e566f..273e7156a1d 100644
--- a/examples/riscv/requirements.txt
+++ b/examples/riscv/requirements.txt
@@ -1 +1,2 @@
 torchvision
+transformers

From 8dac448412a0a7f4232c65d9721996037cb4ad98 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Sat, 16 May 2026 02:30:33 +0200
Subject: [PATCH 5/7] Add Llama testing on riscv64 (fp32 + quantize)

---
 .github/workflows/riscv64.yml |  3 +++
 examples/riscv/aot_riscv.py   | 31 +++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index 2e38126c25a..fec5c1714ed 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -36,6 +36,9 @@ jobs:
           - { model: mobilebert, xnnpack: false, quantize: false }
           - { model: mobilebert, xnnpack: true,  quantize: false }
           - { model: mobilebert, xnnpack: true,  quantize: true }
+          - { model: llama2,     xnnpack: false, quantize: false }
+          - { model: llama2,     xnnpack: true,  quantize: false }
+          - { model: llama2,     xnnpack: true,  quantize: true }
     permissions:
       id-token: write
       contents: read
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 88e239860b0..1225ac84f19 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -76,10 +76,41 @@ def forward(self, input_ids):
     return model, example_inputs, test_inputs, False
 
 
+def build_llama2():
+    # Use the executorch native Transformer (matches MODEL_NAME_TO_MODEL["llama2"]
+    # in examples/models/__init__.py). Unlike HF LlamaModel, RoPE freqs are
+    # precomputed buffers and just sliced at forward time, so no
+    # torch.arange()/Long causal mask is built per forward — which is what
+    # the PT2E XNNPACK quantizer trips over on HF Llama.
+    from executorch.examples.models.llama.llama_transformer import (
+        construct_transformer,
+    )
+    from executorch.examples.models.llama.model_args import ModelArgs
+
+    seq_len = 8
+    args = ModelArgs(
+        dim=128,
+        n_layers=2,
+        n_heads=4,
+        n_kv_heads=2,            # GQA: kv_heads < n_heads exercises the GQA path
+        vocab_size=1024,
+        hidden_dim=256,          # SwiGLU FFN: gate + up projections at this width
+        max_seq_len=seq_len,
+        max_context_len=seq_len,
+        rope_theta=10000.0,
+    )
+    torch.manual_seed(0)
+    model = construct_transformer(args).eval()
+    example_inputs = (torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8]], dtype=torch.long),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
 MODELS = {
     "add": build_add,
     "mv2": build_mv2,
     "mobilebert": build_mobilebert,
+    "llama2": build_llama2,
 }
 
 

From 1eae4652436a2bd6d60334babee5fe166ea16d41 Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Sat, 16 May 2026 11:11:45 +0200
Subject: [PATCH 6/7] Add ResNet18 testing on riscv64 (fp32 + quantize)

---
 .github/workflows/riscv64.yml |  3 +++
 examples/riscv/aot_riscv.py   | 11 +++++++++++
 2 files changed, 14 insertions(+)

diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index fec5c1714ed..b32924046a4 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -39,6 +39,9 @@ jobs:
           - { model: llama2,     xnnpack: false, quantize: false }
           - { model: llama2,     xnnpack: true,  quantize: false }
           - { model: llama2,     xnnpack: true,  quantize: true }
+          - { model: resnet18,   xnnpack: false, quantize: false }
+          - { model: resnet18,   xnnpack: true,  quantize: false }
+          - { model: resnet18,   xnnpack: true,  quantize: true }
     permissions:
       id-token: write
       contents: read
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 1225ac84f19..0ed6af9a68e 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -106,11 +106,22 @@ def build_llama2():
     return model, example_inputs, test_inputs, False
 
 
+def build_resnet18():
+    from torchvision.models import resnet18, ResNet18_Weights
+
+    model = resnet18(weights=ResNet18_Weights.DEFAULT).eval()
+    torch.manual_seed(0)
+    example_inputs = (torch.randn(1, 3, 224, 224),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
 MODELS = {
     "add": build_add,
     "mv2": build_mv2,
     "mobilebert": build_mobilebert,
     "llama2": build_llama2,
+    "resnet18": build_resnet18,
 }
 
 

From 4b5e42e5c6e147ad32d70a396cd9b715793f76bc Mon Sep 17 00:00:00 2001
From: Ludovic Henry <git@ludovic.dev>
Date: Sat, 16 May 2026 13:34:00 +0200
Subject: [PATCH 7/7] Apply lintrunner formatting fixes to aot_riscv.py

---
 examples/riscv/aot_riscv.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 0ed6af9a68e..22e8b31df73 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -82,9 +82,7 @@ def build_llama2():
     # precomputed buffers and just sliced at forward time, so no
     # torch.arange()/Long causal mask is built per forward — which is what
     # the PT2E XNNPACK quantizer trips over on HF Llama.
-    from executorch.examples.models.llama.llama_transformer import (
-        construct_transformer,
-    )
+    from executorch.examples.models.llama.llama_transformer import construct_transformer
     from executorch.examples.models.llama.model_args import ModelArgs
 
     seq_len = 8
@@ -92,9 +90,9 @@ def build_llama2():
         dim=128,
         n_layers=2,
         n_heads=4,
-        n_kv_heads=2,            # GQA: kv_heads < n_heads exercises the GQA path
+        n_kv_heads=2,  # GQA: kv_heads < n_heads exercises the GQA path
         vocab_size=1024,
-        hidden_dim=256,          # SwiGLU FFN: gate + up projections at this width
+        hidden_dim=256,  # SwiGLU FFN: gate + up projections at this width
         max_seq_len=seq_len,
         max_context_len=seq_len,
         rope_theta=10000.0,