From 58cd50c6bea9413a46712c9c21d3d545210ae75f Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 12 May 2026 01:12:00 +0200 Subject: [PATCH 1/7] Add XNNPACK backend testing for riscv64 It requires GCC 14+ to compile. --- .ci/scripts/test_riscv_qemu.sh | 26 +++++++++++++++++++++++++- .github/workflows/_test_riscv.yml | 18 ++++++++++++++++-- .github/workflows/riscv64.yml | 10 ++++++++++ examples/riscv/aot_riscv.py | 23 ++++++++++++++++++++++- examples/riscv/run.sh | 14 +++++++++++++- examples/riscv/setup.sh | 9 +++++++-- tools/cmake/preset/riscv64_linux.cmake | 14 ++++++++++++++ 7 files changed, 107 insertions(+), 7 deletions(-) diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh index 27ab57f3b09..658c562ccf2 100755 --- a/.ci/scripts/test_riscv_qemu.sh +++ b/.ci/scripts/test_riscv_qemu.sh @@ -14,5 +14,29 @@ set -eu script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")") et_root_dir=$(realpath "${script_dir}/../..") +xnnpack=false + +usage() { + cat <&2; usage; exit 1 ;; + esac +done + +run_extra_args=() +if ${xnnpack}; then + run_extra_args+=(--xnnpack) +fi + bash "${et_root_dir}/examples/riscv/setup.sh" -bash "${et_root_dir}/examples/riscv/run.sh" +bash "${et_root_dir}/examples/riscv/run.sh" "${run_extra_args[@]}" diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml index 79eec6cbb4c..ad919578348 100644 --- a/.github/workflows/_test_riscv.yml +++ b/.github/workflows/_test_riscv.yml @@ -12,13 +12,26 @@ on: required: false type: number default: 30 + xnnpack: + description: 'Whether to enable XNNPACK' + required: false + type: boolean + default: false + gcc-version: + description: 'The version of GCC to use' + required: false + type: number + docker-image: + description: 'The docker image to use for this job' + required: false + type: string jobs: run: uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.2xlarge - docker-image: ci-image:executorch-ubuntu-22.04-gcc11 + docker-image: ${{ inputs.docker-image || 'ci-image:executorch-ubuntu-22.04-gcc11' }} submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: ${{ inputs.timeout }} @@ -29,4 +42,5 @@ jobs: source .ci/scripts/utils.sh install_executorch "--use-pt-pinned-commit" - bash .ci/scripts/test_riscv_qemu.sh + export GCC_VERSION=${{ inputs.gcc-version }} + bash .ci/scripts/test_riscv_qemu.sh ${{ inputs.xnnpack && '--xnnpack' }} diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index aa970dc6270..bb935d90b6c 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -25,6 +25,16 @@ jobs: test-riscv: name: test-riscv uses: ./.github/workflows/_test_riscv.yml + strategy: + matrix: + include: + - { model: add, xnnpack: false } + - { model: add, xnnpack: true } permissions: id-token: write contents: read + with: + xnnpack: ${{ matrix.xnnpack }} + # XNNPACK requires GCC 14+ + gcc-version: ${{ matrix.xnnpack && 14 }} + docker-image: ${{ matrix.xnnpack && 'ci-image:executorch-ubuntu-24.04-gcc14' }} diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index 8076f056ba2..f4df2dbb83a 100644 --- a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -36,13 +36,34 @@ def main() -> None: default=Path("add_riscv.bpte"), help="Output .bpte path", ) + parser.add_argument( + "--xnnpack", + action="store_true", + help="Lower through the XNNPACK partitioner", + ) args = parser.parse_args() model = AddModule().eval() example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0)) exported = export(model, example_inputs) - et_program = to_edge_transform_and_lower(exported).to_executorch() + partitioners = [] + if args.xnnpack: + from executorch.backends.xnnpack.partition.xnnpack_partitioner import ( + XnnpackPartitioner, + ) + + partitioners.append(XnnpackPartitioner()) + + edge = to_edge_transform_and_lower(exported, partitioner=partitioners) + delegated = sum( + 1 + for n in edge.exported_program().graph.nodes + if n.op == "call_function" and "call_delegate" in str(n.target) + ) + print(f"[aot_riscv] xnnpack={args.xnnpack} delegated_nodes={delegated}") + + et_program = edge.to_executorch() test_inputs = [ (torch.ones(1, 4), torch.full((1, 4), 2.0)), diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh index 7c05edcbc8c..d5deb016fcc 100755 --- a/examples/riscv/run.sh +++ b/examples/riscv/run.sh @@ -20,11 +20,13 @@ build_dir="${et_root_dir}/cmake-out-riscv" output_dir="${et_root_dir}/riscv_test" qemu="qemu-riscv64-static" qemu_timeout="600" +xnnpack=false usage() { cat < CMake build directory (default: ${build_dir}) --output_dir= Directory for the exported .bpte (default: ${output_dir}) @@ -36,6 +38,7 @@ EOF for arg in "$@"; do case $arg in + --xnnpack) xnnpack=true ;; --build_only) build_only=true ;; --build_dir=*) build_dir="${arg#*=}" ;; --output_dir=*) output_dir="${arg#*=}" ;; @@ -50,11 +53,20 @@ mkdir -p "${output_dir}" bpte_path="${output_dir}/add_riscv.bpte" echo "[run.sh] Step 1/3: AOT export on host" -python "${script_dir}/aot_riscv.py" --output "${bpte_path}" +aot_extra_args=() +if ${xnnpack}; then + aot_extra_args+=(--xnnpack) +fi +python "${script_dir}/aot_riscv.py" "${aot_extra_args[@]}" --output "${bpte_path}" echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux" +cmake_extra_args=() +if ${xnnpack}; then + cmake_extra_args+=(-DEXECUTORCH_BUILD_XNNPACK=ON) +fi cmake -S "${et_root_dir}" -B "${build_dir}" \ --preset riscv64-linux \ + "${cmake_extra_args[@]}" \ -DCMAKE_BUILD_TYPE=Release cmake --build "${build_dir}" -j"$(nproc)" --target executor_runner diff --git a/examples/riscv/setup.sh b/examples/riscv/setup.sh index c1342c60d5e..6fba1c25bdb 100755 --- a/examples/riscv/setup.sh +++ b/examples/riscv/setup.sh @@ -23,8 +23,8 @@ fi ${SUDO} apt-get update ${SUDO} apt-get install -y --no-install-recommends \ build-essential \ - gcc-riscv64-linux-gnu \ - g++-riscv64-linux-gnu \ + gcc${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \ + g++${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \ binutils-riscv64-linux-gnu \ libc6-riscv64-cross \ libc6-dev-riscv64-cross \ @@ -32,5 +32,10 @@ ${SUDO} apt-get install -y --no-install-recommends \ file \ qemu-user-static +if [[ -n "${GCC_VERSION+x}" ]]; then + ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-gcc riscv64-linux-gnu-gcc /usr/bin/riscv64-linux-gnu-gcc${GCC_VERSION:+-${GCC_VERSION}} 100 + ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-g++ riscv64-linux-gnu-g++ /usr/bin/riscv64-linux-gnu-g++${GCC_VERSION:+-${GCC_VERSION}} 100 +fi + riscv64-linux-gnu-gcc --version | head -n1 qemu-riscv64-static --version | head -n1 diff --git a/tools/cmake/preset/riscv64_linux.cmake b/tools/cmake/preset/riscv64_linux.cmake index 32b891cd743..c094534b594 100644 --- a/tools/cmake/preset/riscv64_linux.cmake +++ b/tools/cmake/preset/riscv64_linux.cmake @@ -9,3 +9,17 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON) set_overridable_option(EXECUTORCH_BUILD_DEVTOOLS ON) set_overridable_option(EXECUTORCH_ENABLE_BUNDLE_IO ON) set_overridable_option(EXECUTORCH_ENABLE_LOGGING ON) + +if(EXECUTORCH_BUILD_XNNPACK) + if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS 14) + message(FATAL_ERROR "XNNPACK requires GCC 14+ on riscv64") + endif() +elseif(NOT DEFINED EXECUTORCH_BUILD_XNNPACK) + if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL + 14 + ) + set(EXECUTORCH_BUILD_XNNPACK ON) + else() + message(NOTICE "XNNPACK requires GCC 14+ on riscv64") + endif() +endif() From eb5d50da53d5134ff33c806d617da28f5d56ac79 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 12 May 2026 02:21:06 +0200 Subject: [PATCH 2/7] Add MobileNetV2 testing on riscv64 (fp32 + quantize) --- .ci/scripts/test_riscv_qemu.sh | 13 ++++- .github/workflows/_test_riscv.yml | 12 ++++- .github/workflows/riscv64.yml | 10 +++- examples/riscv/aot_riscv.py | 86 +++++++++++++++++++++++++------ examples/riscv/requirements.txt | 1 + examples/riscv/run.sh | 19 ++++++- examples/riscv/setup.sh | 6 +++ 7 files changed, 124 insertions(+), 23 deletions(-) create mode 100644 examples/riscv/requirements.txt diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh index 658c562ccf2..88d7308bc5e 100755 --- a/.ci/scripts/test_riscv_qemu.sh +++ b/.ci/scripts/test_riscv_qemu.sh @@ -5,7 +5,7 @@ # LICENSE file in the root directory of this source tree. # CI wrapper: install RISC-V cross-compile + qemu-user tooling, then run the -# RISC-V Phase 1 smoke test (export, cross-compile, qemu-user execution) via +# RISC-V smoke test (export, cross-compile, qemu-user execution) via # examples/riscv/run.sh. The bundled-IO comparison and Test_result: PASS # check are done by run.sh. @@ -14,20 +14,26 @@ set -eu script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")") et_root_dir=$(realpath "${script_dir}/../..") +model="add" xnnpack=false +quantize=false usage() { cat < Which model to export and run (default: add) --xnnpack Enable the XNNPACK backend (AOT partitioner + runtime) + --quantize Produce an 8-bit quantized model -h, --help Show this help EOF } for arg in "$@"; do case $arg in + --model=*) model="${arg#*=}" ;; --xnnpack) xnnpack=true ;; + --quantize) quantize=true ;; -h|--help) usage; exit 0 ;; *) echo "Unknown option: $arg" >&2; usage; exit 1 ;; esac @@ -37,6 +43,9 @@ run_extra_args=() if ${xnnpack}; then run_extra_args+=(--xnnpack) fi +if ${quantize}; then + run_extra_args+=(--quantize) +fi bash "${et_root_dir}/examples/riscv/setup.sh" -bash "${et_root_dir}/examples/riscv/run.sh" "${run_extra_args[@]}" +bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}" diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml index ad919578348..892df2219ae 100644 --- a/.github/workflows/_test_riscv.yml +++ b/.github/workflows/_test_riscv.yml @@ -12,11 +12,21 @@ on: required: false type: number default: 30 + model: + description: 'Which model to run. Possible values are: add, mv2 (mobilenetv2)' + required: false + type: string + default: 'add' xnnpack: description: 'Whether to enable XNNPACK' required: false type: boolean default: false + quantize: + description: 'Produce an 8-bit quantized model' + required: false + type: boolean + default: false gcc-version: description: 'The version of GCC to use' required: false @@ -43,4 +53,4 @@ jobs: install_executorch "--use-pt-pinned-commit" export GCC_VERSION=${{ inputs.gcc-version }} - bash .ci/scripts/test_riscv_qemu.sh ${{ inputs.xnnpack && '--xnnpack' }} + bash .ci/scripts/test_riscv_qemu.sh --model="${{ inputs.model }}" ${{ inputs.xnnpack && '--xnnpack' }} ${{ inputs.quantize && '--quantize' }} diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index bb935d90b6c..ef070d267d2 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -28,13 +28,19 @@ jobs: strategy: matrix: include: - - { model: add, xnnpack: false } - - { model: add, xnnpack: true } + - { model: add, xnnpack: false, quantize: false } + - { model: add, xnnpack: true, quantize: false } + - { model: mv2, xnnpack: false, quantize: false } + - { model: mv2, xnnpack: false, quantize: true } + - { model: mv2, xnnpack: true, quantize: false } + - { model: mv2, xnnpack: true, quantize: true } permissions: id-token: write contents: read with: + model: ${{ matrix.model }} xnnpack: ${{ matrix.xnnpack }} + quantize: ${{ matrix.quantize }} # XNNPACK requires GCC 14+ gcc-version: ${{ matrix.xnnpack && 14 }} docker-image: ${{ matrix.xnnpack && 'ci-image:executorch-ubuntu-24.04-gcc14' }} diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index f4df2dbb83a..7b8bd365ac2 100644 --- a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -3,11 +3,11 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -"""AOT export for the RISC-V Phase 1.0 smoke test. +"""AOT export for the RISC-V smoke test. -Exports a trivial ``torch.add`` module to a BundledProgram (.bpte) that the -portable executor_runner can load on a riscv64 target and verify against the -embedded reference output, emitting ``Test_result: PASS`` on success. +Exports a small model to a BundledProgram (.bpte) that the portable +executor_runner can load on a riscv64 target and verify against the embedded +reference output, emitting ``Test_result: PASS`` on success. """ import argparse @@ -28,47 +28,101 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return x + y +def build_add(): + model = AddModule().eval() + example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0)) + test_inputs = [ + (torch.ones(1, 4), torch.full((1, 4), 2.0)), + (torch.full((1, 4), 3.0), torch.full((1, 4), 4.0)), + ] + return model, example_inputs, test_inputs, True + + +def build_mv2(): + from torchvision.models import mobilenet_v2, MobileNet_V2_Weights + + model = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval() + torch.manual_seed(0) + example_inputs = (torch.randn(1, 3, 224, 224),) + test_inputs = [example_inputs] + return model, example_inputs, test_inputs, False + + +MODELS = {"add": build_add, "mv2": build_mv2} + + def main() -> None: parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--model", + choices=sorted(MODELS), + default="add", + help="Which model to export", + ) parser.add_argument( "--output", type=Path, - default=Path("add_riscv.bpte"), - help="Output .bpte path", + default=None, + help="Output .bpte path (default: _riscv.bpte)", ) parser.add_argument( "--xnnpack", action="store_true", help="Lower through the XNNPACK partitioner", ) + parser.add_argument( + "--quantize", + action="store_true", + help="Produce an 8-bit quantized model", + ) args = parser.parse_args() - model = AddModule().eval() - example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0)) + if args.output is None: + args.output = Path(f"{args.model}_riscv.bpte") - exported = export(model, example_inputs) + model, example_inputs, test_inputs, strict = MODELS[args.model]() + + if args.quantize: + from executorch.examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType + from executorch.examples.xnnpack.quantization.utils import quantize + + if args.model not in MODEL_NAME_TO_OPTIONS: + parser.error(f"No XNNPACK quantization recipe for model {args.model!r}") + quant_type = MODEL_NAME_TO_OPTIONS[args.model].quantization + if quant_type == QuantType.NONE: + parser.error(f"Quantization recipe for {args.model!r} is NONE") + ep = export(model, example_inputs, strict=strict) + model = quantize(ep.module(), example_inputs, quant_type) + + exported = export(model, example_inputs, strict=strict) partitioners = [] if args.xnnpack: from executorch.backends.xnnpack.partition.xnnpack_partitioner import ( XnnpackPartitioner, ) - partitioners.append(XnnpackPartitioner()) - edge = to_edge_transform_and_lower(exported, partitioner=partitioners) + compile_config = None + if args.quantize: + from executorch.exir import EdgeCompileConfig + + compile_config = EdgeCompileConfig(_check_ir_validity=False) + + edge = to_edge_transform_and_lower( + exported, partitioner=partitioners, compile_config=compile_config + ) delegated = sum( 1 for n in edge.exported_program().graph.nodes if n.op == "call_function" and "call_delegate" in str(n.target) ) - print(f"[aot_riscv] xnnpack={args.xnnpack} delegated_nodes={delegated}") + print( + f"[aot_riscv] model={args.model} xnnpack={args.xnnpack} " + f"quantize={args.quantize} delegated_nodes={delegated}" + ) et_program = edge.to_executorch() - test_inputs = [ - (torch.ones(1, 4), torch.full((1, 4), 2.0)), - (torch.full((1, 4), 3.0), torch.full((1, 4), 4.0)), - ] test_suite = MethodTestSuite( method_name="forward", test_cases=[ diff --git a/examples/riscv/requirements.txt b/examples/riscv/requirements.txt new file mode 100644 index 00000000000..e35531e566f --- /dev/null +++ b/examples/riscv/requirements.txt @@ -0,0 +1 @@ +torchvision diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh index d5deb016fcc..9b2f950d8b8 100755 --- a/examples/riscv/run.sh +++ b/examples/riscv/run.sh @@ -20,13 +20,17 @@ build_dir="${et_root_dir}/cmake-out-riscv" output_dir="${et_root_dir}/riscv_test" qemu="qemu-riscv64-static" qemu_timeout="600" +model="add" xnnpack=false +quantize=false usage() { cat < Which model to export and run (default: ${model}) --xnnpack Enable the XNNPACK backend (AOT partitioner + runtime) + --quantize Produce an 8-bit quantized model --build_only Only export and cross-compile; do not invoke QEMU --build_dir= CMake build directory (default: ${build_dir}) --output_dir= Directory for the exported .bpte (default: ${output_dir}) @@ -38,7 +42,9 @@ EOF for arg in "$@"; do case $arg in + --model=*) model="${arg#*=}" ;; --xnnpack) xnnpack=true ;; + --quantize) quantize=true ;; --build_only) build_only=true ;; --build_dir=*) build_dir="${arg#*=}" ;; --output_dir=*) output_dir="${arg#*=}" ;; @@ -50,14 +56,17 @@ for arg in "$@"; do done mkdir -p "${output_dir}" -bpte_path="${output_dir}/add_riscv.bpte" +bpte_path="${output_dir}/${model}_riscv.bpte" echo "[run.sh] Step 1/3: AOT export on host" aot_extra_args=() if ${xnnpack}; then aot_extra_args+=(--xnnpack) fi -python "${script_dir}/aot_riscv.py" "${aot_extra_args[@]}" --output "${bpte_path}" +if ${quantize}; then + aot_extra_args+=(--quantize) +fi +python "${script_dir}/aot_riscv.py" --model "${model}" "${aot_extra_args[@]}" --output "${bpte_path}" echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux" cmake_extra_args=() @@ -99,9 +108,15 @@ export QEMU_LD_PREFIX="${QEMU_LD_PREFIX:-/usr/riscv64-linux-gnu}" log_file=$(mktemp) trap 'rm -f "${log_file}"' EXIT +runner_extra_args=() +if ${quantize}; then + runner_extra_args+=(--bundleio_rtol=0.1 --bundleio_atol=0.25) +fi + set +e timeout --signal=KILL "${qemu_timeout}" "${qemu}" "${runner}" \ --model_path="${bpte_path}" \ + "${runner_extra_args[@]}" \ 2>&1 | tee "${log_file}" qemu_status=${PIPESTATUS[0]} set -e diff --git a/examples/riscv/setup.sh b/examples/riscv/setup.sh index 6fba1c25bdb..955c8ca3386 100755 --- a/examples/riscv/setup.sh +++ b/examples/riscv/setup.sh @@ -10,6 +10,8 @@ set -eu +script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + if ! command -v apt-get >/dev/null 2>&1; then echo "[$(basename "$0")] this setup script targets Debian/Ubuntu (apt-get not found)" >&2 exit 1 @@ -30,6 +32,7 @@ ${SUDO} apt-get install -y --no-install-recommends \ libc6-dev-riscv64-cross \ cmake \ file \ + ca-certificates \ qemu-user-static if [[ -n "${GCC_VERSION+x}" ]]; then @@ -39,3 +42,6 @@ fi riscv64-linux-gnu-gcc --version | head -n1 qemu-riscv64-static --version | head -n1 + +# Some python packages also need to be installed +pip install -r "${script_dir}/requirements.txt" From 190c6f011d0b516f622fb6c3367c48ede09424bb Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 12 May 2026 03:07:26 +0200 Subject: [PATCH 3/7] Add --verbose flag for riscv64 scripts --- .ci/scripts/test_riscv_qemu.sh | 5 +++++ examples/riscv/aot_riscv.py | 17 ++++++++++++++++- examples/riscv/run.sh | 6 ++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh index 88d7308bc5e..0d8b2815f74 100755 --- a/.ci/scripts/test_riscv_qemu.sh +++ b/.ci/scripts/test_riscv_qemu.sh @@ -17,6 +17,7 @@ et_root_dir=$(realpath "${script_dir}/../..") model="add" xnnpack=false quantize=false +verbose=false usage() { cat <&2; usage; exit 1 ;; esac @@ -46,6 +48,9 @@ fi if ${quantize}; then run_extra_args+=(--quantize) fi +if ${verbose}; then + run_extra_args+=(--verbose) +fi bash "${et_root_dir}/examples/riscv/setup.sh" bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}" diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index 7b8bd365ac2..3c504ed7201 100644 --- a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -11,6 +11,7 @@ """ import argparse +import logging from pathlib import Path import torch @@ -75,8 +76,16 @@ def main() -> None: action="store_true", help="Produce an 8-bit quantized model", ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable XNNPACK partitioner DEBUG logging and dump the lowered graph", + ) args = parser.parse_args() + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + if args.output is None: args.output = Path(f"{args.model}_riscv.bpte") @@ -100,7 +109,8 @@ def main() -> None: from executorch.backends.xnnpack.partition.xnnpack_partitioner import ( XnnpackPartitioner, ) - partitioners.append(XnnpackPartitioner()) + + partitioners.append(XnnpackPartitioner(verbose=args.verbose)) compile_config = None if args.quantize: @@ -121,6 +131,11 @@ def main() -> None: f"quantize={args.quantize} delegated_nodes={delegated}" ) + if args.verbose: + from executorch.exir.backend.utils import print_delegated_graph + + print_delegated_graph(edge.exported_program().graph_module) + et_program = edge.to_executorch() test_suite = MethodTestSuite( diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh index 9b2f950d8b8..644944ab8a4 100755 --- a/examples/riscv/run.sh +++ b/examples/riscv/run.sh @@ -23,6 +23,7 @@ qemu_timeout="600" model="add" xnnpack=false quantize=false +verbose=false usage() { cat < Which model to export and run (default: ${model}) --xnnpack Enable the XNNPACK backend (AOT partitioner + runtime) --quantize Produce an 8-bit quantized model + --verbose Enable XNNPACK partitioner DEBUG logging and dump the lowered graph --build_only Only export and cross-compile; do not invoke QEMU --build_dir= CMake build directory (default: ${build_dir}) --output_dir= Directory for the exported .bpte (default: ${output_dir}) @@ -45,6 +47,7 @@ for arg in "$@"; do --model=*) model="${arg#*=}" ;; --xnnpack) xnnpack=true ;; --quantize) quantize=true ;; + --verbose) verbose=true ;; --build_only) build_only=true ;; --build_dir=*) build_dir="${arg#*=}" ;; --output_dir=*) output_dir="${arg#*=}" ;; @@ -66,6 +69,9 @@ fi if ${quantize}; then aot_extra_args+=(--quantize) fi +if ${verbose}; then + aot_extra_args+=(--verbose) +fi python "${script_dir}/aot_riscv.py" --model "${model}" "${aot_extra_args[@]}" --output "${bpte_path}" echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux" From 9d6221c063754e39f8ec7a424c7ccdb4f1d36bf4 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Tue, 12 May 2026 18:20:04 +0200 Subject: [PATCH 4/7] Add MobileBERT testing on riscv64 (fp32 + quantize) --- .github/workflows/riscv64.yml | 14 ++++++++------ examples/riscv/aot_riscv.py | 33 ++++++++++++++++++++++++++++++++- examples/riscv/requirements.txt | 1 + 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index ef070d267d2..2e38126c25a 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -28,12 +28,14 @@ jobs: strategy: matrix: include: - - { model: add, xnnpack: false, quantize: false } - - { model: add, xnnpack: true, quantize: false } - - { model: mv2, xnnpack: false, quantize: false } - - { model: mv2, xnnpack: false, quantize: true } - - { model: mv2, xnnpack: true, quantize: false } - - { model: mv2, xnnpack: true, quantize: true } + - { model: add, xnnpack: false, quantize: false } + - { model: add, xnnpack: true, quantize: false } + - { model: mv2, xnnpack: false, quantize: false } + - { model: mv2, xnnpack: true, quantize: false } + - { model: mv2, xnnpack: true, quantize: true } + - { model: mobilebert, xnnpack: false, quantize: false } + - { model: mobilebert, xnnpack: true, quantize: false } + - { model: mobilebert, xnnpack: true, quantize: true } permissions: id-token: write contents: read diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index 3c504ed7201..88e239860b0 100644 --- a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -49,7 +49,38 @@ def build_mv2(): return model, example_inputs, test_inputs, False -MODELS = {"add": build_add, "mv2": build_mv2} +def build_mobilebert(): + from transformers import MobileBertConfig, MobileBertModel + + config = MobileBertConfig( + vocab_size=1024, + hidden_size=128, + embedding_size=64, + num_hidden_layers=2, + num_attention_heads=2, + intermediate_size=128, + intra_bottleneck_size=32, + ) + + class Wrapper(torch.nn.Module): + def __init__(self): + super().__init__() + self.model = MobileBertModel(config).eval() + + def forward(self, input_ids): + return self.model(input_ids).last_hidden_state + + model = Wrapper().eval() + example_inputs = (torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8]]),) + test_inputs = [example_inputs] + return model, example_inputs, test_inputs, False + + +MODELS = { + "add": build_add, + "mv2": build_mv2, + "mobilebert": build_mobilebert, +} def main() -> None: diff --git a/examples/riscv/requirements.txt b/examples/riscv/requirements.txt index e35531e566f..273e7156a1d 100644 --- a/examples/riscv/requirements.txt +++ b/examples/riscv/requirements.txt @@ -1 +1,2 @@ torchvision +transformers From 8dac448412a0a7f4232c65d9721996037cb4ad98 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Sat, 16 May 2026 02:30:33 +0200 Subject: [PATCH 5/7] Add a Llama testing on riscv64 (fp32 + quantize) --- .github/workflows/riscv64.yml | 3 +++ examples/riscv/aot_riscv.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index 2e38126c25a..fec5c1714ed 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -36,6 +36,9 @@ jobs: - { model: mobilebert, xnnpack: false, quantize: false } - { model: mobilebert, xnnpack: true, quantize: false } - { model: mobilebert, xnnpack: true, quantize: true } + - { model: llama2, xnnpack: false, quantize: false } + - { model: llama2, xnnpack: true, quantize: false } + - { model: llama2, xnnpack: true, quantize: true } permissions: id-token: write contents: read diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index 88e239860b0..1225ac84f19 100644 --- a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -76,10 +76,41 @@ def forward(self, input_ids): return model, example_inputs, test_inputs, False +def build_llama2(): + # Use the executorch native Transformer (matches MODEL_NAME_TO_MODEL["llama2"] + # in examples/models/__init__.py). Unlike HF LlamaModel, RoPE freqs are + # precomputed buffers and just sliced at forward time, so no + # torch.arange()/Long causal mask is built per forward — which is what + # the PT2E XNNPACK quantizer trips over on HF Llama. + from executorch.examples.models.llama.llama_transformer import ( + construct_transformer, + ) + from executorch.examples.models.llama.model_args import ModelArgs + + seq_len = 8 + args = ModelArgs( + dim=128, + n_layers=2, + n_heads=4, + n_kv_heads=2, # GQA: kv_heads < n_heads exercises the GQA path + vocab_size=1024, + hidden_dim=256, # SwiGLU FFN: gate + up projections at this width + max_seq_len=seq_len, + max_context_len=seq_len, + rope_theta=10000.0, + ) + torch.manual_seed(0) + model = construct_transformer(args).eval() + example_inputs = (torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8]], dtype=torch.long),) + test_inputs = [example_inputs] + return model, example_inputs, test_inputs, False + + MODELS = { "add": build_add, "mv2": build_mv2, "mobilebert": build_mobilebert, + "llama2": build_llama2, } From 1eae4652436a2bd6d60334babee5fe166ea16d41 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Sat, 16 May 2026 11:11:45 +0200 Subject: [PATCH 6/7] Add ResNet18 testing on riscv64 (fp32 + quantize) --- .github/workflows/riscv64.yml | 3 +++ examples/riscv/aot_riscv.py | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index fec5c1714ed..b32924046a4 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -39,6 +39,9 @@ jobs: - { model: llama2, xnnpack: false, quantize: false } - { model: llama2, xnnpack: true, quantize: false } - { model: llama2, xnnpack: true, quantize: true } + - { model: resnet18, xnnpack: false, quantize: false } + - { model: resnet18, xnnpack: true, quantize: false } + - { model: resnet18, xnnpack: true, quantize: true } permissions: id-token: write contents: read diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index 1225ac84f19..0ed6af9a68e 100644 --- a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -106,11 +106,22 @@ def build_llama2(): return model, example_inputs, test_inputs, False +def build_resnet18(): + from torchvision.models import resnet18, ResNet18_Weights + + model = resnet18(weights=ResNet18_Weights.DEFAULT).eval() + torch.manual_seed(0) + example_inputs = (torch.randn(1, 3, 224, 224),) + test_inputs = [example_inputs] + return model, example_inputs, test_inputs, False + + MODELS = { "add": build_add, "mv2": build_mv2, "mobilebert": build_mobilebert, "llama2": build_llama2, + "resnet18": build_resnet18, } From 4b5e42e5c6e147ad32d70a396cd9b715793f76bc Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Sat, 16 May 2026 13:34:00 +0200 Subject: [PATCH 7/7] lintrunner --- examples/riscv/aot_riscv.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index 0ed6af9a68e..22e8b31df73 100644 --- a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -82,9 +82,7 @@ def build_llama2(): # precomputed buffers and just sliced at forward time, so no # torch.arange()/Long causal mask is built per forward — which is what # the PT2E XNNPACK quantizer trips over on HF Llama. - from executorch.examples.models.llama.llama_transformer import ( - construct_transformer, - ) + from executorch.examples.models.llama.llama_transformer import construct_transformer from executorch.examples.models.llama.model_args import ModelArgs seq_len = 8 @@ -92,9 +90,9 @@ def build_llama2(): dim=128, n_layers=2, n_heads=4, - n_kv_heads=2, # GQA: kv_heads < n_heads exercises the GQA path + n_kv_heads=2, # GQA: kv_heads < n_heads exercises the GQA path vocab_size=1024, - hidden_dim=256, # SwiGLU FFN: gate + up projections at this width + hidden_dim=256, # SwiGLU FFN: gate + up projections at this width max_seq_len=seq_len, max_context_len=seq_len, rope_theta=10000.0,