diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh
index 27ab57f3b09..0d8b2815f74 100755
--- a/.ci/scripts/test_riscv_qemu.sh
+++ b/.ci/scripts/test_riscv_qemu.sh
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 # CI wrapper: install RISC-V cross-compile + qemu-user tooling, then run the
-# RISC-V Phase 1 smoke test (export, cross-compile, qemu-user execution) via
+# RISC-V smoke test (export, cross-compile, qemu-user execution) via
 # examples/riscv/run.sh. The bundled-IO comparison and Test_result: PASS
 # check are done by run.sh.
 
@@ -14,5 +14,44 @@ set -eu
 script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
 et_root_dir=$(realpath "${script_dir}/../..")
 
+model="add"
+xnnpack=false
+quantize=false
+verbose=false
+
+usage() {
+  cat <<EOF
+Usage: $(basename "$0") [options]
+
+  --model=<name>   Which model to export and run (default: add)
+  --xnnpack        Enable the XNNPACK backend (AOT partitioner + runtime)
+  --quantize       Produce an 8-bit quantized model
+  --verbose        Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
+  -h, --help       Show this help
+EOF
+}
+
+for arg in "$@"; do
+  case $arg in
+    --model=*) model="${arg#*=}" ;;
+    --xnnpack) xnnpack=true ;;
+    --quantize) quantize=true ;;
+    --verbose) verbose=true ;;
+    -h|--help) usage; exit 0 ;;
+    *) echo "Unknown option: $arg" >&2; usage; exit 1 ;;
+  esac
+done
+
+run_extra_args=()
+if ${xnnpack}; then
+  run_extra_args+=(--xnnpack)
+fi
+if ${quantize}; then
+  run_extra_args+=(--quantize)
+fi
+if ${verbose}; then
+  run_extra_args+=(--verbose)
+fi
+
 bash "${et_root_dir}/examples/riscv/setup.sh"
-bash "${et_root_dir}/examples/riscv/run.sh"
+bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}"
diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml
index 79eec6cbb4c..892df2219ae 100644
--- a/.github/workflows/_test_riscv.yml
+++ b/.github/workflows/_test_riscv.yml
@@ -12,13 +12,36 @@ on:
         required: false
         type: number
         default: 30
+      model:
+        description: 'Which model to run. Possible values are: add, mv2 (mobilenetv2), mobilebert, llama2, resnet18'
+        required: false
+        type: string
+        default: 'add'
+      xnnpack:
+        description: 'Whether to enable XNNPACK'
+        required: false
+        type: boolean
+        default: false
+      quantize:
+        description: 'Produce an 8-bit quantized model'
+        required: false
+        type: boolean
+        default: false
+      gcc-version:
+        description: 'The version of GCC to use'
+        required: false
+        type: number
+      docker-image:
+        description: 'The docker image to use for this job'
+        required: false
+        type: string
 
 jobs:
   run:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-gcc11
+      docker-image: ${{ inputs.docker-image || 'ci-image:executorch-ubuntu-22.04-gcc11' }}
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: ${{ inputs.timeout }}
@@ -29,4 +52,5 @@ jobs:
         source .ci/scripts/utils.sh
         install_executorch "--use-pt-pinned-commit"
 
-        bash .ci/scripts/test_riscv_qemu.sh
+        export GCC_VERSION=${{ inputs.gcc-version || '' }}
+        bash .ci/scripts/test_riscv_qemu.sh --model="${{ inputs.model }}" ${{ inputs.xnnpack && '--xnnpack' }} ${{ inputs.quantize && '--quantize' }}
diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml
index aa970dc6270..b32924046a4 100644
--- a/.github/workflows/riscv64.yml
+++ b/.github/workflows/riscv64.yml
@@ -25,6 +25,30 @@ jobs:
   test-riscv:
     name: test-riscv
     uses: ./.github/workflows/_test_riscv.yml
+    strategy:
+      matrix:
+        include:
+          - { model: add, xnnpack: false, quantize: false }
+          - { model: add, xnnpack: true, quantize: false }
+          - { model: mv2, xnnpack: false, quantize: false }
+          - { model: mv2, xnnpack: true, quantize: false }
+          - { model: mv2, xnnpack: true, quantize: true }
+          - { model: mobilebert, xnnpack: false, quantize: false }
+          - { model: mobilebert, xnnpack: true, quantize: false }
+          - { model: mobilebert, xnnpack: true, quantize: true }
+          - { model: llama2, xnnpack: false, quantize: false }
+          - { model: llama2, xnnpack: true, quantize: false }
+          - { model: llama2, xnnpack: true, quantize: true }
+          - { model: resnet18, xnnpack: false, quantize: false }
+          - { model: resnet18, xnnpack: true, quantize: false }
+          - { model: resnet18, xnnpack: true, quantize: true }
     permissions:
       id-token: write
       contents: read
+    with:
+      model: ${{ matrix.model }}
+      xnnpack: ${{ matrix.xnnpack }}
+      quantize: ${{ matrix.quantize }}
+      # XNNPACK requires GCC 14+
+      gcc-version: ${{ matrix.xnnpack && 14 || 0 }}
+      docker-image: ${{ matrix.xnnpack && 'ci-image:executorch-ubuntu-24.04-gcc14' || '' }}
diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py
index 8076f056ba2..22e8b31df73 100644
--- a/examples/riscv/aot_riscv.py
+++ b/examples/riscv/aot_riscv.py
@@ -3,14 +3,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-"""AOT export for the RISC-V Phase 1.0 smoke test.
+"""AOT export for the RISC-V smoke test.
 
-Exports a trivial ``torch.add`` module to a BundledProgram (.bpte) that the
-portable executor_runner can load on a riscv64 target and verify against the
-embedded reference output, emitting ``Test_result: PASS`` on success.
+Exports a small model to a BundledProgram (.bpte) that the portable
+executor_runner can load on a riscv64 target and verify against the embedded
+reference output, emitting ``Test_result: PASS`` on success.
 """
 
 import argparse
+import logging
 from pathlib import Path
 
 import torch
@@ -28,26 +29,186 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         return x + y
 
 
+def build_add():
+    model = AddModule().eval()
+    example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0))
+    test_inputs = [
+        (torch.ones(1, 4), torch.full((1, 4), 2.0)),
+        (torch.full((1, 4), 3.0), torch.full((1, 4), 4.0)),
+    ]
+    return model, example_inputs, test_inputs, True
+
+
+def build_mv2():
+    from torchvision.models import mobilenet_v2, MobileNet_V2_Weights
+
+    model = mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).eval()
+    torch.manual_seed(0)
+    example_inputs = (torch.randn(1, 3, 224, 224),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
+def build_mobilebert():
+    from transformers import MobileBertConfig, MobileBertModel
+
+    config = MobileBertConfig(
+        vocab_size=1024,
+        hidden_size=128,
+        embedding_size=64,
+        num_hidden_layers=2,
+        num_attention_heads=2,
+        intermediate_size=128,
+        intra_bottleneck_size=32,
+    )
+
+    class Wrapper(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.model = MobileBertModel(config).eval()
+
+        def forward(self, input_ids):
+            return self.model(input_ids).last_hidden_state
+
+    model = Wrapper().eval()
+    example_inputs = (torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8]]),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
+def build_llama2():
+    # Use the executorch native Transformer (matches MODEL_NAME_TO_MODEL["llama2"]
+    # in examples/models/__init__.py). Unlike HF LlamaModel, RoPE freqs are
+    # precomputed buffers and just sliced at forward time, so no
+    # torch.arange()/Long causal mask is built per forward — which is what
+    # the PT2E XNNPACK quantizer trips over on HF Llama.
+    from executorch.examples.models.llama.llama_transformer import construct_transformer
+    from executorch.examples.models.llama.model_args import ModelArgs
+
+    seq_len = 8
+    args = ModelArgs(
+        dim=128,
+        n_layers=2,
+        n_heads=4,
+        n_kv_heads=2,  # GQA: kv_heads < n_heads exercises the GQA path
+        vocab_size=1024,
+        hidden_dim=256,  # SwiGLU FFN: gate + up projections at this width
+        max_seq_len=seq_len,
+        max_context_len=seq_len,
+        rope_theta=10000.0,
+    )
+    torch.manual_seed(0)
+    model = construct_transformer(args).eval()
+    example_inputs = (torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8]], dtype=torch.long),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
+def build_resnet18():
+    from torchvision.models import resnet18, ResNet18_Weights
+
+    model = resnet18(weights=ResNet18_Weights.DEFAULT).eval()
+    torch.manual_seed(0)
+    example_inputs = (torch.randn(1, 3, 224, 224),)
+    test_inputs = [example_inputs]
+    return model, example_inputs, test_inputs, False
+
+
+MODELS = {
+    "add": build_add,
+    "mv2": build_mv2,
+    "mobilebert": build_mobilebert,
+    "llama2": build_llama2,
+    "resnet18": build_resnet18,
+}
+
+
 def main() -> None:
     parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--model",
+        choices=sorted(MODELS),
+        default="add",
+        help="Which model to export",
+    )
     parser.add_argument(
         "--output",
         type=Path,
-        default=Path("add_riscv.bpte"),
-        help="Output .bpte path",
+        default=None,
+        help="Output .bpte path (default: <model>_riscv.bpte)",
+    )
+    parser.add_argument(
+        "--xnnpack",
+        action="store_true",
+        help="Lower through the XNNPACK partitioner",
+    )
+    parser.add_argument(
+        "--quantize",
+        action="store_true",
+        help="Produce an 8-bit quantized model",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Enable XNNPACK partitioner DEBUG logging and dump the lowered graph",
     )
     args = parser.parse_args()
 
-    model = AddModule().eval()
-    example_inputs = (torch.ones(1, 4), torch.full((1, 4), 2.0))
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
 
-    exported = export(model, example_inputs)
-    et_program = to_edge_transform_and_lower(exported).to_executorch()
+    if args.output is None:
+        args.output = Path(f"{args.model}_riscv.bpte")
+
+    model, example_inputs, test_inputs, strict = MODELS[args.model]()
+
+    if args.quantize:
+        from executorch.examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType
+        from executorch.examples.xnnpack.quantization.utils import quantize
+
+        if args.model not in MODEL_NAME_TO_OPTIONS:
+            parser.error(f"No XNNPACK quantization recipe for model {args.model!r}")
+        quant_type = MODEL_NAME_TO_OPTIONS[args.model].quantization
+        if quant_type == QuantType.NONE:
+            parser.error(f"Quantization recipe for {args.model!r} is NONE")
+        ep = export(model, example_inputs, strict=strict)
+        model = quantize(ep.module(), example_inputs, quant_type)
+
+    exported = export(model, example_inputs, strict=strict)
+    partitioners = []
+    if args.xnnpack:
+        from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
+            XnnpackPartitioner,
+        )
+
+        partitioners.append(XnnpackPartitioner(verbose=args.verbose))
+
+    compile_config = None
+    if args.quantize:
+        from executorch.exir import EdgeCompileConfig
+
+        compile_config = EdgeCompileConfig(_check_ir_validity=False)
+
+    edge = to_edge_transform_and_lower(
+        exported, partitioner=partitioners, compile_config=compile_config
+    )
+    delegated = sum(
+        1
+        for n in edge.exported_program().graph.nodes
+        if n.op == "call_function" and "call_delegate" in str(n.target)
+    )
+    print(
+        f"[aot_riscv] model={args.model} xnnpack={args.xnnpack} "
+        f"quantize={args.quantize} delegated_nodes={delegated}"
+    )
+
+    if args.verbose:
+        from executorch.exir.backend.utils import print_delegated_graph
+
+        print_delegated_graph(edge.exported_program().graph_module)
+
+    et_program = edge.to_executorch()
 
-    test_inputs = [
-        (torch.ones(1, 4), torch.full((1, 4), 2.0)),
-        (torch.full((1, 4), 3.0), torch.full((1, 4), 4.0)),
-    ]
     test_suite = MethodTestSuite(
         method_name="forward",
         test_cases=[
diff --git a/examples/riscv/requirements.txt b/examples/riscv/requirements.txt
new file mode 100644
index 00000000000..273e7156a1d
--- /dev/null
+++ b/examples/riscv/requirements.txt
@@ -0,0 +1,2 @@
+torchvision
+transformers
diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh
index 7c05edcbc8c..644944ab8a4 100755
--- a/examples/riscv/run.sh
+++ b/examples/riscv/run.sh
@@ -20,11 +20,19 @@ build_dir="${et_root_dir}/cmake-out-riscv"
 output_dir="${et_root_dir}/riscv_test"
 qemu="qemu-riscv64-static"
 qemu_timeout="600"
+model="add"
+xnnpack=false
+quantize=false
+verbose=false
 
 usage() {
   cat <<EOF
Usage: $(basename "$0") [options]

+  --model=<name>         Which model to export and run (default: ${model})
+  --xnnpack              Enable the XNNPACK backend (AOT partitioner + runtime)
+  --quantize             Produce an 8-bit quantized model
+  --verbose              Enable XNNPACK partitioner DEBUG logging and dump the lowered graph
   --build_only           Only export and cross-compile; do not invoke QEMU
   --build_dir=<path>     CMake build directory (default: ${build_dir})
   --output_dir=<path>    Directory for the exported .bpte (default: ${output_dir})
@@ -36,6 +44,10 @@ EOF
 
 for arg in "$@"; do
   case $arg in
+    --model=*) model="${arg#*=}" ;;
+    --xnnpack) xnnpack=true ;;
+    --quantize) quantize=true ;;
+    --verbose) verbose=true ;;
     --build_only) build_only=true ;;
     --build_dir=*) build_dir="${arg#*=}" ;;
     --output_dir=*) output_dir="${arg#*=}" ;;
@@ -47,14 +59,29 @@ for arg in "$@"; do
 done
 
 mkdir -p "${output_dir}"
-bpte_path="${output_dir}/add_riscv.bpte"
+bpte_path="${output_dir}/${model}_riscv.bpte"
 
 echo "[run.sh] Step 1/3: AOT export on host"
-python "${script_dir}/aot_riscv.py" --output "${bpte_path}"
+aot_extra_args=()
+if ${xnnpack}; then
+  aot_extra_args+=(--xnnpack)
+fi
+if ${quantize}; then
+  aot_extra_args+=(--quantize)
+fi
+if ${verbose}; then
+  aot_extra_args+=(--verbose)
+fi
+python "${script_dir}/aot_riscv.py" --model "${model}" "${aot_extra_args[@]}" --output "${bpte_path}"
 
 echo "[run.sh] Step 2/3: cross-compile executor_runner for riscv64-linux"
+cmake_extra_args=()
+if ${xnnpack}; then
+  cmake_extra_args+=(-DEXECUTORCH_BUILD_XNNPACK=ON)
+fi
 cmake -S "${et_root_dir}" -B "${build_dir}" \
   --preset riscv64-linux \
+  "${cmake_extra_args[@]}" \
   -DCMAKE_BUILD_TYPE=Release
 cmake --build "${build_dir}" -j"$(nproc)" --target executor_runner
 
@@ -87,9 +114,15 @@ export QEMU_LD_PREFIX="${QEMU_LD_PREFIX:-/usr/riscv64-linux-gnu}"
 log_file=$(mktemp)
 trap 'rm -f "${log_file}"' EXIT
 
+runner_extra_args=()
+if ${quantize}; then
+  runner_extra_args+=(--bundleio_rtol=0.1 --bundleio_atol=0.25)
+fi
+
 set +e
 timeout --signal=KILL "${qemu_timeout}" "${qemu}" "${runner}" \
   --model_path="${bpte_path}" \
+  "${runner_extra_args[@]}" \
   2>&1 | tee "${log_file}"
 qemu_status=${PIPESTATUS[0]}
 set -e
diff --git a/examples/riscv/setup.sh b/examples/riscv/setup.sh
index c1342c60d5e..955c8ca3386 100755
--- a/examples/riscv/setup.sh
+++ b/examples/riscv/setup.sh
@@ -10,6 +10,8 @@
 
 set -eu
 
+script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)
+
 if ! command -v apt-get >/dev/null 2>&1; then
   echo "[$(basename "$0")] this setup script targets Debian/Ubuntu (apt-get not found)" >&2
   exit 1
@@ -23,14 +25,23 @@ fi
 ${SUDO} apt-get update
 ${SUDO} apt-get install -y --no-install-recommends \
   build-essential \
-  gcc-riscv64-linux-gnu \
-  g++-riscv64-linux-gnu \
+  gcc${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
+  g++${GCC_VERSION:+-${GCC_VERSION}}-riscv64-linux-gnu \
   binutils-riscv64-linux-gnu \
   libc6-riscv64-cross \
   libc6-dev-riscv64-cross \
   cmake \
   file \
+  ca-certificates \
   qemu-user-static
 
+if [[ -n "${GCC_VERSION:-}" ]]; then
+  ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-gcc riscv64-linux-gnu-gcc /usr/bin/riscv64-linux-gnu-gcc-${GCC_VERSION} 100
+  ${SUDO} update-alternatives --install /usr/bin/riscv64-linux-gnu-g++ riscv64-linux-gnu-g++ /usr/bin/riscv64-linux-gnu-g++-${GCC_VERSION} 100
+fi
+
 riscv64-linux-gnu-gcc --version | head -n1
 qemu-riscv64-static --version | head -n1
+
+# Some python packages also need to be installed
+pip install -r "${script_dir}/requirements.txt"
diff --git a/tools/cmake/preset/riscv64_linux.cmake b/tools/cmake/preset/riscv64_linux.cmake
index 32b891cd743..c094534b594 100644
--- a/tools/cmake/preset/riscv64_linux.cmake
+++ b/tools/cmake/preset/riscv64_linux.cmake
@@ -9,3 +9,17 @@ set_overridable_option(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL ON)
 set_overridable_option(EXECUTORCH_BUILD_DEVTOOLS ON)
 set_overridable_option(EXECUTORCH_ENABLE_BUNDLE_IO ON)
 set_overridable_option(EXECUTORCH_ENABLE_LOGGING ON)
+
+if(EXECUTORCH_BUILD_XNNPACK)
+  if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS 14)
+    message(FATAL_ERROR "XNNPACK requires GCC 14+ on riscv64")
+  endif()
+elseif(NOT DEFINED EXECUTORCH_BUILD_XNNPACK)
+  if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL
+     14
+  )
+    set(EXECUTORCH_BUILD_XNNPACK ON)
+  else()
+    message(NOTICE "XNNPACK requires GCC 14+ on riscv64")
+  endif()
+endif()