Arm backend: Add adaptive pooling node visitors (#20220) #1302
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: cuda-perf | |
| on: | |
| push: | |
| branches: | |
| - main | |
| - release/* | |
| tags: | |
| - ciflow/cuda-perf/* | |
| pull_request: | |
| paths: | |
| - .github/workflows/cuda-perf.yml | |
| - .ci/scripts/cuda_benchmark.py | |
| - .ci/scripts/cuda_perf_prompts/** | |
| - .ci/scripts/export_model_artifact.sh | |
| - .ci/scripts/test_model_e2e.sh | |
| workflow_dispatch: | |
| inputs: | |
| models: | |
| description: Models to be benchmarked (comma-separated HuggingFace model IDs) | |
| required: false | |
| type: string | |
| quantizations: | |
| description: Quantization types (comma-separated) | |
| required: false | |
| type: string | |
| num_runs: | |
| description: Number of benchmark runs per model | |
| required: false | |
| type: string | |
| default: "50" | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} | |
| cancel-in-progress: true | |
| permissions: | |
| contents: read | |
| jobs: | |
| changed-files: | |
| name: Get changed files | |
| uses: ./.github/workflows/_get-changed-files.yml | |
| with: | |
| include-push-diff: true | |
| run-decision: | |
| name: CI run decision | |
| uses: ./.github/workflows/_ci-run-decision.yml | |
| set-parameters: | |
| needs: [changed-files, run-decision] | |
| # Path-filtered: mirrors the workflow-level pull_request `paths:` | |
| # filter so push commits that don't touch perf-relevant paths skip | |
| # this whole workflow on non-sampled commits. Sampling preserves | |
| # perf time-series at every 4th commit (vs every commit pre-PR). | |
| if: | | |
| contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || | |
| needs.run-decision.outputs.is-full-run == 'true' | |
| runs-on: ubuntu-22.04 | |
| outputs: | |
| benchmark_configs: ${{ steps.set-parameters.outputs.benchmark_configs }} | |
| steps: | |
| - uses: actions/checkout@v3 | |
| with: | |
| submodules: 'false' | |
| - uses: actions/setup-python@v4 | |
| with: | |
| python-version: '3.10' | |
| - name: Set parameters | |
| id: set-parameters | |
| shell: bash | |
| env: | |
| ALL_MODELS: 'mistralai/Voxtral-Mini-3B-2507,openai/whisper-small,openai/whisper-medium,openai/whisper-large-v3-turbo,google/gemma-3-4b-it,nvidia/parakeet-tdt,SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4' | |
| ALL_QUANTIZATIONS: 'non-quantized,quantized-int4-tile-packed,quantized-int4-weight-only' | |
| NUM_RUNS: ${{ inputs.num_runs || '50' }} | |
| run: | | |
| set -eux | |
| MODELS="${{ inputs.models }}" | |
| QUANTIZATIONS="${{ inputs.quantizations }}" | |
| # Use all models/quantizations unless overridden by workflow_dispatch | |
| if [ -z "$MODELS" ]; then | |
| MODELS="$ALL_MODELS" | |
| fi | |
| if [ -z "$QUANTIZATIONS" ]; then | |
| QUANTIZATIONS="$ALL_QUANTIZATIONS" | |
| fi | |
| # Split models and quantizations into arrays | |
| IFS=',' read -ra MODEL_ARRAY <<< "$MODELS" | |
| IFS=',' read -ra QUANT_ARRAY <<< "$QUANTIZATIONS" | |
| # Generate benchmark configs (skip invalid model/quant combinations) | |
| CONFIGS='{"include":[' | |
| FIRST=true | |
| for MODEL in "${MODEL_ARRAY[@]}"; do | |
| for QUANT in "${QUANT_ARRAY[@]}"; do | |
| # Qwen3.5 MoE only supports quantized-int4-tile-packed | |
| if [[ "$MODEL" == *"Qwen3.5-35B-A3B"* ]] && [ "$QUANT" != "quantized-int4-tile-packed" ]; then | |
| continue | |
| fi | |
| if [ "$FIRST" = true ]; then | |
| FIRST=false | |
| else | |
| CONFIGS+=',' | |
| fi | |
| # Sanitize model name for use in artifact paths | |
| MODEL_SAFE=$(echo "$MODEL" | sed 's/\//_/g') | |
| CONFIGS+="{\"model\":\"$MODEL\",\"quant\":\"$QUANT\",\"model_safe\":\"$MODEL_SAFE\",\"num_runs\":\"$NUM_RUNS\"}" | |
| done | |
| done | |
| CONFIGS+=']}' | |
| echo "benchmark_configs=$CONFIGS" >> $GITHUB_OUTPUT | |
| echo "Generated benchmark configs:" | |
| echo "$CONFIGS" | python -m json.tool | |
| export-models: | |
| name: export-models | |
| needs: set-parameters | |
| uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main | |
| permissions: | |
| id-token: write | |
| contents: read | |
| secrets: inherit | |
| strategy: | |
| matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} | |
| fail-fast: false | |
| with: | |
| timeout: 90 | |
| secrets-env: EXECUTORCH_HF_TOKEN | |
| runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }} | |
| gpu-arch-type: cuda | |
| gpu-arch-version: "13.0" | |
| use-custom-docker-registry: false | |
| submodules: recursive | |
| upload-artifact: model-${{ matrix.model_safe }}-${{ matrix.quant }} | |
| ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
| script: | | |
| set -eux | |
| echo "::group::Setup ExecuTorch" | |
| # OSDC runners can't reach the public PyPI CDN that download.pytorch.org's | |
| # transitive deps resolve to. Pre-install torch's pure-python deps from the | |
| # in-cluster pypi-cache and drop the default cpu extra-index so the cuda | |
| # torch wheel is the only candidate. | |
| export PIP_EXTRA_INDEX_URL= | |
| # fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later | |
| # examples install doesn't try to downgrade it from the public CDN. | |
| pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow | |
| # Disable MKL to avoid duplicate target error when conda has multiple MKL installations | |
| export USE_MKL=OFF | |
| ./install_executorch.sh | |
| echo "::endgroup::" | |
| echo "::group::Setup Huggingface" | |
| pip install -U "huggingface_hub[cli]>=1.2.1,<2.0" accelerate "optimum~=2.0.0" "transformers==5.0.0rc1" | |
| HF_AUTH_TOKEN="$(printf '%s' "$SECRET_EXECUTORCH_HF_TOKEN" | tr -d '\r\n')" | |
| hf auth login --token "$HF_AUTH_TOKEN" | |
| OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) | |
| pip install --no-deps git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION} | |
| echo "::endgroup::" | |
| echo "::group::Exporting model ${{ matrix.model }} with quantization ${{ matrix.quant }}" | |
| OUTPUT_DIR="model_artifacts" | |
| mkdir -p "$OUTPUT_DIR" | |
| bash .ci/scripts/export_model_artifact.sh cuda "${{ matrix.model }}" "${{ matrix.quant }}" "$OUTPUT_DIR" | |
| # Move artifacts to RUNNER_ARTIFACT_DIR for upload | |
| mv "$OUTPUT_DIR"/* "${RUNNER_ARTIFACT_DIR}/" | |
| ls -lah "${RUNNER_ARTIFACT_DIR}" | |
| echo "::endgroup::" | |
| benchmark-cuda: | |
| name: benchmark-cuda | |
| needs: | |
| - changed-files | |
| - run-decision | |
| - set-parameters | |
| - export-models | |
| # Inherit the gate from set-parameters/export-models (they cascade- | |
| # skip when the gate evaluates false). `always()` keeps benchmark- | |
| # cuda running even when some export-models matrix cells fail — | |
| # but only if the gate itself is open. Without the explicit gate | |
| # here, `always()` would fire benchmark-cuda even when set- | |
| # parameters was gated out. | |
| if: | | |
| always() && | |
| ( | |
| contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || | |
| needs.run-decision.outputs.is-full-run == 'true' | |
| ) | |
| uses: pytorch/test-infra/.github/workflows/linux_job_v3.yml@main | |
| permissions: | |
| id-token: write | |
| contents: read | |
| strategy: | |
| matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_configs) }} | |
| fail-fast: false | |
| with: | |
| timeout: 90 | |
| runner: ${{ contains(matrix.model, 'Qwen3.5-35B-A3B') && 'mt-l-x86iavx512-11-125-a100' || 'mt-l-x86aavx2-29-113-a10g' }} | |
| gpu-arch-type: cuda | |
| gpu-arch-version: "13.0" | |
| use-custom-docker-registry: false | |
| submodules: recursive | |
| download-artifact: model-${{ matrix.model_safe }}-${{ matrix.quant }} | |
| upload-artifact: results-${{ matrix.model_safe }}-${{ matrix.quant }} | |
| ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
| script: | | |
| set -eux | |
| echo "::group::Setup environment" | |
| # OSDC runners can't reach the public PyPI CDN that download.pytorch.org's | |
| # transitive deps resolve to. Pre-install torch's pure-python deps from the | |
| # in-cluster pypi-cache and drop the default cpu extra-index so the cuda | |
| # torch wheel is the only candidate. | |
| export PIP_EXTRA_INDEX_URL= | |
| # fsspec is pinned to satisfy datasets' fsspec[http]<=2025.3.0 so the later | |
| # examples install doesn't try to downgrade it from the public CDN. | |
| pip install filelock typing-extensions "setuptools<82" sympy networkx jinja2 "fsspec[http]<=2025.3.0" numpy pillow | |
| ./install_requirements.sh | |
| pip list | |
| echo "::endgroup::" | |
| echo "::group::Prepare model artifacts" | |
| mkdir -p model_artifacts | |
| cp "${RUNNER_ARTIFACT_DIR}/model.pte" model_artifacts/model.pte | |
| cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" model_artifacts/aoti_cuda_blob.ptd | |
| # Copy additional files if they exist | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" model_artifacts/ | |
| fi | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/whisper_preprocessor.pte" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/whisper_preprocessor.pte" model_artifacts/ | |
| fi | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/tekken.json" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/tekken.json" model_artifacts/ | |
| fi | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/poem.wav" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/poem.wav" model_artifacts/ | |
| fi | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/output.wav" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/output.wav" model_artifacts/ | |
| fi | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/tokenizer.model" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/tokenizer.model" model_artifacts/ | |
| fi | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/test_audio.wav" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/test_audio.wav" model_artifacts/ | |
| fi | |
| # Copy tokenizer files | |
| for file in tokenizer.json tokenizer_config.json special_tokens_map.json; do | |
| if [ -f "${RUNNER_ARTIFACT_DIR}/$file" ]; then | |
| cp "${RUNNER_ARTIFACT_DIR}/$file" model_artifacts/ | |
| fi | |
| done | |
| ls -lah model_artifacts/ | |
| echo "::endgroup::" | |
| echo "::group::Build runner" | |
| bash .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model }}" "${{ matrix.quant }}" model_artifacts | |
| echo "::endgroup::" | |
| echo "::group::Running benchmark for ${{ matrix.model }} (${{ matrix.quant }}) with ${{ matrix.num_runs }} runs" | |
| export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH | |
| # Get GPU name using nvidia-smi | |
| GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1) | |
| echo "Detected GPU: $GPU_NAME" | |
| # Get CUDA driver version | |
| CUDA_DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -1) | |
| echo "CUDA Driver Version: $CUDA_DRIVER_VERSION" | |
| # Create results directory (separate from model artifacts) | |
| RESULTS_DIR="benchmark_results" | |
| mkdir -p "$RESULTS_DIR" | |
| # Determine model name and runner command based on model | |
| case "${{ matrix.model }}" in | |
| mistralai/Voxtral-Mini-3B-2507) | |
| RUNNER="cmake-out/examples/models/voxtral/voxtral_runner" | |
| PREPROCESSOR="model_artifacts/voxtral_preprocessor.pte" | |
| TOKENIZER="model_artifacts/tekken.json" | |
| AUDIO="model_artifacts/poem.wav" | |
| RUNNER_CMD="$RUNNER --model_path model_artifacts/model.pte --data_path model_artifacts/aoti_cuda_blob.ptd --tokenizer_path $TOKENIZER --audio_path $AUDIO --processor_path $PREPROCESSOR --temperature 0" | |
| MODEL_NAME="voxtral_${{ matrix.quant }}" | |
| ;; | |
| openai/whisper-*) | |
| RUNNER="cmake-out/examples/models/whisper/whisper_runner" | |
| PREPROCESSOR="model_artifacts/whisper_preprocessor.pte" | |
| AUDIO="model_artifacts/output.wav" | |
| RUNNER_CMD="$RUNNER --model_path model_artifacts/model.pte --data_path model_artifacts/aoti_cuda_blob.ptd --tokenizer_path model_artifacts/ --audio_path $AUDIO --processor_path $PREPROCESSOR --temperature 0" | |
| MODEL_NAME=$(echo "${{ matrix.model }}" | sed 's/openai\///')_${{ matrix.quant }} | |
| ;; | |
| google/gemma-3-4b-it) | |
| RUNNER="cmake-out/examples/models/gemma3/gemma3_e2e_runner" | |
| IMAGE="docs/source/_static/img/et-logo.png" | |
| RUNNER_CMD="$RUNNER --model_path model_artifacts/model.pte --data_path model_artifacts/aoti_cuda_blob.ptd --tokenizer_path model_artifacts/ --image_path $IMAGE --temperature 0" | |
| MODEL_NAME="gemma3_${{ matrix.quant }}" | |
| ;; | |
| nvidia/parakeet-tdt) | |
| RUNNER="cmake-out/examples/models/parakeet/parakeet_runner" | |
| AUDIO="model_artifacts/test_audio.wav" | |
| TOKENIZER="model_artifacts/tokenizer.model" | |
| RUNNER_CMD="$RUNNER --model_path model_artifacts/model.pte --data_path model_artifacts/aoti_cuda_blob.ptd --audio_path $AUDIO --tokenizer_path $TOKENIZER" | |
| MODEL_NAME="parakeet_${{ matrix.quant }}" | |
| ;; | |
| SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4) | |
| RUNNER="cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner" | |
| TOKENIZER="model_artifacts/tokenizer.json" | |
| # Use a checked-in long prompt (>1000 tokens) for benchmarking. A | |
| # static, meaningful prompt avoids the degenerate / repetitive | |
| # outputs that can result from synthetic prompts built by | |
| # repeating the same sentence. | |
| PROMPT_FILE=".ci/scripts/cuda_perf_prompts/qwen3_5_moe_long_prompt.txt" | |
| RUNNER_CMD="$RUNNER --model_path model_artifacts/model.pte --data_path model_artifacts/aoti_cuda_blob.ptd --tokenizer_path $TOKENIZER --prompt_file $PROMPT_FILE --max_new_tokens 512 --temperature 0" | |
| MODEL_NAME="qwen3_5_moe_${{ matrix.quant }}" | |
| ;; | |
| *) | |
| echo "Error: Unsupported model '${{ matrix.model }}'" | |
| exit 1 | |
| ;; | |
| esac | |
| # Run benchmark using cuda_benchmark.py | |
| python .ci/scripts/cuda_benchmark.py \ | |
| --runner_command "$RUNNER_CMD" \ | |
| --model_name "$MODEL_NAME" \ | |
| --num_runs "${{ matrix.num_runs }}" \ | |
| --output_json "$RESULTS_DIR/benchmark_results.json" \ | |
| --output_v3 "$RESULTS_DIR/benchmark_results_v3.json" \ | |
| --model "${{ matrix.model }}" \ | |
| --quantization "${{ matrix.quant }}" \ | |
| --git_sha "${{ github.sha }}" \ | |
| --workflow_run_id "${{ github.run_id }}" \ | |
| --workflow_run_url "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ | |
| --gpu_name "$GPU_NAME" \ | |
| --cuda_driver_version "$CUDA_DRIVER_VERSION" | |
| # Save additional metadata | |
| cat > "$RESULTS_DIR/metadata.json" <<EOF | |
| { | |
| "model": "${{ matrix.model }}", | |
| "quantization": "${{ matrix.quant }}", | |
| "num_runs": ${{ matrix.num_runs }}, | |
| "runner": "$RUNNER", | |
| "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)", | |
| "git_sha": "${{ github.sha }}", | |
| "workflow_run_id": "${{ github.run_id }}", | |
| "workflow_run_url": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| } | |
| EOF | |
| # Only copy benchmark results to RUNNER_ARTIFACT_DIR for upload (not the entire model) | |
| # First, clean up the downloaded model artifacts from RUNNER_ARTIFACT_DIR | |
| rm -rf "${RUNNER_ARTIFACT_DIR}"/* | |
| # Then copy only the benchmark result JSON files | |
| cp "$RESULTS_DIR"/*.json "${RUNNER_ARTIFACT_DIR}/" | |
| echo "Benchmark results prepared for upload:" | |
| ls -lah "${RUNNER_ARTIFACT_DIR}" | |
| echo "::endgroup::" | |
| upload-benchmark-results: | |
| needs: | |
| - changed-files | |
| - run-decision | |
| - benchmark-cuda | |
| # Same gate as benchmark-cuda — skip the upload when the gate | |
| # closed (no benchmarks ran). | |
| if: | | |
| always() && | |
| ( | |
| contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || | |
| contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || | |
| needs.run-decision.outputs.is-full-run == 'true' | |
| ) | |
| runs-on: ubuntu-22.04 | |
| environment: upload-benchmark-results | |
| permissions: | |
| id-token: write | |
| contents: read | |
| steps: | |
| - uses: actions/checkout@v3 | |
| with: | |
| submodules: false | |
| - name: Setup Python | |
| uses: actions/setup-python@v4 | |
| with: | |
| python-version: '3.10' | |
| - name: Download all benchmark results | |
| uses: actions/download-artifact@v4 | |
| with: | |
| pattern: results-* | |
| path: all_results/ | |
| - name: Process and display results | |
| shell: bash | |
| run: | | |
| set -eux | |
| echo "::group::Benchmark Results Summary" | |
| for RESULT_DIR in all_results/results-*/; do | |
| if [ -f "$RESULT_DIR/benchmark_results.json" ]; then | |
| echo "" | |
| echo "================================" | |
| echo "Results from: $(basename "$RESULT_DIR")" | |
| echo "================================" | |
| # Display benchmark results (mean performance) | |
| cat "$RESULT_DIR/benchmark_results.json" | python -m json.tool | |
| # Display metadata | |
| if [ -f "$RESULT_DIR/metadata.json" ]; then | |
| echo "" | |
| echo "--- Metadata ---" | |
| cat "$RESULT_DIR/metadata.json" | python -m json.tool | |
| fi | |
| echo "" | |
| fi | |
| done | |
| echo "::endgroup::" | |
| - name: Authenticate with AWS | |
| uses: aws-actions/configure-aws-credentials@v4 | |
| with: | |
| role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results | |
| role-duration-seconds: 18000 | |
| aws-region: us-east-1 | |
| - name: Upload to S3 | |
| shell: bash | |
| env: | |
| S3_BUCKET: gha-artifacts | |
| S3_PREFIX: executorch-cuda-perf/${{ github.run_id }}/${{ github.run_attempt }} | |
| run: | | |
| set -eux | |
| pip install awscli | |
| echo "Uploading benchmark results to S3..." | |
| aws s3 sync all_results/ "s3://${S3_BUCKET}/${S3_PREFIX}/" \ | |
| --exclude "*" \ | |
| --include "*.json" \ | |
| --include "*.log" | |
| echo "Results uploaded to: s3://${S3_BUCKET}/${S3_PREFIX}/" | |
| - name: Prepare v3 results for dashboard upload | |
| shell: bash | |
| run: | | |
| set -eux | |
| echo "::group::Prepare v3 results" | |
| mkdir -p benchmark-results/v3 | |
| # Collect all v3 results into a single directory | |
| for RESULT_DIR in all_results/results-*/; do | |
| if [ -f "$RESULT_DIR/benchmark_results_v3.json" ]; then | |
| # Generate unique filename based on directory name | |
| FILENAME=$(basename "$RESULT_DIR") | |
| cp "$RESULT_DIR/benchmark_results_v3.json" "benchmark-results/v3/${FILENAME}.json" | |
| echo "✓ Copied $FILENAME v3 results" | |
| fi | |
| done | |
| echo "V3 results prepared:" | |
| ls -lah benchmark-results/v3/ | |
| echo "::endgroup::" | |
| - name: Upload benchmark results to dashboard | |
| uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main | |
| with: | |
| benchmark-results-dir: benchmark-results/v3 | |
| dry-run: false | |
| schema-version: v3 | |
| github-token: ${{ secrets.GITHUB_TOKEN }} |