Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ PTO Runtime compiles three independent programs (Host `.so`, AICPU `.so`, AICore
### Run a single example
```bash
python examples/scripts/run_example.py \
-k examples/host_build_graph/vector_example/kernels \
-g examples/host_build_graph/vector_example/golden.py \
-k examples/a2a3/host_build_graph/vector_example/kernels \
-g examples/a2a3/host_build_graph/vector_example/golden.py \
-p a2a3sim
```

Expand Down
63 changes: 43 additions & 20 deletions ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,10 @@ DEVICE_TESTS_DIR="tests/device_tests"

declare -a HW_TASK_NAMES=()
declare -a HW_TASK_DIRS=()
declare -a HW_TASK_PLATS=()
declare -a SIM_TASK_NAMES=()
declare -a SIM_TASK_DIRS=()
declare -a SIM_TASK_PLATS=()

# Discover examples
while IFS= read -r -d '' example_dir; do
Expand All @@ -211,15 +213,21 @@ while IFS= read -r -d '' example_dir; do
[[ -f "$kernel_config" && -f "$golden" ]] || continue

example_name="${example_dir#$EXAMPLES_DIR/}"
example_runtime="${example_name%%/*}" # Extract runtime from path
example_arch="${example_name%%/*}" # Extract arch (a2a3/a5) from path
example_rest="${example_name#*/}"
example_runtime="${example_rest%%/*}" # Extract runtime from path

# Filter by runtime if specified
if [[ -n "$RUNTIME" && "$example_name" != "$RUNTIME"/* ]]; then
if [[ -n "$RUNTIME" && "$example_runtime" != "$RUNTIME" ]]; then
continue
fi

# Filter by platform's supported runtimes
# Filter by platform's arch and supported runtimes
if [[ -n "$PLATFORM" ]]; then
platform_base="${PLATFORM%sim}"
if [[ "$example_arch" != "$platform_base" ]]; then
continue # Skip examples not matching platform arch
fi
platform_runtimes="$(get_platform_runtimes "$PLATFORM")"
if [[ ! " $platform_runtimes " =~ " $example_runtime " ]]; then
continue # Skip unsupported runtime for this platform
Expand All @@ -230,18 +238,23 @@ while IFS= read -r -d '' example_dir; do
if [[ "$PLATFORM" =~ sim$ ]]; then
SIM_TASK_NAMES+=("example:${example_name}")
SIM_TASK_DIRS+=("${example_dir}")
SIM_TASK_PLATS+=("${PLATFORM}")
else
HW_TASK_NAMES+=("example:${example_name}")
HW_TASK_DIRS+=("${example_dir}")
HW_TASK_PLATS+=("${PLATFORM}")
fi
elif [[ "$OS" == "Darwin" ]]; then
SIM_TASK_NAMES+=("example:${example_name}")
SIM_TASK_DIRS+=("${example_dir}")
SIM_TASK_PLATS+=("${example_arch}sim")
else
HW_TASK_NAMES+=("example:${example_name}")
HW_TASK_DIRS+=("${example_dir}")
HW_TASK_PLATS+=("${example_arch}")
SIM_TASK_NAMES+=("example:${example_name}")
SIM_TASK_DIRS+=("${example_dir}")
SIM_TASK_PLATS+=("${example_arch}sim")
fi
done < <(find "$EXAMPLES_DIR" -mindepth 1 -type d -print0 | sort -z)

Expand All @@ -257,15 +270,21 @@ if [[ -d "$DEVICE_TESTS_DIR" ]]; then
golden="${test_dir}/golden.py"
[[ -f "$kernel_config" && -f "$golden" ]] || continue
test_name="${test_dir#$DEVICE_TESTS_DIR/}"
test_runtime="${test_name%%/*}" # Extract runtime from path
test_arch="${test_name%%/*}" # Extract arch (a2a3/a5) from path
test_rest="${test_name#*/}"
test_runtime="${test_rest%%/*}" # Extract runtime from path

# Filter by runtime if specified
if [[ -n "$RUNTIME" && "$test_name" != "$RUNTIME"/* ]]; then
if [[ -n "$RUNTIME" && "$test_runtime" != "$RUNTIME" ]]; then
continue
fi

# Filter by platform's supported runtimes
# Filter by platform's arch and supported runtimes
if [[ -n "$PLATFORM" ]]; then
platform_base="${PLATFORM%sim}"
if [[ "$test_arch" != "$platform_base" ]]; then
continue # Skip tests not matching platform arch
fi
platform_runtimes="$(get_platform_runtimes "$PLATFORM")"
if [[ ! " $platform_runtimes " =~ " $test_runtime " ]]; then
continue # Skip unsupported runtime for this platform
Expand All @@ -274,6 +293,7 @@ if [[ -d "$DEVICE_TESTS_DIR" ]]; then

HW_TASK_NAMES+=("device_test:${test_name}")
HW_TASK_DIRS+=("${test_dir}")
HW_TASK_PLATS+=("${PLATFORM:-${test_arch}}")
done < <(find "$DEVICE_TESTS_DIR" -mindepth 1 -type d -print0 | sort -z)
else
echo "Skipping device tests (hardware platforms only)"
Expand All @@ -282,18 +302,14 @@ fi

echo "Discovered ${#HW_TASK_NAMES[@]} hardware tasks, ${#SIM_TASK_NAMES[@]} simulation tasks"

# Determine platforms for execution
HW_PLATFORM="${PLATFORM:-a2a3}"
SIM_PLATFORM="${PLATFORM:-a2a3sim}"

MAX_RETRIES=3

# ---- Unified task runner ----
# Runs a single task and records the result.
# Log naming: ${safe_name}_${platform}_attempt${attempt}.log
# Result format: name|platform|PASS/FAIL|device:X|attempt:N|Xs
run_task() {
local name="$1" dir="$2" platform="$3" attempt="$4" device_id="$5"
local name="$1" dir="$2" platform="$3" attempt="$4" device_id="$5" print_log_on_fail="${6:-true}"
local safe_name="${name//[:\/]/_}"
local task_log="${LOG_DIR}/${safe_name}_${platform}_attempt${attempt}.log"
local start_time=$SECONDS
Expand All @@ -319,9 +335,11 @@ run_task() {
else
status="FAIL"
echo "[${platform}${device_id:+:dev${device_id}}] FAIL: $name (${elapsed}s)"
echo "--- LOG: $name (attempt $attempt) ---"
cat "$task_log"
echo "--- END ---"
if [[ "$print_log_on_fail" == "true" ]]; then
echo "--- LOG: $name (attempt $attempt) ---"
cat "$task_log"
echo "--- END ---"
fi
fi
echo "${name}|${platform}|${status}|device:${device_id:-sim}|attempt:${attempt}|${elapsed}s" \
>> "$RESULTS_FILE"
Expand All @@ -348,7 +366,7 @@ run_sim_tasks() {
local -a pids=()
for idx in "${indices[@]}"; do
(
if run_task "${SIM_TASK_NAMES[$idx]}" "${SIM_TASK_DIRS[$idx]}" "$SIM_PLATFORM" "$attempt"; then
if run_task "${SIM_TASK_NAMES[$idx]}" "${SIM_TASK_DIRS[$idx]}" "${SIM_TASK_PLATS[$idx]}" "$attempt"; then
echo "${idx}|PASS" >> "$sim_marker"
else
echo "${idx}|FAIL" >> "$sim_marker"
Expand All @@ -359,7 +377,7 @@ run_sim_tasks() {
for pid in "${pids[@]}"; do wait "$pid" 2>/dev/null || true; done
else
for idx in "${indices[@]}"; do
if run_task "${SIM_TASK_NAMES[$idx]}" "${SIM_TASK_DIRS[$idx]}" "$SIM_PLATFORM" "$attempt"; then
if run_task "${SIM_TASK_NAMES[$idx]}" "${SIM_TASK_DIRS[$idx]}" "${SIM_TASK_PLATS[$idx]}" "$attempt"; then
echo "${idx}|PASS" >> "$sim_marker"
else
echo "${idx}|FAIL" >> "$sim_marker"
Expand Down Expand Up @@ -406,17 +424,22 @@ run_hw_tasks() {

IFS=':' read -r idx attempt <<< "$entry"

if run_task "${HW_TASK_NAMES[$idx]}" "${HW_TASK_DIRS[$idx]}" "$HW_PLATFORM" "$attempt" "$device_id"; then
if run_task "${HW_TASK_NAMES[$idx]}" "${HW_TASK_DIRS[$idx]}" "${HW_TASK_PLATS[$idx]}" "$attempt" "$device_id" "false"; then
echo "${idx}|PASS" >> "$hw_marker"
else
next=$((attempt + 1))
if [[ $next -lt $MAX_RETRIES ]]; then
flock "$lock" bash -c "echo '${idx}:${next}' >> \"$queue\""
else
echo "${idx}|FAIL" >> "$hw_marker"
local safe_name="${HW_TASK_NAMES[$idx]//[:\/]/_}"
local last_log="${LOG_DIR}/${safe_name}_${HW_TASK_PLATS[$idx]}_attempt${attempt}.log"
echo "--- LOG: ${HW_TASK_NAMES[$idx]} (attempt $attempt) ---"
cat "$last_log"
echo "--- END ---"
echo "[${HW_TASK_PLATS[$idx]}:dev${device_id}] Device quarantined after exhausting retries"
break
fi
echo "[${HW_PLATFORM}:dev${device_id}] Device quarantined after failure"
break
fi
done
) &
Expand Down Expand Up @@ -606,7 +629,7 @@ for i in "${!TASK_ORDER[@]}"; do

platform="${FINAL_PLATFORM[$i]}"
device="${FINAL_DEVICE[$i]}"
attempt="${FINAL_ATTEMPT[$i]}"
attempt=$(( FINAL_ATTEMPT[$i] + 1 ))
timing="${FINAL_TIMING[$i]}"

if [[ "$result" == "FAIL" ]]; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void aicpu_orchestration_entry(PTO2Runtime* rt, uint64_t* args, int arg_count, i
make_output_param(li_batch),
make_output_param(mi_batch),
};
pto2_rt_submit_task(rt, FUNC_AIV_HUB, PTO2_WORKER_VECTOR, params_hub, 3);
pto2_rt_submit_aiv_task(rt, FUNC_AIV_HUB, params_hub, 3);

for (uint64_t bn = 0; bn < max_bn; bn++) {
uint64_t sij_shapes[2] = {chunk_bc * q_tile, block_size};
Expand All @@ -160,7 +160,7 @@ void aicpu_orchestration_entry(PTO2Runtime* rt, uint64_t* args, int arg_count, i
make_scalar_param(num_heads),
make_scalar_param(batch_start),
};
pto2_rt_submit_task(rt, FUNC_QK_MATMUL, PTO2_WORKER_CUBE, params_qk, 10);
pto2_rt_submit_aic_task(rt, FUNC_QK_MATMUL, params_qk, 10);

PTOParam params_sf[] = {
make_input_param(sij_b),
Expand All @@ -173,7 +173,7 @@ void aicpu_orchestration_entry(PTO2Runtime* rt, uint64_t* args, int arg_count, i
make_scalar_param(bn),
make_scalar_param(batch_start),
};
pto2_rt_submit_task(rt, FUNC_SOFTMAX_PREPARE, PTO2_WORKER_VECTOR, params_sf, 9);
pto2_rt_submit_aiv_task(rt, FUNC_SOFTMAX_PREPARE, params_sf, 9);

PTOParam params_pv[] = {
make_input_param(pij_b),
Expand All @@ -185,7 +185,7 @@ void aicpu_orchestration_entry(PTO2Runtime* rt, uint64_t* args, int arg_count, i
make_scalar_param(block_num),
make_scalar_param(batch_start),
};
pto2_rt_submit_task(rt, FUNC_PV_MATMUL, PTO2_WORKER_CUBE, params_pv, 8);
pto2_rt_submit_aic_task(rt, FUNC_PV_MATMUL, params_pv, 8);

uint64_t is_first = (bn == 0) ? 1 : 0;
uint64_t is_last = (bn == max_bn - 1) ? 1 : 0;
Expand All @@ -204,7 +204,7 @@ void aicpu_orchestration_entry(PTO2Runtime* rt, uint64_t* args, int arg_count, i
make_scalar_param(num_heads),
make_scalar_param(batch_start),
};
pto2_rt_submit_task(rt, FUNC_ONLINE_UPDATE, PTO2_WORKER_VECTOR, params_up, 13);
pto2_rt_submit_aiv_task(rt, FUNC_ONLINE_UPDATE, params_up, 13);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,15 @@ void aicpu_orchestration_entry(PTO2Runtime* rt, uint64_t* args, int arg_count, i
make_input_param(B_view),
make_output_param(P),
};
pto2_rt_submit_task(rt, FUNC_GEMM_TILE, PTO2_WORKER_CUBE,
pto2_rt_submit_aic_task(rt, FUNC_GEMM_TILE,
params_gemm, 3); // gemm

// C[m,n] += P
PTOParam params_add[] = {
make_inout_param(C_view),
make_input_param(P),
};
pto2_rt_submit_task(rt, FUNC_TILE_ADD, PTO2_WORKER_VECTOR,
pto2_rt_submit_aiv_task(rt, FUNC_TILE_ADD,
params_add, 2); // add
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,28 @@ Validate `arg_count` in `aicpu_orchestration_config` and interpret pointers as d
2. Wrap orchestration in scopes with `PTO2_SCOPE(rt)` to control tensor lifetimes.
3. Use `make_tensor_external` for input/output buffers and `make_tensor` for intermediates.
4. Build `PTOParam` arrays with `make_input_param`, `make_output_param`, `make_inout_param`, and `make_scalar_param`.
5. Submit tasks with `pto2_rt_submit_task(rt, func_id, worker_type, params, num_params)`.
5. Submit tasks with one of:
- `pto2_rt_submit_aic_task(rt, kernel_id, params, num_params)` — AIC (CUBE) task
- `pto2_rt_submit_aiv_task(rt, kernel_id, params, num_params)` — AIV (VECTOR) task
- `pto2_rt_submit_task(rt, mixed_kernels, params, num_params)` — mixed task with a `MixedKernels` struct

Dependencies are inferred by TensorMap from input/inout/output tensors, so you do not add explicit edges.

## Worker Types And Kernel IDs
- Worker types come from `pto_orchestration_api.h` (`PTO2_WORKER_CUBE`, `PTO2_WORKER_VECTOR`, etc.).
## Submit API And Kernel IDs
- Submit helpers are defined in `pto_orchestration_api.h`.
- `pto2_rt_submit_aic_task` and `pto2_rt_submit_aiv_task` are convenience wrappers around `pto2_rt_submit_task` with a `MixedKernels` struct.
- For mixed AIC+AIV tasks, construct a `MixedKernels` struct directly:
```cpp
MixedKernels mk;
mk.aic_kernel_id = FUNC_QK;
mk.aiv0_kernel_id = FUNC_SF;
pto2_rt_submit_task(rt, mk, params, num_params);
```
- Kernel ID values (the `kernel_id` argument of the submit helpers, also referred to as `func_id`) are defined in `kernels/kernel_config.py` under `KERNELS`.

## Completion Semantics
Do not call `pto2_rt_orchestration_done` yourself in device mode. The executor wraps the entry call in an outer scope and signals completion after `aicpu_orchestration_entry` returns.

## Examples
- `examples/tensormap_and_ringbuffer/vector_example/kernels/orchestration/example_orchestration.cpp`
- `examples/tensormap_and_ringbuffer/bgemm/kernels/orchestration/bgemm_orch.cpp`
- `examples/tensormap_and_ringbuffer/vector_example/kernels/orchestration/example_orchestration.cpp` (AIV-only tasks)
- `examples/tensormap_and_ringbuffer/bgemm/kernels/orchestration/bgemm_orch.cpp` (uses both AIC and AIV tasks, each submitted through its own helper)
Loading