Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
395 changes: 343 additions & 52 deletions egomimic/algo/pi.py

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions egomimic/eval/eval_pi.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ def compute_metrics_and_viz(self, batch):
total_loss = total_loss + loss_val
n_loss_embodiments += 1

# Subtask-prediction (hierarchical) validation signals: cross-entropy loss and
# teacher-forced token accuracy. Computed by forward_eval; surfaced here.
sub_loss_key = f"{embodiment_name}_subtask_loss"
if sub_loss_key in preds:
metrics[f"Valid/{sub_loss_key}"] = preds[sub_loss_key]
sub_acc_key = f"{embodiment_name}_subtask_acc"
if sub_acc_key in preds:
metrics[f"Valid/{sub_acc_key}"] = preds[sub_acc_key]

if pred_key in preds:
metrics[f"Valid/{pred_key}_paired_mse_avg"] = mse(
preds[pred_key].cpu(), _batch[ac_key].cpu()
Expand Down
59 changes: 59 additions & 0 deletions egomimic/hydra_configs/data/sort_pi_subtask.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
defaults:
- cotrain_pi_lang_6d
- _self_

# Sort-regime data for hierarchical subtask prediction.
#
# Inherits `cotrain_pi_lang_6d` (NOT `cotrain_pi_lang`): the model config
# `pi0.5_subtask` uses action_encoding=cartesian_normalized_rot6d, which expects
# the xyz+6D(+gripper) action layout produced by transform mode `cartesian_6d`.
# Inheriting the plain `cotrain_pi_lang` (ypr) parent would crash in
# to32_norm_6d on the first batch.
#
# Two changes vs. the parent:
# 1. resolver key_map registers a high-level view of the annotation array
# (`annotations_high`), filtered to the `level == "high"` sort goals the
# SortConverter writes. The primary `annotations` key is pinned to
# `level == "low"` (pick-and-place) inside get_keymap when
# `high_annotation_key` is set.
# 2. filters select both SORT episodes (task == 'sort') and pick_place
# episodes. SORT episodes carry the high-level "Sorting" track, so they give
# a real training signal with high != low; pick_place episodes have
# high == low via the use-as-both fallback. Without this override the
# inherited filter is `task == 'pick_place'` only, so no episode would have
# a high-level track and the subtask objective would degenerate to
# high == low for every frame.
#
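# Pair with model config `pi0.5_subtask`.
# NOTE(review): the `high_annotation_key` override is set under
# train_datasets only; the valid_datasets entries override just the filters.
# Confirm that validation batches also expose `annotations_high`.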
train_datasets:
eva_bimanual:
resolver:
key_map:
high_annotation_key: annotations_high
filters:
_target_: egomimic.rldb.filters.ScaleAnnotationDatasetFilter
project_name: "dense-language"
filter_lambdas:
- "lambda row: (row.get('embodiment') == 'eva_bimanual') & (row['task'] in ('sort', 'pick_place')) & (row['zarr_processed_path'] != '')"
aria_bimanual:
resolver:
key_map:
high_annotation_key: annotations_high
filters:
_target_: egomimic.rldb.filters.ScaleAnnotationDatasetFilter
project_name: "dense-language"
filter_lambdas:
- "lambda row: (row.get('embodiment') == 'aria_bimanual') & (row['task'] in ('sort', 'pick_place')) & (row['zarr_processed_path'] != '')"

valid_datasets:
eva_bimanual:
filters:
_target_: egomimic.rldb.filters.ScaleAnnotationDatasetFilter
project_name: "dense-language"
filter_lambdas:
- "lambda row: row.get('embodiment') == 'eva_bimanual' and row['task'] in ('sort', 'pick_place') and row['zarr_processed_path'] != '' and 'alignment' not in (row.get('task_description') or '')"
aria_bimanual:
filters:
_target_: egomimic.rldb.filters.ScaleAnnotationDatasetFilter
project_name: "dense-language"
filter_lambdas:
- "lambda row: row.get('embodiment') == 'aria_bimanual' and row['task'] in ('sort', 'pick_place') and row['zarr_processed_path'] != '' and 'alignment' not in (row.get('task_description') or '')"
4 changes: 3 additions & 1 deletion egomimic/hydra_configs/evaluator/viz/cotrain_lang.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ eva_bimanual:
aria_bimanual:
_target_: egomimic.rldb.embodiment.human.Aria.viz_gt_preds
_partial_: true
image_key: observations.images.front_img_1
# cotrain_pi_lang uses keymap_mode=cartesian_pi, which names the front image
# `base_0_rgb` (not the `cartesian`-style observations.images.front_img_1).
image_key: base_0_rgb
action_key: actions_cartesian
mode: traj
annotation_key: annotations
Expand Down
26 changes: 26 additions & 0 deletions egomimic/hydra_configs/model/pi0.5_subtask.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
defaults:
- pi0.5_cotrain_eva_aria_6d
- _self_

# PI0.5 with hierarchical subtask prediction (sort regime). The model conditions
# on the HIGH-level sort instruction and predicts the LOW-level pick-and-place
# instruction as language tokens (auxiliary cross-entropy), in addition to the
# action flow-matching loss. At inference the subtask is decoded
# autoregressively and the action expert is conditioned on the realized
# [high + subtask] prefix. See egomimic/models/pi0_subtask.py and
# egomimic/algo/pi.py.
#
# Pair with data config `sort_pi_subtask` (registers the `annotations_high`
# high-level view of the annotation array).
robomimic_model:
subtask_prediction: true
# High-level (sort) goal conditions the model; low-level (pick-and-place) is
# the prediction target. Both read the same zarr `annotations` array, split by
# the `level` tag the SortConverter writes (see get_keymap high_annotation_key).
annotation_key: "annotations_high"
subtask_key: "annotations"
subtask_anchor: "Subtask: "
# Relative weight of the subtask CE loss vs. the action flow-matching loss.
subtask_loss_weight: 1.0
# Max autoregressively-decoded subtask length (tokens) at inference.
max_subtask_len: 48
Loading