Download datasets from Hugging Face and convert them to a standardized format.
# Install
pip install -r requirements.txt
# Get dataset → outputs to data/questions/
python scripts/process_dataset.py
# Upload to S3
python utils/upload_to_s3.py --action upload --input data/questions --bucket BUCKET

# Output structure
data/questions/{domain}_task/{task_id}/
├── first_frame.png
├── final_frame.png
├── prompt.txt
└── ground_truth.mp4 (optional)
# AWS credentials (used for the S3 upload)
export AWS_ACCESS_KEY_ID="key"
export AWS_SECRET_ACCESS_KEY="secret"
export AWS_DEFAULT_REGION="us-east-1"