Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion usecases/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@

This repository contains fully functional, standalone implementations of the following use cases:

- [Product catalog](./product_catalog) - product catalog use case implementation using DocumentDB
- [Product catalog](./product_catalog) - product catalog use case implementation using Amazon DocumentDB
- [Media asset workbench](./media_asset_workbench) - media asset processing pipeline using Amazon S3 Files
23 changes: 23 additions & 0 deletions usecases/media_asset_workbench/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Local configuration — contains live endpoints, never commit
config.env
# ...but DO keep the template so new users can `cp config.env.example config.env`
!config.env.example

# SSH keys (EC2 key pairs downloaded into this directory)
*.pem

# Python
__pycache__/
*.pyc
*.pyo
.venv/
venv/

# macOS
.DS_Store

# Build artefacts (produced by deploy.sh; regenerated on every deploy)
worker.tar.gz
rds-combined-ca-bundle.pem

# Generated sample data (produced by generate-sample-data.sh)
sample-data/
372 changes: 372 additions & 0 deletions usecases/media_asset_workbench/README.md

Large diffs are not rendered by default.

98 changes: 98 additions & 0 deletions usecases/media_asset_workbench/cleanup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash
# ────────────────────────────────────────────────────────────────────────────
# Media Asset Workbench - Cleanup Script
# Tears down all resources created by deploy.sh.
# Usage: ./cleanup.sh
# Reads STACK_NAME / AWS_REGION / BUCKET_NAME from config.env (written by
# deploy.sh). Prompts for confirmation before deleting anything.
# ────────────────────────────────────────────────────────────────────────────
set -euo pipefail
export AWS_PAGER=""

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

if [ ! -f "${SCRIPT_DIR}/config.env" ]; then
  echo "ERROR: config.env not found. Nothing to clean up." >&2
  exit 1
fi

source "${SCRIPT_DIR}/config.env"

# Abort early with a clear message if deploy.sh never populated these.
: "${STACK_NAME:?STACK_NAME not set in config.env}"
: "${AWS_REGION:?AWS_REGION not set in config.env}"
: "${BUCKET_NAME:?BUCKET_NAME not set in config.env}"

ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
# Must match the naming scheme used in deploy.sh step 1.
DEPLOY_BUCKET="${STACK_NAME}-deploy-${ACCOUNT_ID}-${AWS_REGION}"

echo "=== Media Asset Workbench Cleanup ==="
echo "Stack: ${STACK_NAME}"
echo "Region: ${AWS_REGION}"
echo "Asset Bucket: ${BUCKET_NAME}"
echo "Deploy Bucket: ${DEPLOY_BUCKET}"
echo ""
read -r -p "Are you sure you want to delete everything? (y/N) " -n 1
echo ""
[[ ${REPLY} =~ ^[Yy]$ ]] || { echo "Cancelled."; exit 0; }

# ── 1. Empty the asset bucket (versioned) ─────────────────────────────────────
# delete-objects accepts at most 1000 keys per request, so delete in batches
# and loop until the listing is empty. (The previous single-shot pipe failed
# silently via `|| true` on buckets with >1000 versions, leaving objects
# behind and making the CloudFormation stack delete fail later.)
#
# delete_version_batches FIELD
#   FIELD is "Versions" or "DeleteMarkers".
delete_version_batches() {
  local field=$1
  local payload
  while :; do
    payload=$(aws s3api list-object-versions --bucket "${BUCKET_NAME}" --region "${AWS_REGION}" \
      --query "{Objects: ${field}[0:1000].{Key:Key,VersionId:VersionId}}" \
      --output json 2>/dev/null) || break
    # Stop when the listing has no remaining keys for this field
    # (payload is then {"Objects": null} or an empty list).
    [[ "${payload}" == *'"Key"'* ]] || break
    aws s3api delete-objects --bucket "${BUCKET_NAME}" --delete "${payload}" \
      --region "${AWS_REGION}" > /dev/null 2>&1 || break
  done
}

echo "--- Emptying asset bucket: ${BUCKET_NAME}"
aws s3 rm "s3://${BUCKET_NAME}" --recursive --region "${AWS_REGION}" 2>/dev/null || true

echo " Deleting object versions..."
delete_version_batches Versions

echo " Deleting delete markers..."
delete_version_batches DeleteMarkers

echo " Asset bucket emptied"

# ── 2. Delete S3 Files filesystem (if created) ────────────────────────────────
echo "--- Checking for S3 Files filesystem..."
FS_ID=$(aws s3files list-file-systems \
  --bucket "arn:aws:s3:::${BUCKET_NAME}" \
  --query "fileSystems[0].fileSystemId" \
  --output text --region "${AWS_REGION}" 2>/dev/null || echo "")

if [ -n "${FS_ID}" ] && [ "${FS_ID}" != "None" ]; then
  echo " Deleting mount targets for filesystem: ${FS_ID}"
  # Get all mount target IDs and delete them
  MT_IDS=$(aws s3files list-mount-targets \
    --file-system-id "${FS_ID}" \
    --query "mountTargets[].mountTargetId" \
    --output text --region "${AWS_REGION}" 2>/dev/null || echo "")
  for MT_ID in ${MT_IDS}; do
    aws s3files delete-mount-target --mount-target-id "${MT_ID}" --region "${AWS_REGION}" 2>/dev/null || true
  done
  # Poll until the mount targets are gone (up to ~5 minutes). A fixed
  # 30-second sleep was a race: the filesystem delete below would then fail
  # and its error was swallowed by `|| true`.
  echo " Waiting for mount targets to be deleted..."
  for _ in {1..30}; do
    REMAINING=$(aws s3files list-mount-targets \
      --file-system-id "${FS_ID}" \
      --query "length(mountTargets)" \
      --output text --region "${AWS_REGION}" 2>/dev/null || echo "0")
    if [ "${REMAINING}" = "0" ]; then
      break
    fi
    sleep 10
  done
  echo " Deleting filesystem: ${FS_ID}"
  aws s3files delete-file-system --file-system-id "${FS_ID}" --region "${AWS_REGION}" 2>/dev/null || true
else
  echo " No S3 Files filesystem found"
fi

# ── 3. Delete the CloudFormation stack ─────────────────────────────────────────
echo "--- Deleting CloudFormation stack: ${STACK_NAME}"
aws cloudformation delete-stack --stack-name "${STACK_NAME}" --region "${AWS_REGION}"
echo " Waiting for stack deletion..."
aws cloudformation wait stack-delete-complete --stack-name "${STACK_NAME}" --region "${AWS_REGION}" 2>/dev/null || true
echo " Stack deleted"

# ── 4. Delete the deployment bucket ───────────────────────────────────────────
echo "--- Deleting deploy bucket: ${DEPLOY_BUCKET}"
aws s3 rb "s3://${DEPLOY_BUCKET}" --force --region "${AWS_REGION}" 2>/dev/null || true
echo " Deploy bucket deleted"

echo ""
echo "============================================================"
echo " Cleanup complete!"
echo "============================================================"
echo ""
echo " Don't forget to:"
echo " - Remove the /etc/hosts entry for DocumentDB:"
echo " sudo sed -i '' '/${DOCDB_ENDPOINT:-docdb}/d' /etc/hosts"
echo " - Close any open SSM port forward sessions"
echo "============================================================"
30 changes: 30 additions & 0 deletions usecases/media_asset_workbench/config.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copy this to config.env and fill in values after running deploy.sh
# cp config.env.example config.env
#
# NOTE: this file is sourced by deploy.sh and cleanup.sh, so it must remain
# valid shell: KEY=value with no spaces around '='.

# AWS (these have defaults — only change if needed)
AWS_REGION=us-east-1
STACK_NAME=media-asset-workbench

# Populated by deploy.sh — leave these blank here; deploy.sh overwrites
# config.env with the real values once the stack is up.
BUCKET_NAME=
DOCDB_ENDPOINT=
DOCDB_CLUSTER=
DOCDB_SECRET_ARN=
WORKER_IP=
WORKER_INSTANCE_ID=
SUBNET_ID=
SECURITY_GROUP_ID=
S3FILES_ROLE_ARN=

# Path to RDS CA bundle (downloaded by deploy.sh)
CA_BUNDLE_PATH=./rds-combined-ca-bundle.pem

# EC2 key pair name (must exist in your AWS account in the target region)
# Use the key pair NAME only, not the .pem filename.
# Example: if your key file is "mykey.pem", set KEY_PAIR_NAME=mykey
KEY_PAIR_NAME=

# Your public IP for SSH access to the worker instance.
# Must be in CIDR notation with /32 suffix.
# Run: echo "$(curl -s https://checkip.amazonaws.com)/32"
MY_IP=
157 changes: 157 additions & 0 deletions usecases/media_asset_workbench/deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#!/usr/bin/env bash
# ────────────────────────────────────────────────────────────────────────────
# Media Asset Workbench - Deploy Script
# Usage: ./deploy.sh
#   There are no CLI flags; configuration comes from environment variables
#   or config.env: STACK_NAME, AWS_REGION (optional, have defaults) and
#   KEY_PAIR_NAME, MY_IP (required).
# Requires: aws CLI v2, tar, python3
# ────────────────────────────────────────────────────────────────────────────
set -euo pipefail
export AWS_PAGER=""

# Defaults, overridable from the calling environment...
STACK_NAME="${STACK_NAME:-media-asset-workbench}"
AWS_REGION="${AWS_REGION:-us-east-1}"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# ...and then by config.env if present (NOTE: config.env values take
# precedence over variables exported in the calling shell).
[ -f "${SCRIPT_DIR}/config.env" ] && source "${SCRIPT_DIR}/config.env"

: "${KEY_PAIR_NAME:?Set KEY_PAIR_NAME in config.env}"
: "${MY_IP:?Set MY_IP in config.env (your public IP, e.g. 1.2.3.4/32)}"

echo "=== Media Asset Workbench Deploy ==="
echo "Stack: ${STACK_NAME}"
echo "Region: ${AWS_REGION}"
echo ""

# ── 1. Create deployment S3 bucket (if needed) ────────────────────────────────
# Bucket name embeds account + region so it is globally unique; cleanup.sh
# reconstructs the same name to delete it.
DEPLOY_BUCKET="${STACK_NAME}-deploy-$(aws sts get-caller-identity --query Account --output text)-${AWS_REGION}"
echo "--- Ensuring deploy bucket: ${DEPLOY_BUCKET}"
aws s3api head-bucket --bucket "${DEPLOY_BUCKET}" > /dev/null 2>&1 || \
  aws s3 mb "s3://${DEPLOY_BUCKET}" --region "${AWS_REGION}"

# ── 2. Package and upload worker code ─────────────────────────────────────────
echo "--- Packaging worker"
WORKER_TAR=$(mktemp /tmp/worker-XXXXXX.tar.gz)
# Single-quoted so the variable is expanded when the trap fires (not at
# definition time) and quoted so the path cannot word-split.
trap 'rm -f "${WORKER_TAR}"' EXIT
tar -czf "${WORKER_TAR}" -C "${SCRIPT_DIR}/worker" .
aws s3 cp "${WORKER_TAR}" "s3://${DEPLOY_BUCKET}/worker.tar.gz" --region "${AWS_REGION}"
echo " Worker packaged and uploaded"

# ── 3. Deploy/update CloudFormation stack ────────────────────────────────────
echo "--- Deploying CloudFormation stack"
aws cloudformation deploy \
  --stack-name "${STACK_NAME}" \
  --template-file "${SCRIPT_DIR}/infrastructure/cloudformation.yaml" \
  --region "${AWS_REGION}" \
  --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM \
  --parameter-overrides \
    KeyPairName="${KEY_PAIR_NAME}" \
    MyIP="${MY_IP}" \
  --no-fail-on-empty-changeset

echo " Stack deployed"

# ── 4. Fetch outputs ──────────────────────────────────────────────────────────
echo "--- Fetching stack outputs"
OUTPUTS=$(aws cloudformation describe-stacks \
  --stack-name "${STACK_NAME}" \
  --region "${AWS_REGION}" \
  --query 'Stacks[0].Outputs' \
  --output json)

# ─────────────────────────────────────────────────────────────────────────────
# get_output KEY
#   Print the value of the CloudFormation stack output named KEY, read from
#   the JSON array held in the global OUTPUTS variable.
#   Returns non-zero (with a message on stderr) when KEY is absent, so that
#   `set -e` aborts the deploy instead of silently writing an empty value
#   into config.env (the previous version printed nothing and returned 0).
# ─────────────────────────────────────────────────────────────────────────────
get_output() {
  local key=$1
  local value
  value=$(printf '%s' "${OUTPUTS}" | python3 -c "
import json, sys
key = sys.argv[1]
for o in json.load(sys.stdin):
    if o.get('OutputKey') == key:
        print(o['OutputValue'])
        break
else:
    sys.exit(1)
" "${key}") || {
    echo "ERROR: stack output '${key}' not found" >&2
    return 1
  }
  printf '%s\n' "${value}"
}

BUCKET_NAME=$(get_output BucketName)
DOCDB_ENDPOINT=$(get_output DocDBEndpoint)
WORKER_IP=$(get_output WorkerPublicIP)
WORKER_INSTANCE_ID=$(get_output WorkerInstanceId)
DOCDB_CLUSTER=$(get_output DocDBClusterIdentifier)
DOCDB_SECRET_ARN=$(get_output DocDBSecretArn)
SUBNET_ID=$(get_output SubnetId)
SECURITY_GROUP_ID=$(get_output MountTargetSGId)
S3FILES_ROLE_ARN=$(get_output S3FilesRoleArn)

# Fail fast if any required output came back empty, rather than writing an
# unusable config.env and failing later in an SSH/SSM step.
for v in BUCKET_NAME DOCDB_ENDPOINT WORKER_IP WORKER_INSTANCE_ID \
         DOCDB_CLUSTER DOCDB_SECRET_ARN SUBNET_ID SECURITY_GROUP_ID S3FILES_ROLE_ARN; do
  if [ -z "${!v}" ]; then
    echo "ERROR: stack output for ${v} is empty" >&2
    exit 1
  fi
done

# ── 4b. Copy worker code to asset bucket (EC2 pulls from here on first boot) ──
echo "--- Copying worker code to asset bucket"
aws s3 cp "s3://${DEPLOY_BUCKET}/worker.tar.gz" "s3://${BUCKET_NAME}/worker.tar.gz" --region "${AWS_REGION}"
echo " Worker code copied"

# ── 5. Write config.env ───────────────────────────────────────────────────────
# Overwrites any existing config.env; cleanup.sh and the next deploy read it.
echo "--- Writing config.env"
cat > "${SCRIPT_DIR}/config.env" << ENV
AWS_REGION=${AWS_REGION}
STACK_NAME=${STACK_NAME}
BUCKET_NAME=${BUCKET_NAME}
DOCDB_ENDPOINT=${DOCDB_ENDPOINT}
DOCDB_CLUSTER=${DOCDB_CLUSTER}
DOCDB_SECRET_ARN=${DOCDB_SECRET_ARN}
WORKER_IP=${WORKER_IP}
WORKER_INSTANCE_ID=${WORKER_INSTANCE_ID}
KEY_PAIR_NAME=${KEY_PAIR_NAME}
MY_IP=${MY_IP}
CA_BUNDLE_PATH=${SCRIPT_DIR}/rds-combined-ca-bundle.pem
SUBNET_ID=${SUBNET_ID}
SECURITY_GROUP_ID=${SECURITY_GROUP_ID}
S3FILES_ROLE_ARN=${S3FILES_ROLE_ARN}
ENV

# ── 6. Download RDS CA bundle for local DocumentDB TLS ────────────────────────
# --fail makes curl return non-zero on HTTP errors; without it a 404/500
# error page would be saved as the "CA bundle" with exit status 0.
echo "--- Downloading RDS CA bundle"
curl -fsSL https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem \
  -o "${SCRIPT_DIR}/rds-combined-ca-bundle.pem" \
  || { echo "ERROR: failed to download RDS CA bundle" >&2; exit 1; }
echo " CA bundle saved to rds-combined-ca-bundle.pem"

echo ""
echo "============================================================"
echo " Deploy complete!"
echo "============================================================"
echo ""
echo " S3 Bucket: ${BUCKET_NAME}"
echo " DocumentDB: ${DOCDB_ENDPOINT}"
echo " Worker IP: ${WORKER_IP}"
echo " Worker ID: ${WORKER_INSTANCE_ID}"
echo " DocDB Cluster: ${DOCDB_CLUSTER}"
echo ""
echo " NEXT STEPS:"
echo ""
echo " 1. Generate and upload sample data (on your laptop):"
echo " ./generate-sample-data.sh"
echo " aws s3 sync ./sample-data/ s3://${BUCKET_NAME}/sample-packs/ --region ${AWS_REGION}"
echo ""
echo " 2. Set up S3 Files (run on your local machine — executes userdata.sh remotely):"
echo " chmod 400 ~/.ssh/${KEY_PAIR_NAME}.pem # or ./${KEY_PAIR_NAME}.pem if key is here"
echo " ssh -i ~/.ssh/${KEY_PAIR_NAME}.pem ec2-user@${WORKER_IP} \\"
echo " \"sudo BUCKET_NAME=${BUCKET_NAME} AWS_REGION=${AWS_REGION} \\"
echo " SUBNET_ID=${SUBNET_ID} SECURITY_GROUP_ID=${SECURITY_GROUP_ID} \\"
echo " S3FILES_ROLE_ARN=${S3FILES_ROLE_ARN} bash /opt/worker/userdata.sh\""
echo ""
echo " 3. Before running the UI, open the DocumentDB tunnel:"
echo " # Add to /etc/hosts (one-time, remove after teardown):"
echo " echo '127.0.0.1 ${DOCDB_ENDPOINT}' | sudo tee -a /etc/hosts"
echo ""
echo " # Open SSM port forward (keep this terminal open while using the UI):"
echo " aws ssm start-session \\"
echo " --target ${WORKER_INSTANCE_ID} \\"
echo " --region ${AWS_REGION} \\"
echo " --document-name AWS-StartPortForwardingSessionToRemoteHost \\"
echo " --parameters 'host=${DOCDB_ENDPOINT},portNumber=27017,localPortNumber=27017'"
echo ""
echo " 4. Start the local UI (new terminal on your laptop):"
echo " cd ui && pip install -r requirements.txt"
echo " uvicorn app:app --reload --port 8080"
echo " Then open http://127.0.0.1:8080 in your browser"
echo ""
echo " 5. Stop DocumentDB when done to save cost:"
echo " aws docdb stop-db-cluster --db-cluster-identifier ${DOCDB_CLUSTER} --region ${AWS_REGION}"
echo ""
echo " 6. Tear down everything:"
echo " ./cleanup.sh"
echo "============================================================"
Loading