28 changes: 24 additions & 4 deletions docker-compose.yml
@@ -24,13 +24,12 @@ services:
- ./server/.env
environment:
- IOREDIS_URL=redis://redis:6379
- DATABASE_URL=${DATABASE_URL}
depends_on:
redis:
condition: service_healthy
restart: unless-stopped

# ── Workers (transcode + HLS + thumbnail) ───────────────────────────
# ── Transcode Worker (FFmpeg — HLS + thumbnails) ─────────
worker:
build:
context: ./server
@@ -40,16 +39,37 @@ services:
- ./server/.env
environment:
- IOREDIS_URL=redis://redis:6379
- DATABASE_URL=${DATABASE_URL}
depends_on:
redis:
condition: service_healthy
restart: unless-stopped
# Workers may need more memory for FFmpeg transcoding
deploy:
resources:
limits:
memory: 2G

# ── AI Worker (FFmpeg + faster-whisper transcription) ────
ai-worker:
build:
context: ./server
dockerfile: Dockerfile
target: ai-worker
args:
# Change to tiny/small/medium/large as needed
WHISPER_MODEL: base
env_file:
- ./server/.env
environment:
- IOREDIS_URL=redis://redis:6379
depends_on:
redis:
condition: service_healthy
restart: unless-stopped
deploy:
resources:
limits:
# faster-whisper base model needs ~1 GB; bump for larger models
memory: 2G
Comment on lines +51 to +72
⚠️ Potential issue | 🟡 Minor

Ensure DATABASE_URL is populated in ./server/.env for production.

transcribe.worker.ts imports db from ../config/db.js and the db module requires process.env.DATABASE_URL, exiting if not set. While ./server/.env is loaded via env_file, the DATABASE_URL variable must be present there. The requirement is already documented in server/.env.example, so ensure operators include it in their production environment file.
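A minimal sketch of the required entry — the values below are placeholders, not the project's real credentials; the actual connection string depends on your deployment:

```shell
# server/.env — placeholder values, adjust for your environment
DATABASE_URL=postgres://app:secret@db-host:5432/videos
IOREDIS_URL=redis://redis:6379
```

Since both `worker` and `ai-worker` load the same `env_file`, one entry covers every container that imports `config/db.js`.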

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@docker-compose.yml` around lines 51 - 72, The ai-worker service loads
./server/.env via the env_file but the DB module (imported by
transcribe.worker.ts from ../config/db.js) exits if process.env.DATABASE_URL is
missing; ensure the ./server/.env used in production contains a valid
DATABASE_URL entry (matching the format expected by ../config/db.js) so the db
initialization does not fail when the ai-worker starts.


volumes:
redis_data:
45 changes: 45 additions & 0 deletions server/Dockerfile
@@ -47,3 +47,48 @@ COPY --from=builder /app/dist ./dist
COPY package.json ./

CMD ["node", "dist/worker.js"]


# ── Stage 3c: AI Worker (FFmpeg + faster-whisper) ──────────────────────────
# Uses node:22-slim (Debian) instead of Alpine because PyAV (a faster-whisper
# dependency) has no pre-built musl/Alpine wheels and its Cython compilation
# fails on Python 3.12 + Alpine. Debian has pre-built wheels → no compilation
# needed, faster build, smaller attack surface.
#
# Available WHISPER_MODEL values (ascending size / accuracy):
# tiny (~75 MB) | base (~150 MB) | small (~500 MB) | medium | large
FROM node:22-slim AS ai-worker

# 1. System packages (Debian-based)
RUN apt-get update && apt-get install -y --no-install-recommends \
ffmpeg \
python3 \
python3-pip \
python3-venv \
&& rm -rf /var/lib/apt/lists/*

# 2. Isolated Python venv + faster-whisper (CTranslate2-based, no PyTorch)
# Pre-built PyAV wheels on Debian → no compilation required.
ENV VIRTUAL_ENV=/opt/whisper-venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

RUN python3 -m venv "$VIRTUAL_ENV" && \
pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir faster-whisper

# 3. Pre-download model weights at build time so startup is instant
# compute_type=int8 → efficient CPU inference, no accuracy loss for most tasks
ARG WHISPER_MODEL=base
ENV WHISPER_MODEL=${WHISPER_MODEL}
RUN python3 -c "\
from faster_whisper import WhisperModel; \
WhisperModel('${WHISPER_MODEL}', device='cpu', compute_type='int8')"

# 4. Node.js application
WORKDIR /app

COPY --from=prod-deps /app/node_modules ./node_modules
COPY --from=builder /app/dist ./dist
COPY package.json ./

CMD ["node", "dist/ai-worker.js"]
Comment on lines +60 to +94
⚠️ Potential issue | 🟡 Minor

Run the AI worker as a non-root user and consider HEALTHCHECK.

The image runs as root throughout (Checkov CKV_DOCKER_3). Combined with FFmpeg + a Python interpreter that processes user-supplied media, a non-root runtime user materially reduces blast radius. Note: if you add a USER directive later, the build-time model preload at line 83-85 caches under /root/.cache/huggingface/..., which the non-root runtime user won't be able to read — you'll need to either set HF_HOME/XDG_CACHE_HOME to a shared path or run the preload as the same user.

A minimal hardening:

🔧 Proposed fix
+ENV HF_HOME=/opt/hf-cache
+RUN mkdir -p /opt/hf-cache
 RUN python3 -c "\
 from faster_whisper import WhisperModel; \
 WhisperModel('${WHISPER_MODEL}', device='cpu', compute_type='int8')"
 ...
+RUN groupadd --system app && useradd --system --gid app --home /app app \
+    && chown -R app:app /app /opt/hf-cache /opt/whisper-venv
+USER app
 CMD ["node", "dist/ai-worker.js"]

A HEALTHCHECK is optional for a queue worker (Compose's depends_on covers Redis), so I'd skip CKV_DOCKER_2 unless you wire BullMQ readiness through an HTTP probe.

🧰 Tools
🪛 Checkov (3.2.524)

[low] 1-94: Ensure that HEALTHCHECK instructions have been added to container images

(CKV_DOCKER_2)


[low] 1-94: Ensure that a user for the container has been created

(CKV_DOCKER_3)

🪛 Hadolint (2.14.0)

[warning] 63-63: Pin versions in apt get install. Instead of apt-get install <package> use apt-get install <package>=<version>

(DL3008)


[warning] 75-75: Pin versions in pip. Instead of pip install <package> use pip install <package>==<version> or pip install --requirement <requirements file>

(DL3013)
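A hedged sketch of how the pip-pinning warning could be addressed — the version below is illustrative and not checked against the registry; substitute whichever release you actually validated:

```dockerfile
# Illustrative pin only — replace with the faster-whisper version you tested.
# Apt packages can be pinned the same way; look up the exact candidate
# with `apt-cache madison ffmpeg` inside the base image first.
RUN python3 -m venv "$VIRTUAL_ENV" && \
    pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir faster-whisper==1.1.0
```

Pinning trades reproducible builds against having to bump versions manually for security fixes, which is why both lint rules are warnings rather than errors.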

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/Dockerfile` around lines 60 - 94, The Dockerfile runs the ai-worker
image as root and preloads Whisper model into root's cache, so add a non-root
runtime user (e.g., create and use a user like "app" via a USER directive) and
ensure the build-time model preload is accessible to that user by either setting
HF_HOME or XDG_CACHE_HOME to a shared path (e.g., /opt/hf-cache) before the RUN
python3 -c "... WhisperModel(...)" step or by performing the preload as the same
non-root user; update ENV VIRTUAL_ENV and PATH usage remains the same and ensure
the new cache dir is writable by the non-root user (chown/chmod during build).
Optionally add a HEALTHCHECK if you want runtime readiness probing, otherwise
skip it as noted.

2 changes: 2 additions & 0 deletions server/drizzle/0001_glorious_shooting_star.sql
@@ -0,0 +1,2 @@
ALTER TABLE "videoTable" ADD COLUMN "transcript_key" text;--> statement-breakpoint
ALTER TABLE "videoTable" ADD CONSTRAINT "videoTable_user_id_userTable_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."userTable"("id") ON DELETE no action ON UPDATE no action;
Comment on lines +1 to +2
⚠️ Potential issue | 🟡 Minor

Confirm the FK ON DELETE NO ACTION behavior is intentional, and that the migration is safe to run on existing data.

Two things worth double-checking before this ships to production:

  1. ON DELETE NO ACTION means deleting a userTable row that still has rows in videoTable will hard-fail at the DB layer. If the product expectation is that deleting a user removes (or anonymizes) their videos, you probably want ON DELETE CASCADE or SET NULL here. If retention is intentional, ignore.
  2. Adding the FK constraint will fail the migration on any existing videoTable.user_id value that does not have a matching userTable.id (orphan rows). For a fresh dev DB this is fine; for prod, run a pre-flight check / cleanup before deploying:
SELECT v.id, v.user_id
FROM "videoTable" v
LEFT JOIN "userTable" u ON u.id = v.user_id
WHERE u.id IS NULL;
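If that pre-flight query returns rows and the orphans are safe to discard, one possible cleanup — destructive, and assuming orphaned videos should be deleted rather than remapped to a surviving user:

```sql
BEGIN;
DELETE FROM "videoTable" v
WHERE NOT EXISTS (
  SELECT 1 FROM "userTable" u WHERE u.id = v.user_id
);
COMMIT;
```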
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/drizzle/0001_glorious_shooting_star.sql` around lines 1 - 2, The
migration adds transcript_key to "videoTable" and a FK
"videoTable_user_id_userTable_id_fk" referencing "userTable" with ON DELETE NO
ACTION; confirm that NO ACTION is intentional—if deleting a user should remove
or null their videos change the constraint to ON DELETE CASCADE or ON DELETE SET
NULL on the FK in the migration (or adjust application behavior), and before
applying to prod run a pre-flight check for orphaned rows in "videoTable" (any
v.user_id without a matching userTable.id) and either delete or fix those rows
so adding the FK won't fail; update the migration SQL accordingly and document
the chosen strategy.

232 changes: 232 additions & 0 deletions server/drizzle/meta/0001_snapshot.json
@@ -0,0 +1,232 @@
{
"id": "5c118634-fc94-41c5-b74e-d0a850974c31",
"prevId": "586a540f-f245-48df-be82-03b662009de0",
"version": "7",
"dialect": "postgresql",
"tables": {
"public.userTable": {
"name": "userTable",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true
},
"email": {
"name": "email",
"type": "text",
"primaryKey": false,
"notNull": true
},
"password": {
"name": "password",
"type": "text",
"primaryKey": false,
"notNull": true
},
"created_at": {
"name": "created_at",
"type": "timestamp",
"primaryKey": false,
"notNull": false,
"default": "now()"
},
"updated_at": {
"name": "updated_at",
"type": "timestamp",
"primaryKey": false,
"notNull": false,
"default": "now()"
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.videoTable": {
"name": "videoTable",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true
},
"user_id": {
"name": "user_id",
"type": "text",
"primaryKey": false,
"notNull": true
},
"status": {
"name": "status",
"type": "status",
"typeSchema": "public",
"primaryKey": false,
"notNull": true,
"default": "'not-started'"
},
"trancodeStatus": {
"name": "trancodeStatus",
"type": "transcode_status",
"typeSchema": "public",
"primaryKey": false,
"notNull": true,
"default": "'not-started'"
},
"hlsStatus": {
"name": "hlsStatus",
"type": "hls_status",
"typeSchema": "public",
"primaryKey": false,
"notNull": true,
"default": "'not-started'"
},
"thumbnailStatus": {
"name": "thumbnailStatus",
"type": "thumbnail_status",
"typeSchema": "public",
"primaryKey": false,
"notNull": true,
"default": "'not-started'"
},
"transcriptStatus": {
"name": "transcriptStatus",
"type": "transcript_status",
"typeSchema": "public",
"primaryKey": false,
"notNull": true,
"default": "'not-started'"
},
"transcript_key": {
"name": "transcript_key",
"type": "text",
"primaryKey": false,
"notNull": false
},
"original_video_key": {
"name": "original_video_key",
"type": "text",
"primaryKey": false,
"notNull": false
},
"hls_manifest_key": {
"name": "hls_manifest_key",
"type": "text",
"primaryKey": false,
"notNull": false
},
"thumbnail_video_key": {
"name": "thumbnail_video_key",
"type": "text",
"primaryKey": false,
"notNull": false
},
"created_at": {
"name": "created_at",
"type": "timestamp",
"primaryKey": false,
"notNull": false,
"default": "now()"
},
"updated_at": {
"name": "updated_at",
"type": "timestamp",
"primaryKey": false,
"notNull": false,
"default": "now()"
}
},
"indexes": {},
"foreignKeys": {
"videoTable_user_id_userTable_id_fk": {
"name": "videoTable_user_id_userTable_id_fk",
"tableFrom": "videoTable",
"tableTo": "userTable",
"columnsFrom": [
"user_id"
],
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
}
},
"enums": {
"public.hls_status": {
"name": "hls_status",
"schema": "public",
"values": [
"not-started",
"processing",
"completed",
"failed"
]
},
"public.status": {
"name": "status",
"schema": "public",
"values": [
"not-started",
"processing",
"completed",
"failed"
]
},
"public.thumbnail_status": {
"name": "thumbnail_status",
"schema": "public",
"values": [
"not-started",
"processing",
"completed",
"failed"
]
},
"public.transcode_status": {
"name": "transcode_status",
"schema": "public",
"values": [
"not-started",
"processing",
"completed",
"failed"
]
},
"public.transcript_status": {
"name": "transcript_status",
"schema": "public",
"values": [
"not-started",
"processing",
"completed",
"failed"
]
}
},
"schemas": {},
"sequences": {},
"roles": {},
"policies": {},
"views": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}
7 changes: 7 additions & 0 deletions server/drizzle/meta/_journal.json
@@ -8,6 +8,13 @@
"when": 1776514578126,
"tag": "0000_far_gideon",
"breakpoints": true
},
{
"idx": 1,
"version": "7",
"when": 1776997181954,
"tag": "0001_glorious_shooting_star",
"breakpoints": true
}
]
}
10 changes: 10 additions & 0 deletions server/src/ai-worker.ts
@@ -0,0 +1,10 @@
import 'dotenv/config.js'
import './utils/logger.js'
import './config/db.js'

// Only the transcription worker runs in this container.
// The regular worker (transcode/hls/thumbnail) runs in the worker image
// which doesn't have Python or faster-whisper installed.
import './workers/transcribe.worker.js'

console.log('🚀 AI Worker started and listening for transcription jobs...')
Comment on lines +1 to +10
🧹 Nitpick | 🔵 Trivial

Mirrors worker.ts initialization order — LGTM.

The import order (dotenv → logger → db → worker) matches server/src/worker.ts and the dependency chain documented in transcribe.worker.ts.

One optional hardening: BullMQ workers running in a dedicated container benefit from a top-level process.on('unhandledRejection' | 'uncaughtException', ...) so a stray async error doesn't silently kill the container without producing a useful log line. Worth adding here (and in worker.ts) if not already centralized.
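A minimal sketch of such a safety net — logging goes through console here; the repo's own logger from ./utils/logger.js may hook these events differently:

```typescript
// Top-level safety net for a dedicated worker container. Without it,
// a stray async error can kill the process before any useful log line
// is written, leaving only an opaque container-restart loop.
process.on('unhandledRejection', (reason: unknown) => {
  console.error('Unhandled rejection in worker:', reason);
  process.exit(1);
});

process.on('uncaughtException', (err: Error) => {
  console.error('Uncaught exception in worker:', err);
  process.exit(1);
});
```

Exiting with a non-zero code lets Compose's `restart: unless-stopped` bring the worker back up with a clean slate instead of letting it limp along in an unknown state.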

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/src/ai-worker.ts` around lines 1 - 10, Add top-level handlers for
unhandled promise rejections and uncaught exceptions in ai-worker.ts by
registering process.on('unhandledRejection', ...) and
process.on('uncaughtException', ...) that log the error and exit cleanly; mirror
the same handlers in server/src/worker.ts so both worker containers produce
useful logs for stray async errors and terminate in a controlled way. Ensure the
handlers use the existing logger initialized by importing './utils/logger.js'
and include the error object/details in the log before calling process.exit(1).

4 changes: 3 additions & 1 deletion server/src/models/video.model.ts
@@ -1,4 +1,5 @@
import { pgEnum, pgTable, serial, text, timestamp } from "drizzle-orm/pg-core";
import { userTable } from "./user.model.js";

export const statusEnum = pgEnum('status', ['not-started', 'processing', 'completed', 'failed']);
export const trancodeStatusEnum = pgEnum('transcode_status', ['not-started', 'processing', 'completed', 'failed']);
@@ -8,12 +9,13 @@ export const transciptStatusEnum = pgEnum('transcript_status', ['not-started', '

export const videoTable = pgTable('videoTable', {
id: text('id').primaryKey(),
userId: text('user_id').notNull(),
userId: text('user_id').notNull().references(() => userTable.id),
⚠️ Potential issue | 🟡 Minor

Consider onDelete semantics when implementing user deletion.

The FK constraint is correctly added with ON DELETE NO ACTION (per 0001_glorious_shooting_star.sql). For a new project with an empty database, the migration poses no safety concerns.

However, when a user-deletion endpoint is implemented, NO ACTION will prevent deleting users who own videos. Either:

  • Accept this (preserve user history and associated video records), or
  • Switch to onDelete: 'cascade' (delete associated videos) or 'set null' (orphan video records).

Document whichever behavior is chosen in the model.
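If cascading is the chosen policy, a follow-up migration could swap the constraint — a sketch, reusing the constraint name from 0001_glorious_shooting_star.sql; in Drizzle the equivalent is `.references(() => userTable.id, { onDelete: 'cascade' })`:

```sql
ALTER TABLE "videoTable" DROP CONSTRAINT "videoTable_user_id_userTable_id_fk";
ALTER TABLE "videoTable" ADD CONSTRAINT "videoTable_user_id_userTable_id_fk"
  FOREIGN KEY ("user_id") REFERENCES "public"."userTable"("id")
  ON DELETE CASCADE ON UPDATE NO ACTION;
```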

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/src/models/video.model.ts` at line 12, The foreign-key on userId in
the Video model currently uses the default NO ACTION delete semantics which will
block deleting users who own videos; update the constraint on userId (reference
via userTable.id) to the desired behavior (e.g., onDelete: 'cascade' to remove
videos when a user is deleted, or onDelete: 'set null' to orphan them) and add a
brief JSDoc/comment in the Video model explaining the chosen policy so future
readers and the planned user-deletion endpoint know whether videos are cascaded,
nulled, or preserved; ensure the change is applied where userId:
text('user_id').notNull().references(() => userTable.id) is declared.

status: statusEnum().notNull().default('not-started'),
trancodeStatus: trancodeStatusEnum().notNull().default('not-started'),
hlsStatus: hlsStatusEnum().notNull().default('not-started'),
thumbnailStatus: thumbnailStatusEnum().notNull().default('not-started'),
transcriptStatus: transciptStatusEnum().notNull().default('not-started'),
transcriptKey: text('transcript_key'),
🧹 Nitpick | 🔵 Trivial

transcriptKey is correctly nullable; consider also adding transcriptKey to the cleanup/lifecycle.

The nullable text column matches the async write path in transcribe.worker.ts (set to the S3 key after upload). No issue with the schema itself.

Minor note: the existing enum on line 8 is named transciptStatusEnum (missing the second r) and stored as transcript_status in the DB — the TS identifier typo is pre-existing but would be a good cleanup while you're touching this schema.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/src/models/video.model.ts` at line 18, The schema is fine but you
should (1) add transcriptKey (the nullable column defined as transcriptKey:
text('transcript_key')) into any cleanup/lifecycle paths so uploaded S3 keys are
removed when videos are deleted or expired—update the cleanup routine that
handles video rows to also delete the S3 object referenced by transcriptKey and
null/clear the field in the DB; and (2) fix the TypeScript enum identifier typo
transciptStatusEnum → transcriptStatusEnum (leave the DB enum name
transcript_status unchanged) so references to the enum (e.g., in the model and
any consumers) use the corrected TS identifier.

originalVideoKey: text('original_video_key'),
hlsManifestKey: text('hls_manifest_key'),
thumbnailVideoKey: text('thumbnail_video_key'),
2 changes: 1 addition & 1 deletion server/src/services/transcode.service.ts
@@ -12,7 +12,7 @@ const __dirname = path.dirname(__filename);

export const getPreSignedUrlForDownload = async (fileId: string, userId: string) => {
const currentEnv = process.env.NODE_ENV === 'development' ? 'dev' : 'prod';
const videoObjectId = `${currentEnv}/users/${userId}/original/${fileId}`;
const videoObjectId = `${currentEnv}/users/${userId}/${fileId}/original`;

const videoDownloadSignedUrl = await getDownloadUrl(videoObjectId)
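The key change above moves `original` from a per-user folder into a per-file prefix, so every artifact of one upload shares `<env>/users/<userId>/<fileId>/`. A sketch of the new layout — the helper name is hypothetical:

```typescript
// Hypothetical helper mirroring the new object-key layout from the diff:
// <env>/users/<userId>/<fileId>/original
const buildOriginalKey = (env: string, userId: string, fileId: string): string =>
  `${env}/users/${userId}/${fileId}/original`;

console.log(buildOriginalKey('dev', 'user-1', 'file-1'));
// → dev/users/user-1/file-1/original
```

Grouping by fileId keeps the original, HLS renditions, thumbnail, and transcript under one prefix, which simplifies per-video cleanup and lifecycle rules.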
