diff --git a/bun.lock b/bun.lock
index 5d472fd5..241f28c2 100644
--- a/bun.lock
+++ b/bun.lock
@@ -13,6 +13,7 @@
"lottie-web": "^5.12.2",
"lucide-react": "^0.469.0",
"next": "^15.1.8",
+ "openai": "^6.42.0",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"reframe": ".",
@@ -819,6 +820,8 @@
"obug": ["obug@2.1.1", "", {}, "sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ=="],
+ "openai": ["openai@6.42.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"] }, "sha512-1WFEt/uXMXOLhYRNkgJWo08Y2YNvNwpVU72K7ibrWgWpNOXd4VojXLbe6SQ4bLiUQ3Y8jz4IiyVkylJCL1DtZg=="],
+
"optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="],
"own-keys": ["own-keys@1.0.1", "", { "dependencies": { "get-intrinsic": "^1.2.6", "object-keys": "^1.1.1", "safe-push-apply": "^1.0.0" } }, "sha512-qFOyK5PjiWZd+QQIh+1jhdb9LpxTF0qs7Pm8o5QHYZ0M3vKqSqzsZaEB6oWlxZ+q2sJBMI/Ktgd2N5ZwQoRHfg=="],
diff --git a/src/app/api/transcribe/route.ts b/src/app/api/transcribe/route.ts
new file mode 100644
index 00000000..b372b18f
--- /dev/null
+++ b/src/app/api/transcribe/route.ts
@@ -0,0 +1,72 @@
+import { NextResponse } from "next/server";
+import OpenAI from "openai";
+
+/** Largest upload the OpenAI transcription endpoint accepts (documented as 25 MB). */
+const MAX_UPLOAD_BYTES = 25 * 1024 * 1024;
+
+/**
+ * POST /api/transcribe
+ *
+ * Reads the `file` field from multipart form data, sends it to the Whisper
+ * transcription API and responds with `{ text, segments }`. `segments` is
+ * `null` when the response carries no segment list.
+ *
+ * Error responses are `{ error: string }` with status:
+ * - 400 when `file` is missing or is not a file part
+ * - 413 when the file exceeds the transcription size limit
+ * - 500 when the API key is not configured or the transcription call fails
+ */
+export async function POST(req: Request) {
+  try {
+    const formData = await req.formData();
+    const file = formData.get("file");
+
+    // `FormData.get` yields a string for plain text fields, so a truthiness
+    // check alone would let a non-file value through to the API call.
+    if (!file || typeof file === "string") {
+      return NextResponse.json({ error: "No file uploaded" }, { status: 400 });
+    }
+
+    if (file.size > MAX_UPLOAD_BYTES) {
+      return NextResponse.json(
+        { error: "File is too large to transcribe (25 MB limit)" },
+        { status: 413 }
+      );
+    }
+
+    const apiKey = process.env.OPENAI_API_KEY;
+    if (!apiKey) {
+      console.error("OPENAI_API_KEY is not configured");
+      return NextResponse.json(
+        { error: "Transcription service is not configured" },
+        { status: 500 }
+      );
+    }
+
+    // Build the client per request rather than at module scope: the OpenAI
+    // constructor throws when no key is supplied, which would otherwise fail
+    // as soon as this module is imported.
+    const openai = new OpenAI({ apiKey });
+
+    // verbose_json asks Whisper to include segment-level timestamps.
+    const transcription = await openai.audio.transcriptions.create({
+      file,
+      model: "whisper-1",
+      response_format: "verbose_json",
+    });
+
+    return NextResponse.json({
+      text: transcription.text,
+      segments: "segments" in transcription ? transcription.segments ?? null : null,
+    });
+  } catch (error: unknown) {
+    console.error("FULL ERROR:", error);
+
+    const message =
+      error instanceof Error && error.message
+        ? error.message
+        : "Failed to transcribe audio";
+
+    return NextResponse.json({ error: message }, { status: 500 });
+  }
+}
diff --git a/src/components/VideoEditor.tsx b/src/components/VideoEditor.tsx
index a12c1f41..59cc74bf 100644
--- a/src/components/VideoEditor.tsx
+++ b/src/components/VideoEditor.tsx
@@ -1,6 +1,6 @@
"use client";
-import { useState, useRef, useEffect, useMemo } from "react";
+import { useState, useRef, useEffect, useMemo, useCallback } from "react";
import { useVideoEditor } from "@/hooks/useVideoEditor";
import { TextOverlay } from "@/lib/types";
import FileUpload from "./FileUpload";
@@ -199,33 +199,102 @@ function KeyboardShortcutsPanel() {
export default function VideoEditor() {
const {
- file, duration, recipe, status, progress,
- result, error, exportStartedAt, updateRecipe,
- handleFileSelect, fileError, handleExport, cancelExport, reset, resetSettings,
+ file,
+ duration,
+ recipe,
+ addSubtitle,
+ generateSubtitles,
+ status,
+ progress,
+ result,
+ error,
+ exportStartedAt,
+ updateRecipe,
+ handleFileSelect,
+ fileError,
+ handleExport,
+ cancelExport,
+ reset,
+ resetSettings,
videoRef,
seekTo,
- overlayFile, setOverlayFile,
- overlayPosition, setOverlayPosition,
- overlaySize, setOverlaySize,
- overlayOpacity, setOverlayOpacity,
+ overlayFile,
+ setOverlayFile,
+ overlayPosition,
+ setOverlayPosition,
+ overlaySize,
+ setOverlaySize,
+ overlayOpacity,
+ setOverlayOpacity,
recommendedPreset,
currentTime,
toggleSound,
} = useVideoEditor();
- useKeyboardShortcuts({
- file,
- recipe,
- resetSettings,
- updateRecipe,
- handleExport,
- status,
- cancelExport,
- onToggleShortcutsModal: () => {},
- });
-
const [copied, setCopied] = useState(false);
const [shareCopied, setShareCopied] = useState(false);
+ const [isTranscribing, setIsTranscribing] = useState(false);
+
+  /**
+   * Upload the current file to `/api/transcribe` and replace the recipe's
+   * subtitles with the returned transcript.
+   *
+   * Each timed segment with non-empty text becomes one subtitle. When no such
+   * segment exists, the full transcript text becomes a single subtitle that
+   * starts at 0. Failures are logged and shown to the user via `alert`.
+   */
+  const handleGenerateSubtitles = useCallback(async () => {
+    if (!file) return;
+    setIsTranscribing(true);
+
+    try {
+      const formData = new FormData();
+      formData.append("file", file);
+
+      const res = await fetch("/api/transcribe", {
+        method: "POST",
+        body: formData,
+      });
+
+      // An error response is not guaranteed to carry JSON; fall back to an
+      // empty payload so the status check below still yields a readable message.
+      const data = ((await res.json().catch(() => null)) ?? {}) as {
+        error?: unknown;
+        text?: unknown;
+        segments?: unknown;
+      };
+
+      if (!res.ok) {
+        throw new Error(
+          typeof data.error === "string" && data.error
+            ? data.error
+            : "Subtitle generation failed."
+        );
+      }
+
+      // End time used when the clip duration or a segment's end is unknown.
+      const fallbackEnd = duration && duration > 0 ? duration : 999999;
+
+      // Single place for the default position and styling of generated subtitles.
+      const makeSubtitle = (text: string, startTime: number, endTime: number) => ({
+        id: crypto.randomUUID(),
+        text,
+        startTime,
+        endTime,
+        x: 50,
+        y: 90,
+        fontSize: 24,
+        color: "#ffffff",
+      });
+
+      const rawSegments: unknown[] = Array.isArray(data.segments) ? data.segments : [];
+      const timedSubtitles = rawSegments
+        .map((raw) => {
+          const segment = (raw ?? {}) as { text?: unknown; start?: unknown; end?: unknown };
+          const start =
+            typeof segment.start === "number" && Number.isFinite(segment.start)
+              ? segment.start
+              : 0;
+          const end =
+            typeof segment.end === "number" && Number.isFinite(segment.end)
+              ? segment.end
+              : fallbackEnd;
+          return makeSubtitle(String(segment.text || "").trim(), start, end);
+        })
+        // Drop segments without text so no blank subtitles are stored.
+        .filter((subtitle) => subtitle.text.length > 0);
+
+      const fullText = data.text ? String(data.text).trim() : "";
+      const subtitles =
+        timedSubtitles.length > 0
+          ? timedSubtitles
+          : fullText
+            ? [makeSubtitle(fullText, 0, fallbackEnd)]
+            : [];
+
+      if (subtitles.length === 0) {
+        throw new Error("No subtitles were generated.");
+      }
+
+      updateRecipe({ subtitles });
+    } catch (err: unknown) {
+      console.error("Subtitle generation error:", err);
+      alert(
+        err instanceof Error && err.message
+          ? err.message
+          : "Unable to generate subtitles. Please try again."
+      );
+    } finally {
+      setIsTranscribing(false);
+    }
+  }, [duration, file, updateRecipe]);
+
const initialOverlayState = useRef({
overlayPosition,
overlaySize,
@@ -499,6 +568,25 @@ return () => {
onSelectText={setSelectedTextId}
/>
+ }
+ title="Subtitles"
+ delay={115}
+>
+
+
+
+ {recipe.subtitles.length} subtitle(s)
+
+
+
{
+ const video = videoRef.current;
+ if (!video) return;
+
+ const handleTimeUpdate = () => {
+ setCurrentTime(video.currentTime);
+ };
+
+ video.addEventListener("timeupdate", handleTimeUpdate);
+ return () => {
+ video.removeEventListener("timeupdate", handleTimeUpdate);
+ };
+ }, [videoRef]);
+
/**
* Track preview container dimensions for text overlay positioning.
*/
@@ -282,6 +297,31 @@ export default function VideoPreview({
>
+ {/* Subtitles Layer */}
+{recipe?.subtitles?.length ? (
+
+ {recipe.subtitles
+ .filter((sub) => currentTime >= sub.startTime && currentTime <= sub.endTime)
+ .map((sub) => (
+
+ {sub.text}
+
+ ))}
+
+) : null}
{/* Phase 1 MVP: Multi-track overlay rendering */}
{multiTrackState && multiTrackVideoRefs && multiTrackState.timelineTracks.length > 1 && (
@@ -315,6 +355,7 @@ export default function VideoPreview({
>
+
);
})}
diff --git a/src/hooks/useVideoEditor.ts b/src/hooks/useVideoEditor.ts
index f2edb45c..d6e7a2bb 100644
--- a/src/hooks/useVideoEditor.ts
+++ b/src/hooks/useVideoEditor.ts
@@ -1,7 +1,16 @@
"use client";
import { useState, useCallback, useEffect, useRef, useMemo } from "react";
-import { EditRecipe, ExportResult, ExportStatus, MAX_FILE_SIZE, OverlayPosition, TimelineTrack, MultiTrackEditorState } from "@/lib/types";
+import {
+ EditRecipe,
+ ExportResult,
+ ExportStatus,
+ MAX_FILE_SIZE,
+ OverlayPosition,
+ TimelineTrack,
+ MultiTrackEditorState,
+ Subtitle,
+} from "@/lib/types";
import { DEFAULT_RECIPE, SPEED_STEPS } from "@/lib/constants";
import { getPresetById } from "@/lib/presets";
import { loadFFmpeg, exportVideo, terminateFFmpeg, FFmpegLoadError } from "@/lib/ffmpeg";
@@ -210,17 +219,112 @@ export function useVideoEditor() {
addTrack(track);
return track;
}, [addTrack]);
-
- const updateRecipe = useCallback((patch: Partial) => {
+  /**
+   * Merge a partial patch into the current recipe.
+   *
+   * `subtitles` always ends up as an array, and `keepAudio` is forced off
+   * whenever the resulting format is "gif".
+   */
+  const updateRecipe = useCallback((patch: Partial<EditRecipe>) => {
setRecipe((prev) => {
- const next = { ...prev, ...patch };
+      const next = {
+        ...prev,
+        ...patch,
+        // Resolved after the spread so a patch carrying `subtitles: undefined`
+        // cannot replace the existing list with undefined.
+        subtitles: patch.subtitles ?? prev.subtitles ?? [],
+      };
+
// GIF has no audio — force keepAudio off
if (next.format === "gif") {
next.keepAudio = false;
}
+
return next;
});
}, []);
+
+  /**
+   * Append a placeholder subtitle ("Sample Subtitle", 0–3 on the timeline,
+   * default position and styling) to the recipe.
+   *
+   * Uses a functional state update so that several calls made before the next
+   * render each build on the latest list instead of a render-time snapshot.
+   */
+  const addSubtitle = useCallback(() => {
+    const newSubtitle: Subtitle = {
+      id: crypto.randomUUID(),
+      text: "Sample Subtitle",
+      startTime: 0,
+      endTime: 3,
+      x: 50,
+      y: 85,
+      fontSize: 24,
+      color: "#ffffff",
+    };
+
+    setRecipe((prev) => ({
+      ...prev,
+      subtitles: [...(prev.subtitles ?? []), newSubtitle],
+    }));
+  }, []);
+
+  /**
+   * Remove the subtitle with the given id from the recipe; unknown ids leave
+   * the list unchanged.
+   *
+   * Uses a functional state update so the filter runs against the latest
+   * list rather than a render-time snapshot.
+   */
+  const removeSubtitle = useCallback((id: string) => {
+    setRecipe((prev) => ({
+      ...prev,
+      subtitles: (prev.subtitles ?? []).filter((s) => s.id !== id),
+    }));
+  }, []);
+
+  /**
+   * Shallow-merge `updates` into the subtitle with the given id; all other
+   * subtitles are left untouched.
+   *
+   * Uses a functional state update so consecutive edits made before the next
+   * render are applied on top of each other instead of overwriting one another.
+   */
+  const updateSubtitle = useCallback(
+    (id: string, updates: Partial<Subtitle>) => {
+      setRecipe((prev) => ({
+        ...prev,
+        subtitles: (prev.subtitles ?? []).map((s) =>
+          s.id === id ? { ...s, ...updates } : s
+        ),
+      }));
+    },
+    []
+  );
+  /**
+   * Generate subtitles with the browser's Web Speech API while the video plays.
+   *
+   * Starts recognition, plays the video, records one subtitle per recognition
+   * result (timed against `video.currentTime`), and writes the collected list
+   * to the recipe once playback ends.
+   *
+   * NOTE(review): SpeechRecognition transcribes microphone input rather than
+   * the media element's audio track — confirm this is the intended capture path.
+   */
+  const generateSubtitles = useCallback(() => {
+    const video = videoRef.current;
+    if (!video) return;
+
+    const SpeechRecognition =
+      (window as any).SpeechRecognition ||
+      (window as any).webkitSpeechRecognition;
+
+    if (!SpeechRecognition) {
+      alert("Speech Recognition not supported (use Chrome)");
+      return;
+    }
+
+    const recognition = new SpeechRecognition();
+    recognition.continuous = true;
+    recognition.interimResults = false;
+    recognition.lang = "en-US";
+
+    // Start from the current playhead: playback does not necessarily begin at 0.
+    let segmentStart = video.currentTime;
+    const collected: Subtitle[] = [];
+
+    recognition.onresult = (event: any) => {
+      const transcript = String(
+        event.results[event.results.length - 1][0].transcript ?? ""
+      ).trim();
+      const segmentEnd = video.currentTime;
+
+      // Skip empty results so no blank subtitles are stored.
+      if (transcript) {
+        collected.push({
+          id: crypto.randomUUID(),
+          text: transcript,
+          startTime: segmentStart,
+          endTime: segmentEnd,
+          x: 50,
+          y: 85,
+          fontSize: 24,
+          color: "#ffffff",
+        });
+      }
+
+      segmentStart = segmentEnd;
+    };
+
+    recognition.onerror = (err: any) => {
+      console.error("Speech recognition error:", err);
+    };
+
+    const finish = () => {
+      recognition.stop();
+      updateRecipe({ subtitles: collected });
+    };
+
+    // A one-shot listener (instead of assigning `video.onended`) leaves any
+    // existing handler intact and does not run again on later replays, which
+    // would overwrite the user's subtitles with this captured list.
+    video.addEventListener("ended", finish, { once: true });
+
+    recognition.start();
+
+    // play() returns a promise that rejects when playback cannot start; undo
+    // the setup in that case so recognition is not left running.
+    video.play().catch((err: unknown) => {
+      console.error("Unable to start playback for subtitle generation:", err);
+      video.removeEventListener("ended", finish);
+      recognition.stop();
+    });
+  }, [videoRef, updateRecipe]);
+
+
const isValidValue = (key: keyof EditRecipe, val: any): boolean => {
switch (key) {
case "preset":
@@ -642,7 +746,6 @@ export function useVideoEditor() {
setExportStartedAt(null);
}, []);
-
const reset = useCallback(() => {
if (result?.blobUrl) URL.revokeObjectURL(result.blobUrl);
setFile(null);
@@ -726,5 +829,9 @@ export function useVideoEditor() {
removeTrack,
updateTrack,
addVideoTrack,
+ addSubtitle,
+removeSubtitle,
+updateSubtitle,
+generateSubtitles,
};
}
diff --git a/src/lib/constants.ts b/src/lib/constants.ts
index e713f779..1b86a0f1 100644
--- a/src/lib/constants.ts
+++ b/src/lib/constants.ts
@@ -23,5 +23,6 @@ export const DEFAULT_RECIPE: EditRecipe = {
soundOnCompletion: false,
normalizeAudio: false,
textOverlays: [],
+ subtitles: [],
version: RECIPE_VERSION,
};
diff --git a/src/lib/editorPersistence.ts b/src/lib/editorPersistence.ts
index 4b51537d..50bb3ff2 100644
--- a/src/lib/editorPersistence.ts
+++ b/src/lib/editorPersistence.ts
@@ -16,7 +16,11 @@ export function migrateRecipe(recipe: Partial): EditRecipe {
return {
...DEFAULT_RECIPE,
...recipe,
- textOverlays: Array.isArray(recipe.textOverlays) ? recipe.textOverlays : [],
+ textOverlays: Array.isArray(recipe.textOverlays) ? recipe.textOverlays
+ : [],
+ subtitles: Array.isArray(recipe.subtitles)
+ ? recipe.subtitles
+ : [],
};
}
diff --git a/src/lib/types.ts b/src/lib/types.ts
index 90827df9..5aea246e 100644
--- a/src/lib/types.ts
+++ b/src/lib/types.ts
@@ -1,4 +1,18 @@
-export const RECIPE_VERSION = 1;
+/**
+ * A single timed subtitle stored in `EditRecipe.subtitles`.
+ */
+export interface Subtitle {
+ /** Unique identifier; callers in this change generate it with `crypto.randomUUID()`. */
+ id: string;
+
+ /** Text to display. */
+ text: string;
+
+ /**
+  * Display window on the video timeline. The preview shows the subtitle while
+  * the video's `currentTime` lies between these two values (inclusive).
+  */
+ startTime: number;
+ endTime: number;
+
+ /** Position of the subtitle. NOTE(review): presumably percentages of the preview area (callers use 50 / 85 / 90) — confirm. */
+ x: number;
+ y: number;
+
+ /** Font size. NOTE(review): presumably pixels (callers use 24) — confirm. */
+ fontSize: number;
+ /** Text color; callers pass a hex string such as "#ffffff". */
+ color: string;
+}
+export const RECIPE_VERSION = 2;
/**
* Text overlay data structure for rendering custom text on videos.
@@ -35,6 +49,7 @@ export interface EditRecipe {
saturation: number;
soundOnCompletion: boolean;
textOverlays: TextOverlay[];
+ subtitles: Subtitle[];
version: number;
}
@@ -142,6 +157,8 @@ export function isValidRecipe(value: unknown): value is EditRecipe {
if (typeof v.saturation !== "number" || !isFinite(v.saturation)) return false;
if (typeof v.soundOnCompletion !== "boolean") return false;
if (!Array.isArray(v.textOverlays)) return false;
+ if (!Array.isArray(v.subtitles)) return false;
return true;
}
+