diff --git a/bun.lock b/bun.lock index 5d472fd5..241f28c2 100644 --- a/bun.lock +++ b/bun.lock @@ -13,6 +13,7 @@ "lottie-web": "^5.12.2", "lucide-react": "^0.469.0", "next": "^15.1.8", + "openai": "^6.42.0", "react": "^19.0.0", "react-dom": "^19.0.0", "reframe": ".", @@ -819,6 +820,8 @@ "obug": ["obug@2.1.1", "", {}, "sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ=="], + "openai": ["openai@6.42.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"] }, "sha512-1WFEt/uXMXOLhYRNkgJWo08Y2YNvNwpVU72K7ibrWgWpNOXd4VojXLbe6SQ4bLiUQ3Y8jz4IiyVkylJCL1DtZg=="], + "optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="], "own-keys": ["own-keys@1.0.1", "", { "dependencies": { "get-intrinsic": "^1.2.6", "object-keys": "^1.1.1", "safe-push-apply": "^1.0.0" } }, "sha512-qFOyK5PjiWZd+QQIh+1jhdb9LpxTF0qs7Pm8o5QHYZ0M3vKqSqzsZaEB6oWlxZ+q2sJBMI/Ktgd2N5ZwQoRHfg=="], diff --git a/src/app/api/transcribe/route.ts b/src/app/api/transcribe/route.ts new file mode 100644 index 00000000..b372b18f --- /dev/null +++ b/src/app/api/transcribe/route.ts @@ -0,0 +1,45 @@ +import { NextResponse } from "next/server"; +import OpenAI from "openai"; + +// Create OpenAI client +const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, +}); + +export async function POST(req: Request) { + try { + // Get uploaded file from request + const formData = await req.formData(); + const file = formData.get("file") as File | null; + + if (!file) { + return NextResponse.json( + { error: "No file uploaded" }, + { status: 400 } + ); + } + + // Send file to Whisper API and request segment timestamps + const transcription = await openai.audio.transcriptions.create({ + file, + model: "whisper-1", + response_format: "verbose_json", + }); + + // Return transcript text and timestamps if the Whisper response provides them + return NextResponse.json({ + text: transcription.text, + segments: (transcription as any).segments ?? null, + }); + + } catch (error: any) { + console.error("FULL ERROR:", error); + + return NextResponse.json( + { + error: error?.message || "Failed to transcribe audio", + }, + { status: 500 } + ); +} +} diff --git a/src/components/VideoEditor.tsx b/src/components/VideoEditor.tsx index a12c1f41..59cc74bf 100644 --- a/src/components/VideoEditor.tsx +++ b/src/components/VideoEditor.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useRef, useEffect, useMemo } from "react"; +import { useState, useRef, useEffect, useMemo, useCallback } from "react"; import { useVideoEditor } from "@/hooks/useVideoEditor"; import { TextOverlay } from "@/lib/types"; import FileUpload from "./FileUpload"; @@ -199,33 +199,102 @@ function KeyboardShortcutsPanel() { export default function VideoEditor() { const { - file, duration, recipe, status, progress, - result, error, exportStartedAt, updateRecipe, - handleFileSelect, fileError, handleExport, cancelExport, reset, resetSettings, + file, + duration, + recipe, + addSubtitle, + generateSubtitles, // ✅ ADD THIS HERE + status, + progress, + result, + error, + exportStartedAt, + updateRecipe, + handleFileSelect, + fileError, + handleExport, + cancelExport, + reset, + resetSettings, videoRef, seekTo, - overlayFile, setOverlayFile, - overlayPosition, setOverlayPosition, - overlaySize, setOverlaySize, - overlayOpacity, setOverlayOpacity, + overlayFile, + setOverlayFile, + overlayPosition, + setOverlayPosition, + overlaySize, + setOverlaySize, + overlayOpacity, + setOverlayOpacity, recommendedPreset, currentTime, toggleSound, } = useVideoEditor(); - useKeyboardShortcuts({ - file, - recipe, - resetSettings, - updateRecipe, - handleExport, - status, - cancelExport, - onToggleShortcutsModal: () => {}, - }); - const [copied, setCopied] = useState(false); const [shareCopied, setShareCopied] = useState(false); + const [isTranscribing, setIsTranscribing] = useState(false); + + const handleGenerateSubtitles = useCallback(async () => { + if (!file) return; + setIsTranscribing(true); + + try { + const formData = new FormData(); + formData.append("file", file); + + const res = await fetch("/api/transcribe", { + method: "POST", + body: formData, + }); + + const data = await res.json(); + + if (!res.ok) { + throw new Error(data.error || "Subtitle generation failed."); + } + + const segments = Array.isArray(data.segments) ? data.segments : []; + const fallbackDuration = duration && duration > 0 ? duration : 999999; + const subtitles = segments.length > 0 + ? segments.map((segment: any) => ({ + id: crypto.randomUUID(), + text: String(segment.text || "").trim(), + startTime: Number.isFinite(segment.start) ? segment.start : 0, + endTime: Number.isFinite(segment.end) ? segment.end : fallbackDuration, + x: 50, + y: 90, + fontSize: 24, + color: "#ffffff", + })) + : data.text + ? [ + { + id: crypto.randomUUID(), + text: String(data.text).trim(), + startTime: 0, + endTime: fallbackDuration, + x: 50, + y: 90, + fontSize: 24, + color: "#ffffff", + }, + ] + : []; + + if (subtitles.length === 0) { + throw new Error("No subtitles were generated."); + } + + updateRecipe({ subtitles }); + } catch (err: any) { + console.error("Subtitle generation error:", err); + alert(err?.message || "Unable to generate subtitles. Please try again."); + } finally { + setIsTranscribing(false); + } + }, [duration, file, updateRecipe]); + const initialOverlayState = useRef({ overlayPosition, overlaySize, @@ -499,6 +568,25 @@ return () => { onSelectText={setSelectedTextId} /> +
} + title="Subtitles" + delay={115} +> + + +
+ {recipe.subtitles.length} subtitle(s) +
+
+
{ + const video = videoRef.current; + if (!video) return; + + const handleTimeUpdate = () => { + setCurrentTime(video.currentTime); + }; + + video.addEventListener("timeupdate", handleTimeUpdate); + return () => { + video.removeEventListener("timeupdate", handleTimeUpdate); + }; + }, [videoRef]); + /** * Track preview container dimensions for text overlay positioning. */ @@ -282,6 +297,31 @@ export default function VideoPreview({ > + {/* Subtitles Layer */} +{recipe?.subtitles?.length ? ( +
+ {recipe.subtitles + .filter((sub) => currentTime >= sub.startTime && currentTime <= sub.endTime) + .map((sub) => ( +
+ {sub.text} +
+ ))} +
+) : null} {/* Phase 1 MVP: Multi-track overlay rendering */} {multiTrackState && multiTrackVideoRefs && multiTrackState.timelineTracks.length > 1 && ( @@ -315,6 +355,7 @@ export default function VideoPreview({ > + ); })}
diff --git a/src/hooks/useVideoEditor.ts b/src/hooks/useVideoEditor.ts index f2edb45c..d6e7a2bb 100644 --- a/src/hooks/useVideoEditor.ts +++ b/src/hooks/useVideoEditor.ts @@ -1,7 +1,16 @@ "use client"; import { useState, useCallback, useEffect, useRef, useMemo } from "react"; -import { EditRecipe, ExportResult, ExportStatus, MAX_FILE_SIZE, OverlayPosition, TimelineTrack, MultiTrackEditorState } from "@/lib/types"; +import { + EditRecipe, + ExportResult, + ExportStatus, + MAX_FILE_SIZE, + OverlayPosition, + TimelineTrack, + MultiTrackEditorState, + Subtitle, +} from "@/lib/types"; import { DEFAULT_RECIPE, SPEED_STEPS } from "@/lib/constants"; import { getPresetById } from "@/lib/presets"; import { loadFFmpeg, exportVideo, terminateFFmpeg, FFmpegLoadError } from "@/lib/ffmpeg"; @@ -210,17 +219,112 @@ export function useVideoEditor() { addTrack(track); return track; }, [addTrack]); - - const updateRecipe = useCallback((patch: Partial) => { +const updateRecipe = useCallback((patch: Partial) => { setRecipe((prev) => { - const next = { ...prev, ...patch }; - // GIF has no audio — force keepAudio off + const next = { + ...prev, + subtitles: prev.subtitles ?? [], // ✅ safety + ...patch, + }; + if (next.format === "gif") { next.keepAudio = false; } + return next; }); }, []); + +const addSubtitle = useCallback(() => { + const newSubtitle: Subtitle = { + id: crypto.randomUUID(), + text: "Sample Subtitle", + startTime: 0, + endTime: 3, + x: 50, + y: 85, + fontSize: 24, + color: "#ffffff", + }; + + updateRecipe({ + subtitles: [...(recipe.subtitles ?? []), newSubtitle], + }); +}, [recipe.subtitles, updateRecipe]); + +const removeSubtitle = useCallback((id: string) => { + updateRecipe({ + subtitles: (recipe.subtitles ?? []).filter((s) => s.id !== id), + }); +}, [recipe.subtitles, updateRecipe]); + +const updateSubtitle = useCallback( + (id: string, updates: Partial) => { + updateRecipe({ + subtitles: (recipe.subtitles ?? []).map((s) => + s.id === id ? { ...s, ...updates } : s + ), + }); + }, + [recipe.subtitles, updateRecipe] +); +const generateSubtitles = useCallback(() => { + const video = videoRef.current; + if (!video) return; + + const SpeechRecognition = + (window as any).SpeechRecognition || + (window as any).webkitSpeechRecognition; + + if (!SpeechRecognition) { + alert("Speech Recognition not supported (use Chrome)"); + return; + } + + const recognition = new SpeechRecognition(); + recognition.continuous = true; + recognition.interimResults = false; + recognition.lang = "en-US"; + + let startTime = 0; + const tempSubtitles: Subtitle[] = []; + + recognition.onresult = (event: any) => { + const transcript = + event.results[event.results.length - 1][0].transcript; + + const currentTime = video.currentTime; + + tempSubtitles.push({ + id: crypto.randomUUID(), + text: transcript, + startTime, + endTime: currentTime, + x: 50, + y: 85, + fontSize: 24, + color: "#ffffff", // ✅ white subtitles + }); + + startTime = currentTime; + }; + + recognition.onerror = (err: any) => { + console.error("Speech recognition error:", err); + }; + + recognition.start(); + video.play(); + + video.onended = () => { + recognition.stop(); + + updateRecipe({ + subtitles: tempSubtitles, + }); + }; +}, [videoRef, updateRecipe]); + const isValidValue = (key: keyof EditRecipe, val: any): boolean => { switch (key) { case "preset": @@ -642,7 +746,6 @@ export function useVideoEditor() { setExportStartedAt(null); }, []); - const reset = useCallback(() => { if (result?.blobUrl) URL.revokeObjectURL(result.blobUrl); setFile(null); @@ -726,5 +829,9 @@ export function useVideoEditor() { removeTrack, updateTrack, addVideoTrack, + addSubtitle, +removeSubtitle, +updateSubtitle, +generateSubtitles, }; } diff --git a/src/lib/constants.ts b/src/lib/constants.ts index e713f779..1b86a0f1 100644 --- a/src/lib/constants.ts +++ b/src/lib/constants.ts @@ -23,5 +23,6 @@ export const DEFAULT_RECIPE: EditRecipe = { soundOnCompletion: false, normalizeAudio: false, textOverlays: [], + subtitles: [], version: RECIPE_VERSION, }; diff --git a/src/lib/editorPersistence.ts b/src/lib/editorPersistence.ts index 4b51537d..50bb3ff2 100644 --- a/src/lib/editorPersistence.ts +++ b/src/lib/editorPersistence.ts @@ -16,7 +16,11 @@ export function migrateRecipe(recipe: Partial): EditRecipe { return { ...DEFAULT_RECIPE, ...recipe, - textOverlays: Array.isArray(recipe.textOverlays) ? recipe.textOverlays : [], + textOverlays: Array.isArray(recipe.textOverlays) ? recipe.textOverlays + : [], + subtitles: Array.isArray(recipe.subtitles) + ? recipe.subtitles + : [], }; } diff --git a/src/lib/types.ts b/src/lib/types.ts index 90827df9..5aea246e 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -1,4 +1,18 @@ -export const RECIPE_VERSION = 1; +export interface Subtitle { + id: string; + + text: string; + + startTime: number; + endTime: number; + + x: number; + y: number; + + fontSize: number; + color: string; +} +export const RECIPE_VERSION = 2; /** * Text overlay data structure for rendering custom text on videos. @@ -35,6 +49,7 @@ export interface EditRecipe { saturation: number; soundOnCompletion: boolean; textOverlays: TextOverlay[]; + subtitles: Subtitle[]; version: number; } @@ -142,6 +157,8 @@ export function isValidRecipe(value: unknown): value is EditRecipe { if (typeof v.saturation !== "number" || !isFinite(v.saturation)) return false; if (typeof v.soundOnCompletion !== "boolean") return false; if (!Array.isArray(v.textOverlays)) return false; + if (!Array.isArray(v.subtitles)) return false; return true; } +