-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhandler.py
More file actions
87 lines (64 loc) · 2.45 KB
/
handler.py
File metadata and controls
87 lines (64 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""RunPod serverless handler — Qwen3-TTS VoiceDesign.
Loads the model once on cold start, then processes voice design requests.
Input: {"description": str, "preview_text": str, "language": str}
Output: {"audio_b64": str} (base64-encoded WAV) on success, or {"error": str} on failure
"""
import base64
import io
import logging
import os
import runpod
import torch
import soundfile as sf
# Root logger stays at WARNING; only this module's logger is opened up to INFO.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Third-party loggers that are explicitly pinned to WARNING.
_QUIET_LIBRARIES = (
    "transformers",
    "torch",
    "huggingface_hub",
    "qwen_tts",
    "urllib3",
    "runpod",
)
for _lib in _QUIET_LIBRARIES:
    logging.getLogger(_lib).setLevel(logging.WARNING)
# ── Model loading (runs once on cold start) ──
MODEL = None


def load_model():
    """Return the module-level model, loading it on the first call.

    The loaded instance is stored in the global ``MODEL``; later calls return
    that same object without loading again.

    Returns:
        The object returned by ``Qwen3TTSModel.from_pretrained``.
    """
    global MODEL
    if MODEL is None:
        logger.info("Loading Qwen3-TTS-VoiceDesign model...")
        # Imported here rather than at module top, so qwen_tts is only
        # imported when a load actually happens.
        from qwen_tts import Qwen3TTSModel

        MODEL = Qwen3TTSModel.from_pretrained(
            "Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign",
            device_map="cuda:0",
            torch_dtype=torch.bfloat16,
        )
        logger.info("Model loaded successfully")
    return MODEL
# ── Handler ──
_DEFAULT_PREVIEW_TEXT = "Hello, this is a voice preview."

# Request language codes mapped to the language names passed to the model.
_LANGUAGE_NAMES = {"en": "english", "de": "german", "fr": "french", "es": "spanish"}
_FALLBACK_LANGUAGE = "english"


def _resolve_language(language):
    """Map a request ``language`` value to the language name given to the model.

    Accepts the short codes in ``_LANGUAGE_NAMES`` as well as the full names
    themselves, ignoring case and surrounding whitespace. Anything else
    (including non-string values) falls back to ``_FALLBACK_LANGUAGE``.
    """
    if not isinstance(language, str):
        return _FALLBACK_LANGUAGE
    key = language.strip().lower()
    if key in _LANGUAGE_NAMES:
        return _LANGUAGE_NAMES[key]
    if key in _LANGUAGE_NAMES.values():
        return key
    return _FALLBACK_LANGUAGE


def _to_numpy_audio(wav):
    """Convert one generated waveform into something ``soundfile`` can write.

    Objects without a ``cpu`` attribute are returned unchanged. Tensor-like
    objects are detached and, when floating point, cast to float32 before the
    NumPy conversion, because NumPy has no bfloat16 dtype and ``.numpy()``
    refuses tensors that still track gradients.
    """
    if not hasattr(wav, "cpu"):
        return wav
    wav = wav.detach()
    if wav.is_floating_point():
        wav = wav.float()
    return wav.cpu().numpy()


def handler(event):
    """Process a voice design request.

    Args:
        event: Job payload dict. ``event["input"]`` may contain:
            ``description`` (required, non-blank str) - passed to the model
            as ``instruct``;
            ``preview_text`` (optional str) - text to synthesise; a missing,
            null or blank value uses the default preview sentence;
            ``language`` (optional str) - code or full name, see
            ``_resolve_language``; defaults to "en".

    Returns:
        ``{"audio_b64": str}`` holding the base64-encoded WAV on success, or
        ``{"error": str}`` when validation or generation fails. Exceptions
        raised during generation are logged and reported in the error dict
        rather than propagated.
    """
    inp = event.get("input") if isinstance(event, dict) else None
    if inp is None:
        inp = {}
    if not isinstance(inp, dict):
        return {"error": "Input must be a JSON object"}

    description = inp.get("description")
    if description is not None and not isinstance(description, str):
        return {"error": "description must be a string"}
    if not description or not description.strip():
        return {"error": "No description provided"}

    preview_text = inp.get("preview_text")
    if preview_text is None:
        preview_text = _DEFAULT_PREVIEW_TEXT
    elif not isinstance(preview_text, str):
        return {"error": "preview_text must be a string"}
    elif not preview_text.strip():
        # An explicitly empty text would give the model nothing to speak.
        preview_text = _DEFAULT_PREVIEW_TEXT

    lang_full = _resolve_language(inp.get("language", "en"))

    try:
        model = load_model()
        logger.info(
            "Generating voice design: %d chars, lang=%s", len(description), lang_full
        )
        wavs, sr = model.generate_voice_design(
            text=preview_text,
            instruct=description,
            language=lang_full,
        )
        if len(wavs) == 0:
            return {"error": "Model returned no audio"}

        # Encode the first waveform as an in-memory WAV file.
        buf = io.BytesIO()
        sf.write(buf, _to_numpy_audio(wavs[0]), sr, format="WAV")
        wav_bytes = buf.getvalue()

        audio_b64 = base64.b64encode(wav_bytes).decode("utf-8")
        logger.info("Voice design complete: %d bytes", len(wav_bytes))
        return {"audio_b64": audio_b64}
    except Exception as e:
        # Top-level job boundary: log the traceback and report the failure in
        # the response instead of raising.
        logger.exception("Handler error: %s", e)
        return {"error": str(e)}
# Hand `handler` to the RunPod serverless runtime. This is a module-level call,
# so it executes whenever this file is run or imported.
runpod.serverless.start({"handler": handler})