Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions 01_getting_started/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.flash/
2 changes: 1 addition & 1 deletion 01_getting_started/01_hello_world/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Server starts at **http://localhost:8888**
Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@remote` functions.

```bash
curl -X POST http://localhost:8888/gpu_worker/run_sync \
curl -X POST http://localhost:8888/gpu_worker/runsync \
-H "Content-Type: application/json" \
-d '{"message": "Hello GPU!"}'
```
Expand Down
4 changes: 2 additions & 2 deletions 01_getting_started/01_hello_world/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


@remote(resource_config=gpu_config)
async def gpu_hello(input_data: dict) -> dict:
async def gpu_hello(payload: dict) -> dict:
"""Simple GPU worker that returns GPU hardware info."""
import platform
from datetime import datetime
Expand All @@ -25,7 +25,7 @@ async def gpu_hello(input_data: dict) -> dict:
gpu_count = torch.cuda.device_count()
gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)

message = input_data.get("message", "Hello from GPU worker!")
message = payload.get("message", "Hello from GPU worker!")

return {
"status": "success",
Expand Down
1 change: 1 addition & 0 deletions 01_getting_started/02_cpu_worker/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,4 @@ uv.lock
# OS
.DS_Store
Thumbs.db
.flash/
2 changes: 1 addition & 1 deletion 01_getting_started/02_cpu_worker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Server starts at **http://localhost:8888**
Visit **http://localhost:8888/docs** for interactive API documentation. QB endpoints are auto-generated by `flash run` based on your `@remote` functions.

```bash
curl -X POST http://localhost:8888/cpu_worker/run_sync \
curl -X POST http://localhost:8888/cpu_worker/runsync \
-H "Content-Type: application/json" \
-d '{"name": "Flash User"}'
```
Expand Down
4 changes: 2 additions & 2 deletions 01_getting_started/02_cpu_worker/cpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@


@remote(resource_config=cpu_config)
async def cpu_hello(input_data: dict) -> dict:
async def cpu_hello(payload: dict) -> dict:
"""Simple CPU worker that returns a greeting."""
import platform
from datetime import datetime

message = f"Hello, {input_data.get('name', 'Anonymous Panda')}!"
message = f"Hello, {payload.get('name', 'Anonymous Panda')}!"

return {
"status": "success",
Expand Down
4 changes: 2 additions & 2 deletions 01_getting_started/03_mixed_workers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ curl -X POST http://localhost:8888/classify \
-d '{"text": "This product is amazing! I love it!"}'

# Individual stages
curl -X POST http://localhost:8888/cpu_worker/run_sync \
curl -X POST http://localhost:8888/cpu_worker/runsync \
-H "Content-Type: application/json" \
-d '{"text": "Test message"}'

curl -X POST http://localhost:8888/gpu_worker/run_sync \
curl -X POST http://localhost:8888/gpu_worker/runsync \
-H "Content-Type: application/json" \
-d '{"cleaned_text": "Test message", "word_count": 2}'
```
Expand Down
12 changes: 6 additions & 6 deletions 01_getting_started/03_mixed_workers/cpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@


@remote(resource_config=cpu_config)
async def preprocess_text(input_data: dict) -> dict:
async def preprocess_text(payload: dict) -> dict:
"""Preprocess text: cleaning and tokenization (cheap CPU work)."""
import re
from datetime import datetime

text = input_data.get("text", "")
text = payload.get("text", "")

cleaned_text = text.strip()
cleaned_text = re.sub(r"\s+", " ", cleaned_text)
Expand All @@ -39,13 +39,13 @@ async def preprocess_text(input_data: dict) -> dict:


@remote(resource_config=cpu_config)
async def postprocess_results(input_data: dict) -> dict:
async def postprocess_results(payload: dict) -> dict:
"""Postprocess GPU results: formatting and aggregation (cheap CPU work)."""
from datetime import datetime

predictions = input_data.get("predictions", [])
original_text = input_data.get("original_text", "")
metadata = input_data.get("metadata", {})
predictions = payload.get("predictions", [])
original_text = payload.get("original_text", "")
metadata = payload.get("metadata", {})

if predictions:
top_prediction = max(predictions, key=lambda x: x["confidence"])
Expand Down
6 changes: 3 additions & 3 deletions 01_getting_started/03_mixed_workers/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@


@remote(resource_config=gpu_config, dependencies=["torch"])
async def gpu_inference(input_data: dict) -> dict:
async def gpu_inference(payload: dict) -> dict:
"""GPU inference: mock sentiment classification."""
import random
from datetime import datetime

import torch

cleaned_text = input_data.get("cleaned_text", "")
word_count = input_data.get("word_count", 0)
cleaned_text = payload.get("cleaned_text", "")
word_count = payload.get("word_count", 0)

gpu_available = torch.cuda.is_available()
if gpu_available:
Expand Down
9 changes: 9 additions & 0 deletions 01_getting_started/03_mixed_workers/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,12 @@ async def classify(text: str) -> dict:
},
}
)


if __name__ == "__main__":
import asyncio

test_text = "This is a test message for the classification pipeline."
print(f"Testing classify with text: {test_text}")
result = asyncio.run(classify(test_text))
print(f"Result: {result}")
1 change: 1 addition & 0 deletions 01_getting_started/04_dependencies/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,4 @@ uv.lock
# OS
.DS_Store
Thumbs.db
.flash/
8 changes: 4 additions & 4 deletions 01_getting_started/04_dependencies/cpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"matplotlib",
],
)
async def process_data(input_data: dict) -> dict:
async def process_data(payload: dict) -> dict:
"""
Worker with data science dependencies.

Expand All @@ -46,7 +46,7 @@ async def process_data(input_data: dict) -> dict:
import pandas as pd
import scipy

data = input_data.get("data", [[1, 2], [3, 4], [5, 6]])
data = payload.get("data", [[1, 2], [3, 4], [5, 6]])

# Create DataFrame and compute statistics
df = pd.DataFrame(data, columns=["A", "B"])
Expand Down Expand Up @@ -80,7 +80,7 @@ async def process_data(input_data: dict) -> dict:


@remote(resource_config=minimal_config) # No dependencies!
async def minimal_process(input_data: dict) -> dict:
async def minimal_process(payload: dict) -> dict:
"""
Worker with NO external dependencies.

Expand All @@ -93,7 +93,7 @@ async def minimal_process(input_data: dict) -> dict:
import re
from datetime import datetime

text = input_data.get("text", "")
text = payload.get("text", "")

# Built-in operations only
word_count = len(text.split())
Expand Down
4 changes: 2 additions & 2 deletions 01_getting_started/04_dependencies/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"numpy<2.0.0", # Maximum version constraint
],
)
async def process_with_ml_libs(input_data: dict) -> dict:
async def process_with_ml_libs(payload: dict) -> dict:
"""
Worker with versioned Python dependencies.

Expand Down Expand Up @@ -74,7 +74,7 @@ async def process_with_ml_libs(input_data: dict) -> dict:
dependencies=["opencv-python", "requests"],
system_dependencies=["ffmpeg", "libgl1"], # System packages via apt
)
async def process_with_system_deps(input_data: dict) -> dict:
async def process_with_system_deps(payload: dict) -> dict:
"""
Worker with system-level dependencies.

Expand Down
4 changes: 2 additions & 2 deletions 02_ml_inference/01_text_to_speech/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,14 @@ Visit http://localhost:8888/docs for interactive API documentation. QB endpoints

**Generate speech (JSON with base64 audio):**
```bash
curl -X POST http://localhost:8888/gpu_worker/run_sync \
curl -X POST http://localhost:8888/gpu_worker/runsync \
-H "Content-Type: application/json" \
-d '{"text": "Hello world!", "speaker": "Ryan", "language": "English"}'
```

**List available voices:**
```bash
curl -X POST http://localhost:8888/gpu_worker/run_sync \
curl -X POST http://localhost:8888/gpu_worker/runsync \
-H "Content-Type: application/json" \
-d '{}'
```
Comment on lines 44 to 56
Copy link

Copilot AI Feb 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both curl examples (“Generate speech” and “List available voices”) call the same /gpu_worker/runsync endpoint. Since get_voices and generate_speech are separate @remote functions, this doesn’t show how to actually invoke the voices endpoint. Update the docs to use the correct route for get_voices (or explain the dispatch mechanism if /runsync can target multiple functions).

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is something I don't quite understand: if there are multiple decorated functions, will it always default to one? It feels a little weird to have all functions, regardless of name, pointed to by "runsync".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jhcipar Yes, that's the case for Queue-based Endpoints. When a decorated function is deployed, it is baked in as the handler for an endpoint, so we're stuck with just one handler per endpoint. I thought about supporting multiple handlers per endpoint, but that completely deviates from our standard serverless endpoint design. I did not want to introduce any whiplash effect for that experience.

Expand Down
12 changes: 6 additions & 6 deletions 02_ml_inference/01_text_to_speech/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"soundfile",
],
)
async def generate_speech(input_data: dict) -> dict:
async def generate_speech(payload: dict) -> dict:
"""
Generate speech using Qwen3-TTS-12Hz-1.7B-CustomVoice model.

Expand Down Expand Up @@ -72,10 +72,10 @@ async def generate_speech(input_data: dict) -> dict:
"Auto",
]

text = input_data.get("text", "Hello, this is a test.")
speaker = input_data.get("speaker", "Ryan")
language = input_data.get("language", "Auto")
instruct = input_data.get("instruct", "")
text = payload.get("text", "Hello, this is a test.")
speaker = payload.get("speaker", "Ryan")
language = payload.get("language", "Auto")
instruct = payload.get("instruct", "")

if speaker not in valid_speakers:
return {
Expand Down Expand Up @@ -133,7 +133,7 @@ async def generate_speech(input_data: dict) -> dict:


@remote(resource_config=gpu_config, dependencies=["qwen-tts"])
async def get_voices(input_data: dict) -> dict:
async def get_voices(payload: dict) -> dict:
"""Get available voices and languages."""
speakers = {
"Vivian": "Bright, slightly edgy young female voice (Chinese native)",
Expand Down
Loading