Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Pin line endings on text files so cross-platform contributors don't
# see phantom "modified" diffs from autocrlf-driven CRLF<->LF flips.
#
# Background: Windows users with `core.autocrlf=true` (the Git for
# Windows default) see Cargo.toml / tauri.conf.json / etc. as modified
# the moment they `git checkout` because the working-tree copy gets
# rewritten with CRLF while origin's blobs are LF. Without this file,
# every status check on Windows lights those up as dirty even though
# no real change was made. With this file, git normalizes them on the
# way in and out and the status stays clean.

# Default: let Git auto-detect text files and normalize them to LF in
# the index. The working tree gets the platform's native line ending
# on checkout (LF on macOS/Linux, LF on Windows-with-`core.eol=lf`,
# CRLF on Windows-with-default-config).
* text=auto

# Repo-shape files MUST stay LF in the working tree everywhere -- the
# Tauri / Cargo / npm toolchains all read them with LF assumptions
# even on Windows, and a CRLF-shaped tauri.conf.json caused real
# parse failures earlier in the project history (see the patch-
# tauri-conf.mjs script's "self-heal an empty/corrupt JSON" branch).
*.toml text eol=lf
*.json text eol=lf
*.yml text eol=lf
*.yaml text eol=lf
*.md text eol=lf

# Source files: LF everywhere. Vite + tsc handle either, but pinning
# avoids whitespace-only diffs in PRs.
*.ts text eol=lf
*.tsx text eol=lf
*.js text eol=lf
*.jsx text eol=lf
*.mjs text eol=lf
*.cjs text eol=lf
*.py text eol=lf
*.rs text eol=lf
*.css text eol=lf
*.html text eol=lf

# Shell scripts: LF (CRLF endings break them on macOS / Linux with
# "bad interpreter" errors when the exec'd shebang line ends in \r).
*.sh text eol=lf

# PowerShell: CRLF. The PS 5.1 parser handles either but PowerShell
# scripts authored on Windows traditionally ship CRLF, and Windows
# editors would otherwise rewrite them on save and produce noise.
*.ps1 text eol=crlf
*.psm1 text eol=crlf
*.psd1 text eol=crlf

# Binary blobs that Git would otherwise try to diff/normalize. Mark
# them explicitly so a `text=auto` heuristic mistake can't corrupt
# them on a cross-platform clone.
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.webp binary
*.ico binary
*.icns binary
*.woff binary
*.woff2 binary
*.ttf binary
*.otf binary
*.zip binary
*.gz binary
*.tar binary
*.7z binary
*.exe binary
*.dll binary
*.so binary
*.dylib binary
*.pyd binary
*.safetensors binary
*.gguf binary
*.bin binary
*.onnx binary
31 changes: 22 additions & 9 deletions CLAUDE.md

Large diffs are not rendered by default.

27 changes: 26 additions & 1 deletion backend_service/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ class ToolCallResult:
arguments: dict[str, Any]
result: str
elapsed_seconds: float
# Phase 2.8: optional structured output the frontend can render
# natively (table / code / markdown / image / chart). When None,
# the legacy collapsible-JSON renderer fires. The `result` text
# field is always populated so the language model sees something
# readable on the next turn regardless of UI rendering.
render_as: str | None = None
data: dict[str, Any] | None = None


@dataclass
Expand Down Expand Up @@ -108,8 +115,19 @@ def _execute_tool_call(
)

start = time.perf_counter()
render_as: str | None = None
structured_data: dict[str, Any] | None = None
try:
result_text = tool.execute(**arguments)
# Phase 2.8: try the structured entry first. Tools that
# haven't migrated return None and we fall back to the
# plain-text path below.
structured = tool.execute_structured(**arguments)
if structured is not None:
result_text = structured.text
render_as = structured.render_as
structured_data = structured.data
else:
result_text = tool.execute(**arguments)
except Exception as exc:
result_text = f"Error executing {tool_name}: {exc}"
elapsed = round(time.perf_counter() - start, 3)
Expand All @@ -122,6 +140,8 @@ def _execute_tool_call(
arguments=arguments,
result=result_text,
elapsed_seconds=elapsed,
render_as=render_as,
data=structured_data,
)


Expand Down Expand Up @@ -384,6 +404,11 @@ def run_agent_loop_streaming(
"name": tc_result.tool_name,
"result": tc_result.result[:2000], # Cap for streaming
"elapsed": tc_result.elapsed_seconds,
# Phase 2.8: stream the structured shape so the
# frontend can render it as the tool finishes
# rather than waiting for the final done payload.
"renderAs": tc_result.render_as,
"data": tc_result.data,
},
}

Expand Down
84 changes: 80 additions & 4 deletions backend_service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@
CHAT_SESSIONS_PATH = DATA_LOCATION.chat_sessions_path
LIBRARY_CACHE_PATH = DATA_LOCATION.data_dir / "library_cache.json"
DOCUMENTS_DIR = DATA_LOCATION.documents_dir
WORKSPACES_PATH = DATA_LOCATION.workspaces_path
WORKSPACES_DIR = DATA_LOCATION.workspaces_dir
IMAGE_OUTPUTS_DIR = DATA_LOCATION.image_outputs_dir
VIDEO_OUTPUTS_DIR = DATA_LOCATION.video_outputs_dir
MAX_DOC_SIZE_BYTES = 50 * 1024 * 1024 # 50 MB per file
Expand Down Expand Up @@ -355,6 +357,20 @@ def _generate_image_artifacts(
logger.info("Generating image: model=%s repo=%s size=%dx%d steps=%d draft=%s",
variant.get("name"), variant.get("repo"), effective_width, effective_height, request.steps, request.draftMode)
runtime_manager = runtime_manager or ImageRuntimeManager()
# FU-019: variant-declared defaults override schema defaults only
# when the user hasn't moved the slider. Schema defaults (24 steps,
# CFG 5.5) come from ImageGenerationRequest in models/__init__.py.
SCHEMA_DEFAULT_STEPS = 24
SCHEMA_DEFAULT_GUIDANCE = 5.5
effective_steps = request.steps
effective_guidance = request.guidance
variant_default_steps = variant.get("defaultSteps")
variant_cfg_override = variant.get("cfgOverride")
if variant_default_steps is not None and request.steps == SCHEMA_DEFAULT_STEPS:
effective_steps = int(variant_default_steps)
if variant_cfg_override is not None and abs(request.guidance - SCHEMA_DEFAULT_GUIDANCE) < 1e-3:
effective_guidance = float(variant_cfg_override)

rendered_images, runtime_status = runtime_manager.generate(
ImageGenerationConfig(
modelId=request.modelId,
Expand All @@ -364,15 +380,39 @@ def _generate_image_artifacts(
negativePrompt=request.negativePrompt or "",
width=effective_width,
height=effective_height,
steps=request.steps,
guidance=request.guidance,
steps=effective_steps,
guidance=effective_guidance,
batchSize=request.batchSize,
seed=request.seed,
qualityPreset=request.qualityPreset,
sampler=request.sampler,
ggufRepo=(variant.get("ggufRepo") or None),
ggufFile=(variant.get("ggufFile") or None),
runtime=(variant.get("engine") or None),
cacheStrategy=request.cacheStrategy,
cacheRelL1Thresh=request.cacheRelL1Thresh,
cfgDecay=request.cfgDecay,
previewVae=request.previewVae,
# FU-019: variant-declared LoRA + step / guidance overrides.
# When the catalog variant pins a Hyper-SD / FLUX-Turbo /
# lightx2v LoRA, the engine fuses it into the pipeline at
# load time. ``defaultSteps`` / ``cfgOverride`` substitute
# only when the user kept the schema defaults — explicit
# slider tweaks survive untouched.
loraRepo=(variant.get("loraRepo") or None),
loraFile=(variant.get("loraFile") or None),
loraScale=(variant.get("loraScale") if variant.get("loraScale") is not None else None),
defaultSteps=(variant.get("defaultSteps") if variant.get("defaultSteps") is not None else None),
cfgOverride=(variant.get("cfgOverride") if variant.get("cfgOverride") is not None else None),
# FU-023: variant-pinned Nunchaku SVDQuant snapshot. Threads
# through to ``_ensure_pipeline`` which prefers it over
# NF4 / int8wo on CUDA when nunchaku is installed.
nunchakuRepo=(variant.get("nunchakuRepo") or None),
nunchakuFile=(variant.get("nunchakuFile") or None),
# FU-024: opt-in FP8 layerwise casting. Threaded from the
# request rather than the catalog so users can experiment
# without the catalog committing to fp8 readiness per repo.
fp8LayerwiseCasting=request.fp8LayerwiseCasting,
)
)
created_at = datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
Expand Down Expand Up @@ -429,6 +469,21 @@ def _generate_video_artifact(
request.steps,
)

# FU-019: variant-declared step / CFG defaults override schema
# defaults only when the user kept the schema defaults — explicit
# slider movement on the frontend is preserved untouched. The
# video schema default is steps=50 (see VideoGenerationRequest).
SCHEMA_DEFAULT_STEPS = 50
SCHEMA_DEFAULT_GUIDANCE = 3.0
effective_steps = request.steps
effective_guidance = request.guidance
variant_default_steps = variant.get("defaultSteps")
variant_cfg_override = variant.get("cfgOverride")
if variant_default_steps is not None and request.steps == SCHEMA_DEFAULT_STEPS:
effective_steps = int(variant_default_steps)
if variant_cfg_override is not None and abs(request.guidance - SCHEMA_DEFAULT_GUIDANCE) < 1e-3:
effective_guidance = float(variant_cfg_override)

video, runtime_status = runtime_manager.generate(
VideoGenerationConfig(
modelId=request.modelId,
Expand All @@ -440,8 +495,8 @@ def _generate_video_artifact(
height=request.height,
numFrames=request.numFrames,
fps=request.fps,
steps=request.steps,
guidance=request.guidance,
steps=effective_steps,
guidance=effective_guidance,
seed=request.seed,
ggufRepo=(variant.get("ggufRepo") or None),
ggufFile=(variant.get("ggufFile") or None),
Expand All @@ -451,6 +506,27 @@ def _generate_video_artifact(
enableLtxRefiner=request.enableLtxRefiner,
enhancePrompt=request.enhancePrompt,
cfgDecay=request.cfgDecay,
stgScale=request.stgScale,
previewVae=request.previewVae,
# FU-019: variant-declared LoRA + override metadata.
loraRepo=(variant.get("loraRepo") or None),
loraFile=(variant.get("loraFile") or None),
loraScale=(variant.get("loraScale") if variant.get("loraScale") is not None else None),
defaultSteps=(variant.get("defaultSteps") if variant.get("defaultSteps") is not None else None),
cfgOverride=(variant.get("cfgOverride") if variant.get("cfgOverride") is not None else None),
# Phase 3 / Wan2.2-Distill 4-step: catalog-pinned distilled
# transformers replace both Wan A14B experts at pipeline load.
distillTransformerRepo=(variant.get("distillTransformerRepo") or None),
distillTransformerHighNoiseFile=(variant.get("distillTransformerHighNoiseFile") or None),
distillTransformerLowNoiseFile=(variant.get("distillTransformerLowNoiseFile") or None),
distillTransformerPrecision=(variant.get("distillTransformerPrecision") or None),
# FU-023 / FU-024: catalog-pinned Nunchaku snapshot + opt-in
# FP8 layerwise casting (CUDA-only). Same shape as the image
# side so a future video-Nunchaku release lands without app
# plumbing churn.
nunchakuRepo=(variant.get("nunchakuRepo") or None),
nunchakuFile=(variant.get("nunchakuFile") or None),
fp8LayerwiseCasting=request.fp8LayerwiseCasting,
)
)

Expand Down
Loading