diff --git a/src/Netclaw.Actors.Tests/Sessions/LlmSessionStreamingTimeoutTests.cs b/src/Netclaw.Actors.Tests/Sessions/LlmSessionStreamingTimeoutTests.cs
index 28ae71c4..36cc2f63 100644
--- a/src/Netclaw.Actors.Tests/Sessions/LlmSessionStreamingTimeoutTests.cs
+++ b/src/Netclaw.Actors.Tests/Sessions/LlmSessionStreamingTimeoutTests.cs
@@ -31,6 +31,7 @@ protected override void ConfigureSessionServices(IServiceCollection services)
         });
         services.AddSingleton(new SessionConfig
         {
+            PrefillTimeout = TimeSpan.FromSeconds(2),
             FirstTokenTimeout = TimeSpan.FromSeconds(2),
             ToolExecutionTimeout = TimeSpan.FromSeconds(10),
             SidecarLlmTimeout = TimeSpan.FromSeconds(10),
diff --git a/src/Netclaw.Actors.Tests/Sessions/LlmSessionWatchdogTests.cs b/src/Netclaw.Actors.Tests/Sessions/LlmSessionWatchdogTests.cs
index 8c2f80f2..dcdfd836 100644
--- a/src/Netclaw.Actors.Tests/Sessions/LlmSessionWatchdogTests.cs
+++ b/src/Netclaw.Actors.Tests/Sessions/LlmSessionWatchdogTests.cs
@@ -29,6 +29,7 @@ protected override void ConfigureSessionServices(IServiceCollection services)
         });
         services.AddSingleton(new SessionConfig
        {
+            PrefillTimeout = TimeSpan.FromSeconds(1),
             FirstTokenTimeout = TimeSpan.FromSeconds(1),
             ToolExecutionTimeout = TimeSpan.FromSeconds(1),
             SidecarLlmTimeout = TimeSpan.FromSeconds(1),
diff --git a/src/Netclaw.Actors/Sessions/Handlers/ProcessingWatchdog.cs b/src/Netclaw.Actors/Sessions/Handlers/ProcessingWatchdog.cs
index 99d4fa6e..4b24cb1d 100644
--- a/src/Netclaw.Actors/Sessions/Handlers/ProcessingWatchdog.cs
+++ b/src/Netclaw.Actors/Sessions/Handlers/ProcessingWatchdog.cs
@@ -13,6 +13,10 @@ namespace Netclaw.Actors.Sessions.Handlers;
 /// </summary>
 internal sealed class ProcessingWatchdog
 {
+    public const string LlmCall = "llm-call";
+    public const string ToolExecution = "tool-execution";
+    public const string Compaction = "compaction";
+
     private static readonly object TimerKey = new();
     private long _operationId;
     private string? _operationName;
@@ -47,13 +51,23 @@ public void Stop(ITimerScheduler timers)
         _operationName = null;
     }
 
+    /// <summary>
+    /// Switch from the generous prefill budget to the tighter inter-delta timeout.
+    /// Called once when the first real streaming delta arrives.
+    /// </summary>
+    public void Promote(TimeSpan interDeltaTimeout, ITimerScheduler timers)
+        => RestartLlmTimer(interDeltaTimeout, timers);
+
     /// <summary>
     /// Refresh the watchdog timer for an active LLM call (streaming keepalive).
-    /// Only refreshes if the current operation is "llm-call".
+    /// Only refreshes if the current operation is <see cref="LlmCall"/>.
     /// </summary>
     public void Refresh(TimeSpan timeout, ITimerScheduler timers)
+        => RestartLlmTimer(timeout, timers);
+
+    private void RestartLlmTimer(TimeSpan timeout, ITimerScheduler timers)
     {
-        if (_operationName is not "llm-call")
+        if (_operationName is not LlmCall)
             return;
 
         timers.StartSingleTimer(
diff --git a/src/Netclaw.Actors/Sessions/LlmSessionActor.cs b/src/Netclaw.Actors/Sessions/LlmSessionActor.cs
index 9ad63bcc..eef6bdf1 100644
--- a/src/Netclaw.Actors/Sessions/LlmSessionActor.cs
+++ b/src/Netclaw.Actors/Sessions/LlmSessionActor.cs
@@ -492,8 +492,15 @@ private void Processing()
         Command(msg =>
         {
             if (msg.CallId != _activeCallId) return; // stale delta from cancelled call
-            _anyContentStreamed = true;
-            _watchdog.Refresh(_config.FirstTokenTimeout, Timers);
+            if (!_anyContentStreamed)
+            {
+                _anyContentStreamed = true;
+                _watchdog.Promote(_config.FirstTokenTimeout, Timers);
+            }
+            else
+            {
+                _watchdog.Refresh(_config.FirstTokenTimeout, Timers);
+            }
 
             switch (msg.Content)
             {
@@ -908,8 +915,8 @@ await _approvalService.RecordApprovalAsync(
 
             var timeout = msg.OperationName switch
             {
-                "tool-execution" => _config.ToolExecutionTimeout,
-                "llm-call" => _config.FirstTokenTimeout,
+                ProcessingWatchdog.ToolExecution => _config.ToolExecutionTimeout,
+                ProcessingWatchdog.LlmCall => _config.FirstTokenTimeout,
                 _ => _config.TurnLlmTimeout
             };
 
@@ -1097,7 +1104,7 @@ private void Compacting()
         Command(msg =>
         {
             var timeout = GetCompactionTimeout();
-            _watchdog.Start("compaction", timeout, Timers);
+            _watchdog.Start(ProcessingWatchdog.Compaction, timeout, Timers);
             var operationId = _watchdog.CurrentOperationId;
             var stateSnapshot = _state;
 
@@ -1443,7 +1450,7 @@ private void CommandSessionContextMessages()
     }
 
     private bool IsCurrentCompactionOperation(long operationId)
-        => _watchdog.IsCurrentOperation("compaction", operationId);
+        => _watchdog.IsCurrentOperation(ProcessingWatchdog.Compaction, operationId);
 
     /// <summary>
@@ -1600,7 +1607,7 @@ private void HandleToolCallResponse(
         var maxInlineToolResultChars = _config.Tuning.MaxInlineToolResultChars;
         var toolExecutionTimeout = _config.ToolExecutionTimeout;
 
-        _watchdog.Start("tool-execution", toolExecutionTimeout, Timers);
+        _watchdog.Start(ProcessingWatchdog.ToolExecution, toolExecutionTimeout, Timers);
 
         // Capture subscriber snapshot for subagent activity notifications.
         // These are emitted directly from the tool execution thread via Tell(),
@@ -2408,7 +2415,6 @@ private void FireLlmCall(string? recallQuery = null, bool forceNoTools = false)
 
         var self = Self;
         var client = _chatClient;
-        var timeout = _config.FirstTokenTimeout;
         var exposedTools = ResolveExposedToolsForCurrentTurn();
 
         ChatOptions? options = null;
@@ -2420,7 +2426,7 @@ private void FireLlmCall(string? recallQuery = null, bool forceNoTools = false)
             };
         }
 
-        _watchdog.Start("llm-call", timeout, Timers);
+        _watchdog.Start(ProcessingWatchdog.LlmCall, _config.PrefillTimeout, Timers);
 
         TurnLog().Info("turn_llm_call_start messages={MessageCount} toolsEnabled={ToolsEnabled} forceNoTools={ForceNoTools} callId={CallId}",
             messages.Count,
@@ -2979,7 +2985,7 @@ private void EmitUsageOutput(UsageDetails usage)
 
     private void PauseToolExecutionWatchdogForApprovalWait(string callId)
     {
-        if (!string.Equals(_watchdog.CurrentOperationName, "tool-execution", StringComparison.Ordinal))
+        if (!string.Equals(_watchdog.CurrentOperationName, ProcessingWatchdog.ToolExecution, StringComparison.Ordinal))
             return;
 
         _watchdog.Stop(Timers);
@@ -2997,7 +3003,7 @@ private void ResumeToolExecutionWatchdogAfterApprovalWait()
         if (_watchdog.CurrentOperationName is not null)
             return;
 
-        _watchdog.Start("tool-execution", _config.ToolExecutionTimeout, Timers);
+        _watchdog.Start(ProcessingWatchdog.ToolExecution, _config.ToolExecutionTimeout, Timers);
         _log.Info("Resumed tool-execution watchdog after approval response");
     }
 
diff --git a/src/Netclaw.Actors/Sessions/Pipelines/SessionLlmInvoker.cs b/src/Netclaw.Actors/Sessions/Pipelines/SessionLlmInvoker.cs
index c6cc35d4..b3317ff3 100644
--- a/src/Netclaw.Actors/Sessions/Pipelines/SessionLlmInvoker.cs
+++ b/src/Netclaw.Actors/Sessions/Pipelines/SessionLlmInvoker.cs
@@ -91,6 +91,11 @@ public static async Task StreamAsync(
                 if (textDeltaCount == 1)
                 {
                     pendingTextDelta = text.Text;
+                    self.Tell(new LlmResponseDeltaReceived
+                    {
+                        Content = EmptyTextContent,
+                        CallId = callId
+                    });
                 }
                 else
                 {
@@ -114,6 +119,11 @@ public static async Task StreamAsync(
                 if (thinkingDeltaCount == 1)
                 {
                     pendingThinkingDelta = thinking.Text;
+                    self.Tell(new LlmResponseDeltaReceived
+                    {
+                        Content = EmptyTextContent,
+                        CallId = callId
+                    });
                 }
                 else
                 {
diff --git a/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json b/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json
index d88409bb..afb7cfdc 100644
--- a/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json
+++ b/src/Netclaw.Configuration/Schemas/netclaw-config.v1.schema.json
@@ -145,7 +145,14 @@
       "minimum": 10,
       "maximum": 1800,
       "default": 600,
-      "description": "Maximum inactivity in seconds for LLM streaming calls. Used as both the initial wait for the first token and the silence threshold between deltas (resets on each delta)."
+      "description": "Maximum silence in seconds between consecutive LLM streaming deltas. After the first delta arrives, the watchdog uses this as the inter-delta timeout (timer resets on each delta)."
+    },
+    "PrefillTimeoutSeconds": {
+      "type": "integer",
+      "minimum": 60,
+      "maximum": 7200,
+      "default": 1800,
+      "description": "Maximum time in seconds to wait for the first streaming delta from the LLM. Covers queue wait and prompt prefill on self-hosted backends. After the first delta, the watchdog switches to FirstTokenTimeoutSeconds."
     },
     "CompactionThreshold": {
       "type": "number",
diff --git a/src/Netclaw.Configuration/SessionConfig.cs b/src/Netclaw.Configuration/SessionConfig.cs
index d9e8bd18..a67597fc 100644
--- a/src/Netclaw.Configuration/SessionConfig.cs
+++ b/src/Netclaw.Configuration/SessionConfig.cs
@@ -63,13 +63,22 @@ public sealed record SessionConfig
     public TimeSpan SidecarLlmTimeout { get; init; } = TimeSpan.FromSeconds(90);
 
     /// <summary>
-    /// Maximum inactivity timeout for LLM streaming calls. Used both as the
-    /// initial wait for the first token (prefill phase) and as the silence
-    /// threshold between consecutive deltas — the timer resets on every delta.
+    /// Maximum inactivity timeout between consecutive LLM streaming deltas.
+    /// The timer resets on every delta. Once the first delta arrives the watchdog
+    /// switches from <see cref="PrefillTimeout"/> to this tighter budget.
     /// Falls back to <see cref="TurnLlmTimeout"/> if not explicitly configured.
     /// </summary>
     public TimeSpan FirstTokenTimeout { get; init; } = TimeSpan.FromSeconds(600);
 
+    /// <summary>
+    /// Maximum time to wait for the first streaming delta from the LLM (covers
+    /// queue wait + prompt prefill). Generous default because self-hosted backends
+    /// can be legitimately silent for 10+ minutes during slot contention and cold
+    /// prefill of large contexts. After the first delta, the watchdog switches to
+    /// <see cref="FirstTokenTimeout"/>.
+    /// </summary>
+    public TimeSpan PrefillTimeout { get; init; } = TimeSpan.FromSeconds(1800);
+
     /// <summary>
     /// Internal tuning constants. Bindable from config for development/testing
     /// but not part of the documented operator surface.
@@ -99,6 +108,9 @@ public static SessionConfig BindFromConfiguration(IConfigurationSection section)
             FirstTokenTimeout = raw.FirstTokenTimeoutSeconds > 0
                 ? TimeSpan.FromSeconds(raw.FirstTokenTimeoutSeconds)
                 : raw.TurnLlmTimeoutSeconds != 180 ? turnLlmTimeout : TimeSpan.FromSeconds(600),
+            PrefillTimeout = raw.PrefillTimeoutSeconds > 0
+                ? TimeSpan.FromSeconds(raw.PrefillTimeoutSeconds)
+                : TimeSpan.FromSeconds(1800),
             Tuning = tuning,
         };
     }
@@ -150,5 +162,6 @@ private sealed record RawSessionConfig
         public int ToolExecutionTimeoutSeconds { get; init; } = 90;
         public int SidecarLlmTimeoutSeconds { get; init; } = 90;
         public int FirstTokenTimeoutSeconds { get; init; }
+        public int PrefillTimeoutSeconds { get; init; }
     }
 }
diff --git a/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs b/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
index 0cb14497..07ddbffc 100644
--- a/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
+++ b/src/Netclaw.Daemon.Tests/Configuration/OpenAiCompatibleChatClientTests.cs
@@ -60,7 +60,8 @@ public async Task StreamsReasoningAndTextDeltas_FromOfficialSpectrum()
         await foreach (var update in client.GetStreamingResponseAsync([new ChatMessage(ChatRole.User, "hello")], cancellationToken: TestContext.Current.CancellationToken))
             updates.Add(update);
 
-        Assert.Equal(3, updates.Count);
+        Assert.Equal(4, updates.Count);
+        Assert.Contains(updates, u => u.Contents.Count == 0 && u.FinishReason is null); // keepalive from content-less initial chunk
         Assert.Contains(updates, u => u.Contents.OfType().Any(c => c.Text == "Thinking"));
         Assert.Contains(updates, u => u.Contents.OfType().Any(c => c.Text == "Hello"));
         Assert.Contains(updates, u => u.FinishReason == ChatFinishReason.Stop);
diff --git a/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs b/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs
index 2339e69d..af6a701f 100644
--- a/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs
+++ b/src/Netclaw.Providers/SelfHosted/OpenAiCompatibleChatClient.cs
@@ -74,9 +74,17 @@ public async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
             if (line is null)
                 break;
 
-            if (string.IsNullOrWhiteSpace(line) || !line.StartsWith("data:", StringComparison.Ordinal))
+            if (string.IsNullOrWhiteSpace(line))
                 continue;
 
+            if (!line.StartsWith("data:", StringComparison.Ordinal))
+            {
+                // SSE comment lines (`:` prefix) and event-type lines — yield keepalive
+                // so the watchdog knows the connection is alive during prefill/queuing.
+                yield return KeepaliveUpdate;
+                continue;
+            }
+
             var ssePayload = line[5..].Trim();
             if (ssePayload == "[DONE]")
                 break;
@@ -173,6 +181,9 @@ private JsonObject BuildPayload(IEnumerable<ChatMessage> messages, ChatOptions?
         if (stream)
         {
             body["stream_options"] = new JsonObject { ["include_usage"] = true };
+            // llama-server sends prefill progress as SSE data events when enabled.
+            // Harmless on servers that don't support it (unknown fields are ignored).
+            body["return_progress"] = true;
         }
 
         if (options?.Temperature is { } temperature)
@@ -576,7 +587,12 @@ private static IEnumerable<ChatResponseUpdate> ParseStreamingUpdates(JsonElement
             contents.Add(new UsageContent(usage));
 
         if (contents.Count == 0 && finishReason is null)
+        {
+            // Content-less data events (e.g. prompt_progress during prefill) — yield
+            // keepalive so the watchdog timer resets while the server is working.
+            yield return KeepaliveUpdate;
             yield break;
+        }
 
         yield return new ChatResponseUpdate(ChatRole.Assistant, contents)
         {