10 changes: 4 additions & 6 deletions tests/integration/features/cli_runs.feature
@@ -19,7 +19,7 @@ Feature: CLI Run Commands
Given I have a valid Towerfile in the current directory
When I run "tower deploy --create" via CLI
Then I run "tower run -p nonexistent_param=test" via CLI
Then the output should show "API Error:"
Then the output should show "Error details:"
And the output should show "Validation error"
And the output should show "Unknown parameter"
And the output should not just show "422"
@@ -36,20 +36,18 @@ Feature: CLI Run Commands
Given I have a simple hello world application named "app-logs-after-completion"
When I run "tower deploy --create" via CLI
And I run "tower run" via CLI
Then the output should show "First log before run completes"
And the output should show "Second log after run completes"
Then the output should show "Hello, World!"

Scenario: CLI apps logs follow should stream logs and drain after completion
Given I have a simple hello world application named "app-logs-after-completion"
When I run "tower deploy --create" via CLI
And I run "tower run --detached" via CLI and capture run number
And I run "tower apps logs --follow {app_name}#{run_number}" via CLI using created app name and run number
Then the output should show "First log before run completes"
And the output should show "Second log after run completes"
Then the output should show "Hello, World!"

Scenario: CLI apps logs follow should display warnings
Given I have a simple hello world application named "app-logs-warning"
When I run "tower deploy --create" via CLI
And I run "tower run --detached" via CLI and capture run number
And I run "tower apps logs --follow {app_name}#{run_number}" via CLI using created app name and run number
Then the output should show "Warning: Rate limit approaching"
Then the output should show "Warning: No new logs available"
2 changes: 1 addition & 1 deletion tests/integration/features/mcp_app_management.feature
@@ -100,7 +100,7 @@ Feature: MCP App Management
When I call tower_deploy via MCP
Then I call tower_run_remote with invalid parameter "nonexistent_param=test"
Then I should receive a detailed validation error
And the error should mention "Unknown parameter"
And the error should mention "Validation error"
And the error should not just be a status code

Scenario: Local run should detect exit code failures
27 changes: 19 additions & 8 deletions tests/integration/features/steps/cli_steps.py
@@ -30,8 +30,11 @@ def step_run_cli_command(context, command):
test_env = os.environ.copy()
test_env["FORCE_COLOR"] = "1" # Force colored output
test_env["CLICOLOR_FORCE"] = "1" # Force colored output
test_env["TOWER_URL"] = context.tower_url # Use mock API
test_env["TOWER_JWT"] = "mock_jwt_token"
test_env["TOWER_URL"] = context.tower_url # Use configured API URL

# Only set mock JWT if not already configured externally
if "TOWER_JWT" not in os.environ:
test_env["TOWER_JWT"] = "mock_jwt_token"

# Override HOME to use test session
test_home = Path(__file__).parent.parent.parent / "test-home"
@@ -45,9 +48,11 @@ def step_run_cli_command(context, command):
env=test_env,
)
context.cli_output = result.stdout + result.stderr
context.cli_stdout = result.stdout
context.cli_return_code = result.returncode
except subprocess.TimeoutExpired:
context.cli_output = "Command timed out"
context.cli_stdout = ""
context.cli_return_code = 124
Comment on lines +51 to 56

⚠️ Potential issue | 🟡 Minor

Consider initializing cli_stdout in the general exception handler.

The timeout handler sets `context.cli_stdout = ""`, but the general exception handler (lines 57-61) re-raises without setting it. If any downstream code assumes `cli_stdout` exists after `step_run_cli_command`, this could cause an `AttributeError` in edge cases.

🐛 Proposed fix
     except subprocess.TimeoutExpired:
         context.cli_output = "Command timed out"
         context.cli_stdout = ""
         context.cli_return_code = 124
     except Exception as e:
+        context.cli_stdout = ""
         print(f"DEBUG: Exception in CLI command: {type(e).__name__}: {e}")
         print(f"DEBUG: Command was: {full_command}")
         print(f"DEBUG: Working directory: {os.getcwd()}")
         raise
🤖 Prompt for AI Agents
In `@tests/integration/features/steps/cli_steps.py` around lines 51 - 56, The
general exception handler in step_run_cli_command doesn't set
context.cli_stdout, causing attribute errors downstream; update the broad except
Exception handler to initialize context.cli_stdout = "" (matching the
TimeoutExpired handler) before re-raising so callers can always rely on
context.cli_stdout existing; locate the try/except in step_run_cli_command (and
the existing except subprocess.TimeoutExpired block) and add the initialization
there.
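Applied in full, the handler would look roughly like the sketch below. This is only an illustration of the suggestion: the diff confirms `env=test_env`, the `TimeoutExpired` branch, and the DEBUG prints, while the other `subprocess.run` arguments (shell mode, timeout value) are assumptions.

```python
try:
    result = subprocess.run(
        full_command,      # assembled earlier in step_run_cli_command
        shell=True,        # assumption: the actual invocation may differ
        capture_output=True,
        text=True,
        timeout=60,        # illustrative; the real timeout is not shown in the diff
        env=test_env,
    )
    context.cli_output = result.stdout + result.stderr
    context.cli_stdout = result.stdout
    context.cli_return_code = result.returncode
except subprocess.TimeoutExpired:
    context.cli_output = "Command timed out"
    context.cli_stdout = ""
    context.cli_return_code = 124
except Exception as e:
    # Initialize cli_stdout before re-raising so downstream steps can rely on it.
    context.cli_stdout = ""
    print(f"DEBUG: Exception in CLI command: {type(e).__name__}: {e}")
    print(f"DEBUG: Command was: {full_command}")
    print(f"DEBUG: Working directory: {os.getcwd()}")
    raise
```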

except Exception as e:
print(f"DEBUG: Exception in CLI command: {type(e).__name__}: {e}")
@@ -267,11 +272,17 @@ def step_table_should_show_columns(context, column_list):
assert column in output, f"Expected column '{column}' in table, got: {output}"


def parse_cli_json(context):
"""Parse JSON from CLI stdout (excludes stderr)."""
raw = getattr(context, "cli_stdout", context.cli_output)
return json.loads(raw)


@step("the output should be valid JSON")
def step_output_should_be_valid_json(context):
"""Verify output is valid JSON"""
try:
json.loads(context.cli_output)
parse_cli_json(context)
except json.JSONDecodeError as e:
raise AssertionError(
f"Output is not valid JSON: {e}\nOutput: {context.cli_output}"
@@ -281,7 +292,7 @@ def step_output_should_be_valid_json(context):
@step("the JSON should contain app information")
def step_json_should_contain_app_info(context):
"""Verify JSON contains app-related information"""
data = json.loads(context.cli_output)
data = parse_cli_json(context)
assert (
"app" in data or "name" in data
), f"Expected app information in JSON, got: {data}"
@@ -290,7 +301,7 @@ def step_json_should_contain_app_info(context):
@step("the JSON should contain runs array")
def step_json_should_contain_runs_array(context):
"""Verify JSON contains runs array"""
data = json.loads(context.cli_output)
data = parse_cli_json(context)
assert "runs" in data and isinstance(
data["runs"], list
), f"Expected runs array in JSON, got: {data}"
@@ -299,7 +310,7 @@ def step_json_should_contain_runs_array(context):
@step("the JSON should contain the created app information")
def step_json_should_contain_created_app_info(context):
"""Verify JSON contains created app information"""
data = json.loads(context.cli_output)
data = parse_cli_json(context)

expected = IsPartialDict(
result="success",
@@ -319,7 +330,7 @@ def step_json_should_contain_created_app_info(context):
@step('the app name should be "{expected_name}"')
def step_app_name_should_be(context, expected_name):
"""Verify app name matches expected value"""
data = json.loads(context.cli_output)
data = parse_cli_json(context)
# Extract app name from response structure
if "app" in data and "name" in data["app"]:
actual_name = data["app"]["name"]
@@ -338,7 +349,7 @@ def step_app_name_should_be(context, expected_name):
@step('the app description should be "{expected_description}"')
def step_app_description_should_be(context, expected_description):
"""Verify app description matches expected value"""
data = json.loads(context.cli_output)
data = parse_cli_json(context)
candidates = []

if "app" in data:
83 changes: 63 additions & 20 deletions tests/integration/run_tests.py
@@ -2,6 +2,12 @@
"""
Simple test runner for Tower MCP integration tests.
Assumes dependencies are already installed via nix devShell.

Supports two modes:
1. Mock mode (default): Starts local mock server at http://127.0.0.1:8000
2. Real server mode: Set TOWER_URL env var to use external server
- Skips mock server startup
- Preserves externally configured session.json
"""

import os
@@ -18,12 +24,41 @@ def log(msg):
print(f"\033[36m[test-runner]\033[0m {msg}")


def is_session_externally_configured(test_home):
"""Check if session.json has been externally configured.

Returns True if session.json is newer than the git HEAD version,
indicating it was set up by external orchestration (e.g., from monorepo).
"""
session_file = test_home / ".config" / "tower" / "session.json"
if not session_file.exists():
return False

try:
result = subprocess.run(
["git", "log", "-1", "--format=%ct", str(session_file)],
capture_output=True,
text=True,
check=True,
)
git_mtime = int(result.stdout.strip())
file_mtime = int(session_file.stat().st_mtime)
return file_mtime > git_mtime
except (subprocess.CalledProcessError, ValueError, FileNotFoundError):
return False


def reset_session_fixture(test_home):
"""Reset the session.json fixture to its committed state before tests.

The CLI may modify session.json during MCP operations (like team switching),
so we restore it to the canonical committed version before each test run.
Skips reset if session appears to be externally configured.
"""
if is_session_externally_configured(test_home):
log("Skipping session.json reset (externally configured)")
return

session_file = test_home / ".config" / "tower" / "session.json"
subprocess.run(
["git", "checkout", str(session_file)],
@@ -87,32 +122,40 @@ def main():

# Set up environment
env = os.environ.copy()
if "TOWER_URL" not in env:
env["TOWER_URL"] = "http://127.0.0.1:8000"

# Set HOME to test-home directory to isolate session from user's real config
test_home = Path(__file__).parent / "test-home"
env["HOME"] = str(test_home.absolute())

log(f"Using API URL: \033[1m{env['TOWER_URL']}\033[0m")
log(f"Using test HOME: \033[1m{env['HOME']}\033[0m")

# Ensure mock server is running
mock_process = None
if not check_mock_server_health(env["TOWER_URL"]):
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
port_in_use = sock.connect_ex(("127.0.0.1", 8000)) == 0
sock.close()

if port_in_use:
log(
"ERROR: Port 8000 is in use but not responding to health check (some unrelated server?)."
)
return 1

mock_process = start_mock_server()
# Determine if we're using external configuration or mock server
tower_url_preset = "TOWER_URL" in os.environ
if tower_url_preset:
server_url = env["TOWER_URL"]
mock_process = None
log(f"Using externally configured API URL: \033[1m{server_url}\033[0m")
else:
log("Mock server already running and healthy")
server_url = "http://127.0.0.1:8000"
env["TOWER_URL"] = server_url
log(f"Using mock server API URL: \033[1m{server_url}\033[0m")

# Ensure mock server is running
mock_process = None
if not check_mock_server_health(server_url):
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
port_in_use = sock.connect_ex(("127.0.0.1", 8000)) == 0
sock.close()

if port_in_use:
log(
"ERROR: Port 8000 is in use but not responding to health check (some unrelated server?)."
)
return 1

mock_process = start_mock_server()
else:
log("Mock server already running and healthy")

log(f"Using test HOME: \033[1m{env['HOME']}\033[0m")

# Actually run tests
try:
53 changes: 34 additions & 19 deletions tests/mock-api-server/main.py
@@ -11,7 +11,7 @@
"""

from fastapi import FastAPI, HTTPException, Response, Request
from fastapi.responses import StreamingResponse
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel
from typing import List, Dict, Any, Optional
import os
@@ -240,13 +240,20 @@ async def run_app(name: str, run_params: Dict[str, Any]):

parameters = run_params.get("parameters", {})
if "nonexistent_param" in parameters:
raise HTTPException(
return JSONResponse(
status_code=422,
detail={
"detail": "Validation error",
"status": 422,
content={
"$schema": "http://localhost:8081/v1/schemas/ErrorModel.json",
"title": "Unprocessable Entity",
"errors": [{"message": "Unknown parameter"}],
"status": 422,
"detail": "Validation error",
"errors": [
{
"message": "Unknown parameter",
"location": "body.parameters",
"value": parameters,
}
],
},
)

@@ -521,7 +528,7 @@ def make_log_event(seq: int, line_num: int, content: str, timestamp: str):


def make_warning_event(content: str, timestamp: str):
data = {"data": {"content": content, "reported_at": timestamp}, "event": "warning"}
data = {"content": content, "reported_at": timestamp}
return f"event: warning\ndata: {json.dumps(data)}\n\n"


@@ -540,25 +547,31 @@ async def describe_run_logs(name: str, seq: int):


async def generate_logs_after_completion_test_stream(seq: int):
"""Log before run completion, then log after.

Timeline: Run completes at 1 second, second log sent at 1.5 seconds.
"""
"""Emit realistic runner logs then close, matching real server behavior."""
yield make_log_event(seq, 1, "Using CPython 3.12.9", "2025-08-22T12:00:00Z")
yield make_log_event(
seq, 1, "First log before run completes", "2025-08-22T12:00:00Z"
seq, 2, "Creating virtual environment at: .venv", "2025-08-22T12:00:00Z"
)
await asyncio.sleep(1.5)
await asyncio.sleep(0.5)
yield make_log_event(
seq, 2, "Second log after run completes", "2025-08-22T12:00:01Z"
seq, 3, "Activate with: source .venv/bin/activate", "2025-08-22T12:00:01Z"
)
yield make_log_event(seq, 4, "Hello, World!", "2025-08-22T12:00:01Z")


async def generate_warning_log_stream(seq: int):
"""Stream a warning and a couple of logs, then finish."""
yield make_warning_event("Rate limit approaching", "2025-08-22T12:00:00Z")
yield make_log_event(seq, 1, "Warning stream log 1", "2025-08-22T12:00:00Z")
await asyncio.sleep(1.2)
yield make_log_event(seq, 2, "Warning stream log 2", "2025-08-22T12:00:01Z")
"""Stream logs then emit warning before closing, matching real server behavior."""
yield make_log_event(seq, 1, "Using CPython 3.12.9", "2025-08-22T12:00:00Z")
yield make_log_event(
seq, 2, "Creating virtual environment at: .venv", "2025-08-22T12:00:00Z"
)
await asyncio.sleep(0.5)
yield make_log_event(
seq, 3, "Activate with: source .venv/bin/activate", "2025-08-22T12:00:00Z"
)
yield make_log_event(seq, 4, "Hello, World!", "2025-08-22T12:00:01Z")
await asyncio.sleep(0.5)
yield make_warning_event("No new logs available", "2025-08-22T12:00:02Z")


async def generate_normal_log_stream(seq: int):
@@ -635,6 +648,8 @@ async def update_schedule(schedule_id: str, schedule_data: Dict[str, Any]):
schedule = mock_schedules_db[schedule_id]
if "cron" in schedule_data:
schedule["cron"] = schedule_data["cron"]
if "name" in schedule_data:
schedule["name"] = schedule_data["name"]
if "parameters" in schedule_data:
schedule["parameters"] = schedule_data["parameters"]
schedule["updated_at"] = now_iso()