GhostManager · chrismaddalena · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1,24 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+`ghostwriter/` contains the Django application, split into app modules such as `api/`, `reporting/`, `rolodex/`, and `shepherd/`. Templates and static assets live under `ghostwriter/templates/` and `ghostwriter/static/`. Tests sit beside each app in `ghostwriter/*/tests/test_*.py`. Configuration entry points are `manage.py`, `config/settings/`, `local.yml`, and `production.yml`. Frontend and collaboration code lives in `javascript/src/frontend/` and `javascript/src/collab_server/`; generated GraphQL types are in `javascript/src/__generated__/`, and build output lands in `javascript/dist_*`. Long-form docs are in `DOCS/`.
+
+## Build, Test, and Development Commands
+Use Docker for the Django stack, and run `npm` commands only from inside `javascript/`.
+
+- Bootstrap the recommended local development environment with the platform-specific CLI binary: `./ghostwriter-cli-linux install --mode local-dev` on Linux, `./ghostwriter-cli-macos install --mode local-dev` on macOS, or `./ghostwriter-cli.exe install --mode local-dev` on Windows.
+- `docker compose -f local.yml up -d` starts or refreshes the local services after config changes.
+- `docker compose -f local.yml run --rm django python manage.py makemigrations && docker compose -f local.yml run --rm django python manage.py migrate` creates and applies schema changes.
+- `docker compose -f local.yml run django coverage run manage.py test --exclude-tag=GitHub` runs the Python test suite the same way CI does.
+- `cd javascript && npm run check` runs the TypeScript compiler with `--noEmit`.
+- `cd javascript && npm run format` formats frontend sources with Prettier.
+- `cd javascript && npm run codegen` regenerates GraphQL client artifacts after schema or query changes.
+
+## Coding Style & Naming Conventions
+Python uses 4-space indentation, `Black`, `isort`, and `flake8`. Follow the project docstring style in `DOCS/coding-style-guide/`, and keep imports grouped and sorted. JavaScript/TypeScript also uses 4-space indentation; Prettier enforces semicolons and double quotes for frontend sources. Use `snake_case` for Python modules and tests, `PascalCase` for React components, and keep test files named `test_<feature>.py`.
+
+## Testing Guidelines
+Add or update tests for every behavior change; PR templates require it. Prefer app-local tests in the matching `ghostwriter/<app>/tests/` package. For frontend GraphQL changes, regenerate `javascript/src/__generated__/` and run `npm run check` before opening a PR. Maintain coverage for touched code paths; CI uploads coverage from the Django suite.
+
+## Commit & Pull Request Guidelines
+Recent history favors short, past-tense commit subjects such as `Corrected typo` or `Updated for GraphQL changes`. Keep commits narrowly scoped and describe the change, not the investigation. Pull requests should link the relevant issue, explain the design, note alternatives or drawbacks, describe verification steps, include a release-notes line, and pass all status checks. Include screenshots when UI behavior changes.
diff --git a/ghostwriter/oplog/tests/test_utils.py b/ghostwriter/oplog/tests/test_utils.py
@@ -138,6 +138,55 @@ def test_ansi_cursor_movement_stripped(self):
         self.assertNotIn("\x1b", text)
         self.assertIn("text", text)
 
+    def test_malformed_csi_with_embedded_escape_preserved(self):
+        r"""Malformed CSI text with an embedded ESC remains literal."""
+        events = '[0.5, "o", "prefix\\u001b[12\\u001bXsuffix"]\n'  # "ESC [ 12" is interrupted by a second ESC
+        text, warning = extract_cast_text(self._v3(events))  # _v3 is defined elsewhere in this class (not shown here)
+        self.assertIsNone(warning)
+        self.assertEqual(text, "prefix\x1b[12suffix")  # broken CSI prefix kept; the following "ESC X" is removed
+
+    def test_ansi_osc_bel_sequence_stripped(self):
+        r"""OSC sequences terminated by BEL are removed."""
+        events = '[0.5, "o", "before\\u001b]0;title\\u0007after"]\n'  # ESC ] 0;title BEL
+        text, warning = extract_cast_text(self._v3(events))
+        self.assertIsNone(warning)
+        self.assertEqual(text, "beforeafter")  # opener, payload, and BEL are all gone
+
+    def test_ansi_osc_st_sequence_stripped(self):
+        r"""OSC sequences terminated by ST (\x1b\\) are removed."""
+        events = '[0.5, "o", "before\\u001b]0;title\\u001b\\\\after"]\n'  # ESC ] 0;title, then ESC + JSON-escaped backslash
+        text, warning = extract_cast_text(self._v3(events))
+        self.assertIsNone(warning)
+        self.assertEqual(text, "beforeafter")  # opener, payload, and the two-character ST are all gone
+
+    def test_ansi_fe_sequence_stripped(self):
+        r"""Single-character Fe escape sequences are removed."""
+        events = '[0.5, "o", "before\\u001bMafter"]\n'  # ESC followed by the single byte "M"
+        text, warning = extract_cast_text(self._v3(events))
+        self.assertIsNone(warning)
+        self.assertEqual(text, "beforeafter")  # both ESC and "M" are dropped
+
+    def test_unterminated_osc_sequence_preserved(self):
+        r"""Malformed OSC fragments without BEL or ST remain in the extracted text."""
+        events = '[0.5, "o", "prefix\\u001b]title only"]\n'  # ESC ] with no BEL or ST after it
+        text, warning = extract_cast_text(self._v3(events))
+        self.assertIsNone(warning)
+        self.assertEqual(text, "prefix\x1b]title only")  # the whole fragment, ESC included, is returned as-is
+
+    def test_large_unterminated_osc_sequence_preserved(self):
+        """Large malformed OSC payloads are preserved without raising an exception."""
+        payload = "prefix\\u001b]" + ("a" * 10000)  # unterminated ESC ] followed by 10,000 characters
+        text, warning = extract_cast_text(self._v3(f'[0.5, "o", "{payload}"]\n'))
+        self.assertIsNone(warning)
+        self.assertEqual(text, f"prefix\x1b]{'a' * 10000}")  # nothing is stripped from the long tail
+
+    def test_repeated_unterminated_osc_prefixes_preserved(self):
+        """Repeated unterminated OSC prefixes are preserved without rescanning the tail."""
+        payload = "\\u001b]a" * 2000  # 2,000 back-to-back "ESC ] a" fragments, none terminated
+        text, warning = extract_cast_text(self._v3(f'[0.5, "o", "{payload}"]\n'))
+        self.assertIsNone(warning)
+        self.assertEqual(text, "\x1b]a" * 2000)  # every fragment comes back unchanged
+
     # ------------------------------------------------------------------
     # Gzip support
     # ------------------------------------------------------------------

diff --git a/ghostwriter/oplog/utils.py b/ghostwriter/oplog/utils.py
@@ -4,21 +4,88 @@
 import gzip
 import json
 import logging
-import re
 
 logger = logging.getLogger(__name__)
 
-# Matches ANSI/VT100 escape sequences: CSI, SGR, OSC, cursor movement, etc.
-_ANSI_ESCAPE_RE = re.compile(
-    r"\x1b"
-    r"(?:"
-    r"[@-Z\\-_]"  # Fe escape sequences (e.g. \x1bO, \x1b7)
-    r"|"
-    r"\[[0-?]*[ -/]*[@-~]"  # CSI sequences (e.g. \x1b[0m, \x1b[32m, \x1b[2J)
-    r"|"
-    r"\][^\x07]*(?:\x07|\x1b\\)"  # OSC sequences terminated by BEL or ST
-    r")"
-)
+def _strip_ansi_escapes(text: str) -> str:
+    """
+    Remove ANSI/VT100 escape sequences using a linear scan.
+
+    This strips the same families covered previously by the regex:
+    single-character Fe escapes, CSI sequences, and OSC sequences
+    terminated by BEL or ST. Unterminated/unknown escape fragments are
+    preserved as literal text.
+    """
+    cleaned = []  # output fragments; joined once at the end
+    index = 0  # position of the next unprocessed character
+    length = len(text)
+
+    while index < length:
+        if text[index] != "\x1b":  # ordinary character: copy it through
+            cleaned.append(text[index])
+            index += 1
+            continue
+
+        if index + 1 >= length:  # lone ESC as the very last character: keep it
+            cleaned.append(text[index])
+            break
+
+        next_char = text[index + 1]
+        next_ord = ord(next_char)  # only consulted by the Fe range check further down
+
+        if next_char == "[":  # CSI: ESC [ <parameters> <intermediates> <final>
+            cursor = index + 2
+            seen_intermediate = False
+            while cursor < length:
+                char_ord = ord(text[cursor])
+
+                if 0x30 <= char_ord <= 0x3F and not seen_intermediate:  # parameter byte; not accepted after an intermediate
+                    cursor += 1
+                    continue
+
+                if 0x20 <= char_ord <= 0x2F:  # intermediate byte
+                    seen_intermediate = True
+                    cursor += 1
+                    continue
+
+                if 0x40 <= char_ord <= 0x7E:  # final byte: drop the whole sequence
+                    index = cursor + 1
+                    break
+
+                # Preserve malformed CSI text and continue from the invalid byte
+                # so we do not rescan the tail and drift into quadratic behavior.
+                cleaned.append(text[index:cursor])
+                index = cursor
+                break
+            else:  # input ended before a final byte: keep the remainder verbatim
+                cleaned.append(text[index:])
+                break
+            continue  # resume the outer scan at the updated index
+
+        if next_char == "]":  # OSC: ESC ] <payload> terminated by BEL or ST
+            cursor = index + 2
+            while cursor < length:
+                if text[cursor] == "\x07":  # BEL terminator
+                    index = cursor + 1
+                    break
+                if text[cursor] == "\x1b" and cursor + 1 < length and text[cursor + 1] == "\\":
+                    index = cursor + 2  # skip both characters of the ST terminator
+                    break
+                cursor += 1
+            else:  # no terminator anywhere: keep the remainder verbatim and stop
+                cleaned.append(text[index:])
+                break
+            continue  # resume the outer scan just past the terminator
+
+        # Fe escape sequences use a single final byte in the 0x40-0x5F range.
+        if 0x40 <= next_ord <= 0x5F:  # "[" and "]" never reach here; handled above
+            index += 2
+            continue
+
+        cleaned.append(text[index])  # unrecognized escape: keep the ESC, rescan from the next character
+        index += 1
+
+    return "".join(cleaned)
 
 
 def extract_cast_text(file_data: bytes) -> tuple:
@@ -84,7 +151,7 @@ def extract_cast_text(file_data: bytes) -> tuple:
                 continue
 
             if event[1] in ("i", "o"):
-                clean = _ANSI_ESCAPE_RE.sub("", str(event[2]))
+                clean = _strip_ansi_escapes(str(event[2]))
                 if clean:
                     parts.append(clean)