Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Repository Guidelines

## Project Structure & Module Organization
`ghostwriter/` contains the Django application, split into app modules such as `api/`, `reporting/`, `rolodex/`, and `shepherd/`. Templates and static assets live under `ghostwriter/templates/` and `ghostwriter/static/`. Tests sit beside each app in `ghostwriter/*/tests/test_*.py`. Configuration entry points are `manage.py`, `config/settings/`, `local.yml`, and `production.yml`. Frontend and collaboration code lives in `javascript/src/frontend/` and `javascript/src/collab_server/`; generated GraphQL types are in `javascript/src/__generated__/`, and build output lands in `javascript/dist_*`. Long-form docs are in `DOCS/`.

## Build, Test, and Development Commands
Use Docker for the Django stack, and run `npm` commands only from inside `javascript/`.

- Bootstrap the recommended local development environment with the platform-specific CLI binary: `./ghostwriter-cli-linux install --mode local-dev` on Linux, `./ghostwriter-cli-macos install --mode local-dev` on macOS, or `./ghostwriter-cli.exe install --mode local-dev` on Windows.
- `docker compose -f local.yml up -d` starts or refreshes the local services after config changes.
- `docker compose -f local.yml run --rm django python manage.py makemigrations && docker compose -f local.yml run --rm django python manage.py migrate` creates and applies schema changes.
- `docker compose -f local.yml run django coverage run manage.py test --exclude-tag=GitHub` runs the Python test suite the same way CI does.
- `cd javascript && npm run check` runs the TypeScript compiler with `--noEmit`.
- `cd javascript && npm run format` formats frontend sources with Prettier.
- `cd javascript && npm run codegen` regenerates GraphQL client artifacts after schema or query changes.

## Coding Style & Naming Conventions
Python uses 4-space indentation, `Black`, `isort`, and `flake8`. Follow the project docstring style in `DOCS/coding-style-guide/`, and keep imports grouped and sorted. JavaScript/TypeScript also uses 4-space indentation; Prettier enforces semicolons and double quotes for frontend sources. Use `snake_case` for Python modules and tests, `PascalCase` for React components, and keep test files named `test_<feature>.py`.

## Testing Guidelines
Add or update tests for every behavior change; PR templates require it. Prefer app-local tests in the matching `ghostwriter/<app>/tests/` package. For frontend GraphQL changes, regenerate `javascript/src/__generated__/` and run `npm run check` before opening a PR. Maintain coverage for touched code paths; CI uploads coverage from the Django suite.

## Commit & Pull Request Guidelines
Recent history favors short, past-tense commit subjects such as `Corrected typo` or `Updated for GraphQL changes`. Keep commits narrowly scoped and describe the change, not the investigation. Pull requests should link the relevant issue, explain the design, note alternatives or drawbacks, describe verification steps, include a release-notes line, and pass all status checks. Include screenshots when UI behavior changes.
49 changes: 49 additions & 0 deletions ghostwriter/oplog/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,55 @@ def test_ansi_cursor_movement_stripped(self):
self.assertNotIn("\x1b", text)
self.assertIn("text", text)

def test_malformed_csi_with_embedded_escape_preserved(self):
    r"""A CSI prefix interrupted by another ESC is kept as literal text.

    The input holds ``\x1b[12`` followed directly by ``\x1bX``; the expected
    output keeps the ``\x1b[12`` fragment and no longer contains ``\x1bX``.
    """
    cast_line = '[0.5, "o", "prefix\\u001b[12\\u001bXsuffix"]\n'
    recording = self._v3(cast_line)
    extracted, warn = extract_cast_text(recording)
    self.assertIsNone(warn)
    self.assertEqual(extracted, "prefix\x1b[12suffix")

def test_ansi_osc_bel_sequence_stripped(self):
    r"""An OSC sequence closed by BEL (\x07) disappears from the extracted text."""
    cast_line = '[0.5, "o", "before\\u001b]0;title\\u0007after"]\n'
    recording = self._v3(cast_line)
    extracted, warn = extract_cast_text(recording)
    self.assertIsNone(warn)
    self.assertEqual(extracted, "beforeafter")

def test_ansi_osc_st_sequence_stripped(self):
    r"""An OSC sequence closed by ST (\x1b\\) disappears from the extracted text."""
    cast_line = '[0.5, "o", "before\\u001b]0;title\\u001b\\\\after"]\n'
    recording = self._v3(cast_line)
    extracted, warn = extract_cast_text(recording)
    self.assertIsNone(warn)
    self.assertEqual(extracted, "beforeafter")

def test_ansi_fe_sequence_stripped(self):
    r"""A two-character Fe escape (here \x1bM) is dropped from the extracted text."""
    cast_line = '[0.5, "o", "before\\u001bMafter"]\n'
    recording = self._v3(cast_line)
    extracted, warn = extract_cast_text(recording)
    self.assertIsNone(warn)
    self.assertEqual(extracted, "beforeafter")

def test_unterminated_osc_sequence_preserved(self):
    r"""An OSC opener with no BEL or ST terminator is left untouched in the output."""
    cast_line = '[0.5, "o", "prefix\\u001b]title only"]\n'
    recording = self._v3(cast_line)
    extracted, warn = extract_cast_text(recording)
    self.assertIsNone(warn)
    self.assertEqual(extracted, "prefix\x1b]title only")

def test_large_unterminated_osc_sequence_preserved(self):
    """A 10,000-character unterminated OSC payload comes back unchanged and without error."""
    payload = "prefix\\u001b]" + ("a" * 10000)
    recording = self._v3(f'[0.5, "o", "{payload}"]\n')
    extracted, warn = extract_cast_text(recording)
    self.assertIsNone(warn)
    self.assertEqual(extracted, f"prefix\x1b]{'a' * 10000}")

def test_repeated_unterminated_osc_prefixes_preserved(self):
    """2,000 back-to-back unterminated OSC openers are returned exactly as given."""
    payload = "\\u001b]a" * 2000
    recording = self._v3(f'[0.5, "o", "{payload}"]\n')
    extracted, warn = extract_cast_text(recording)
    self.assertIsNone(warn)
    self.assertEqual(extracted, "\x1b]a" * 2000)

# ------------------------------------------------------------------
# Gzip support
# ------------------------------------------------------------------
Expand Down
93 changes: 80 additions & 13 deletions ghostwriter/oplog/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,88 @@
import gzip
import json
import logging
import re

logger = logging.getLogger(__name__)

# Matches ANSI/VT100 escape sequences: CSI, SGR, OSC, cursor movement, etc.
_ANSI_ESCAPE_RE = re.compile(
r"\x1b"
r"(?:"
r"[@-Z\\-_]" # Fe escape sequences (e.g. \x1bO, \x1b7)
r"|"
r"\[[0-?]*[ -/]*[@-~]" # CSI sequences (e.g. \x1b[0m, \x1b[32m, \x1b[2J)
r"|"
r"\][^\x07]*(?:\x07|\x1b\\)" # OSC sequences terminated by BEL or ST
r")"
)
def _strip_ansi_escapes(text: str) -> str:
"""
Remove ANSI/VT100 escape sequences using a linear scan.

This strips the same families covered previously by the regex:
single-character Fe escapes, CSI sequences, and OSC sequences
terminated by BEL or ST. Unterminated/unknown escape fragments are
preserved as literal text.
"""
cleaned = []
index = 0
length = len(text)

while index < length:
if text[index] != "\x1b":
cleaned.append(text[index])
index += 1
continue

if index + 1 >= length:
cleaned.append(text[index])
break

next_char = text[index + 1]
next_ord = ord(next_char)

if next_char == "[":
cursor = index + 2
seen_intermediate = False
while cursor < length:
char_ord = ord(text[cursor])

if 0x30 <= char_ord <= 0x3F and not seen_intermediate:
cursor += 1
continue

if 0x20 <= char_ord <= 0x2F:
seen_intermediate = True
cursor += 1
continue

if 0x40 <= char_ord <= 0x7E:
index = cursor + 1
break

# Preserve malformed CSI text and continue from the invalid byte
# so we do not rescan the tail and drift into quadratic behavior.
cleaned.append(text[index:cursor])
index = cursor
break
else:
cleaned.append(text[index:])
break
continue
Comment thread
chrismaddalena marked this conversation as resolved.

if next_char == "]":
cursor = index + 2
while cursor < length:
if text[cursor] == "\x07":
index = cursor + 1
break
if text[cursor] == "\x1b" and cursor + 1 < length and text[cursor + 1] == "\\":
index = cursor + 2
break
cursor += 1
else:
cleaned.append(text[index:])
break
continue

# Fe escape sequences use a single final byte in the 0x40-0x5F range.
if 0x40 <= next_ord <= 0x5F:
index += 2
continue

cleaned.append(text[index])
index += 1

return "".join(cleaned)


def extract_cast_text(file_data: bytes) -> tuple:
Expand Down Expand Up @@ -84,7 +151,7 @@ def extract_cast_text(file_data: bytes) -> tuple:
continue

if event[1] in ("i", "o"):
clean = _ANSI_ESCAPE_RE.sub("", str(event[2]))
clean = _strip_ansi_escapes(str(event[2]))
if clean:
parts.append(clean)

Expand Down
Loading
Loading