diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..e2d49517b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,24 @@ +# Repository Guidelines + +## Project Structure & Module Organization +`ghostwriter/` contains the Django application, split into app modules such as `api/`, `reporting/`, `rolodex/`, and `shepherd/`. Templates and static assets live under `ghostwriter/templates/` and `ghostwriter/static/`. Tests sit beside each app in `ghostwriter/*/tests/test_*.py`. Configuration entry points are `manage.py`, `config/settings/`, `local.yml`, and `production.yml`. Frontend and collaboration code lives in `javascript/src/frontend/` and `javascript/src/collab_server/`; generated GraphQL types are in `javascript/src/__generated__/`, and build output lands in `javascript/dist_*`. Long-form docs are in `DOCS/`. + +## Build, Test, and Development Commands +Use Docker for the Django stack and `npm` only inside `javascript/`. + +- Bootstrap the recommended local development environment with the platform-specific CLI binary: `./ghostwriter-cli-linux install --mode local-dev` on Linux, `./ghostwriter-cli-macos install --mode local-dev` on macOS, or `./ghostwriter-cli.exe install --mode local-dev` on Windows. +- `docker compose -f local.yml up -d` starts or refreshes the local services after config changes. +- `docker compose -f local.yml run --rm django python manage.py makemigrations && docker compose -f local.yml run --rm django python manage.py migrate` creates and applies schema changes. +- `docker compose -f local.yml run django coverage run manage.py test --exclude-tag=GitHub` runs the Python test suite the same way CI does. +- `cd javascript && npm run check` runs the TypeScript compiler with `--noEmit`. +- `cd javascript && npm run format` formats frontend sources with Prettier. +- `cd javascript && npm run codegen` regenerates GraphQL client artifacts after schema or query changes. 
+ +## Coding Style & Naming Conventions +Python uses 4-space indentation, `Black`, `isort`, and `flake8`. Follow the project docstring style in `DOCS/coding-style-guide/`, and keep imports grouped and sorted. JavaScript/TypeScript also uses 4-space indentation; Prettier enforces semicolons and double quotes for frontend sources. Use `snake_case` for Python modules and tests, `PascalCase` for React components, and keep test files named `test_<module>.py`. + +## Testing Guidelines +Add or update tests for every behavior change; PR templates require it. Prefer app-local tests in the matching `ghostwriter/<app>/tests/` package. For frontend GraphQL changes, regenerate `javascript/src/__generated__/` and run `npm run check` before opening a PR. Maintain coverage for touched code paths; CI uploads coverage from the Django suite. + +## Commit & Pull Request Guidelines +Recent history favors short, imperative commit subjects such as `Corrected typo` or `Updated for GraphQL changes`. Keep commits narrowly scoped and describe the change, not the investigation. Pull requests should link the relevant issue, explain the design, note alternatives or drawbacks, describe verification steps, include a release-notes line, and pass all status checks. Include screenshots when UI behavior changes. 
diff --git a/ghostwriter/oplog/tests/test_utils.py b/ghostwriter/oplog/tests/test_utils.py index 9fdeccb77..c415446e3 100644 --- a/ghostwriter/oplog/tests/test_utils.py +++ b/ghostwriter/oplog/tests/test_utils.py @@ -138,6 +138,55 @@ def test_ansi_cursor_movement_stripped(self): self.assertNotIn("\x1b", text) self.assertIn("text", text) + def test_malformed_csi_with_embedded_escape_preserved(self): + r"""Malformed CSI text with an embedded ESC remains literal.""" + events = '[0.5, "o", "prefix\\u001b[12\\u001bXsuffix"]\n' + text, warning = extract_cast_text(self._v3(events)) + self.assertIsNone(warning) + self.assertEqual(text, "prefix\x1b[12suffix") + + def test_ansi_osc_bel_sequence_stripped(self): + r"""OSC sequences terminated by BEL are removed.""" + events = '[0.5, "o", "before\\u001b]0;title\\u0007after"]\n' + text, warning = extract_cast_text(self._v3(events)) + self.assertIsNone(warning) + self.assertEqual(text, "beforeafter") + + def test_ansi_osc_st_sequence_stripped(self): + r"""OSC sequences terminated by ST (\x1b\\) are removed.""" + events = '[0.5, "o", "before\\u001b]0;title\\u001b\\\\after"]\n' + text, warning = extract_cast_text(self._v3(events)) + self.assertIsNone(warning) + self.assertEqual(text, "beforeafter") + + def test_ansi_fe_sequence_stripped(self): + r"""Single-character Fe escape sequences are removed.""" + events = '[0.5, "o", "before\\u001bMafter"]\n' + text, warning = extract_cast_text(self._v3(events)) + self.assertIsNone(warning) + self.assertEqual(text, "beforeafter") + + def test_unterminated_osc_sequence_preserved(self): + r"""Malformed OSC fragments without BEL or ST remain in the extracted text.""" + events = '[0.5, "o", "prefix\\u001b]title only"]\n' + text, warning = extract_cast_text(self._v3(events)) + self.assertIsNone(warning) + self.assertEqual(text, "prefix\x1b]title only") + + def test_large_unterminated_osc_sequence_preserved(self): + """Large malformed OSC payloads are preserved without raising an exception.""" + 
payload = "prefix\\u001b]" + ("a" * 10000) + text, warning = extract_cast_text(self._v3(f'[0.5, "o", "{payload}"]\n')) + self.assertIsNone(warning) + self.assertEqual(text, f"prefix\x1b]{'a' * 10000}") + + def test_repeated_unterminated_osc_prefixes_preserved(self): + """Repeated unterminated OSC prefixes are preserved without rescanning the tail.""" + payload = "\\u001b]a" * 2000 + text, warning = extract_cast_text(self._v3(f'[0.5, "o", "{payload}"]\n')) + self.assertIsNone(warning) + self.assertEqual(text, "\x1b]a" * 2000) + # ------------------------------------------------------------------ # Gzip support # ------------------------------------------------------------------ diff --git a/ghostwriter/oplog/utils.py b/ghostwriter/oplog/utils.py index eed7ef740..e47067146 100644 --- a/ghostwriter/oplog/utils.py +++ b/ghostwriter/oplog/utils.py @@ -4,21 +4,88 @@ import gzip import json import logging -import re logger = logging.getLogger(__name__) -# Matches ANSI/VT100 escape sequences: CSI, SGR, OSC, cursor movement, etc. -_ANSI_ESCAPE_RE = re.compile( - r"\x1b" - r"(?:" - r"[@-Z\\-_]" # Fe escape sequences (e.g. \x1bO, \x1b7) - r"|" - r"\[[0-?]*[ -/]*[@-~]" # CSI sequences (e.g. \x1b[0m, \x1b[32m, \x1b[2J) - r"|" - r"\][^\x07]*(?:\x07|\x1b\\)" # OSC sequences terminated by BEL or ST - r")" -) +def _strip_ansi_escapes(text: str) -> str: + """ + Remove ANSI/VT100 escape sequences using a linear scan. + + This strips the same families covered previously by the regex: + single-character Fe escapes, CSI sequences, and OSC sequences + terminated by BEL or ST. Unterminated/unknown escape fragments are + preserved as literal text. 
+ """ + cleaned = [] + index = 0 + length = len(text) + + while index < length: + if text[index] != "\x1b": + cleaned.append(text[index]) + index += 1 + continue + + if index + 1 >= length: + cleaned.append(text[index]) + break + + next_char = text[index + 1] + next_ord = ord(next_char) + + if next_char == "[": + cursor = index + 2 + seen_intermediate = False + while cursor < length: + char_ord = ord(text[cursor]) + + if 0x30 <= char_ord <= 0x3F and not seen_intermediate: + cursor += 1 + continue + + if 0x20 <= char_ord <= 0x2F: + seen_intermediate = True + cursor += 1 + continue + + if 0x40 <= char_ord <= 0x7E: + index = cursor + 1 + break + + # Preserve malformed CSI text and continue from the invalid byte + # so we do not rescan the tail and drift into quadratic behavior. + cleaned.append(text[index:cursor]) + index = cursor + break + else: + cleaned.append(text[index:]) + break + continue + + if next_char == "]": + cursor = index + 2 + while cursor < length: + if text[cursor] == "\x07": + index = cursor + 1 + break + if text[cursor] == "\x1b" and cursor + 1 < length and text[cursor + 1] == "\\": + index = cursor + 2 + break + cursor += 1 + else: + cleaned.append(text[index:]) + break + continue + + # Fe escape sequences use a single final byte in the 0x40-0x5F range. 
+ if 0x40 <= next_ord <= 0x5F: + index += 2 + continue + + cleaned.append(text[index]) + index += 1 + + return "".join(cleaned) def extract_cast_text(file_data: bytes) -> tuple: @@ -84,7 +151,7 @@ def extract_cast_text(file_data: bytes) -> tuple: continue if event[1] in ("i", "o"): - clean = _ANSI_ESCAPE_RE.sub("", str(event[2])) + clean = _strip_ansi_escapes(str(event[2])) if clean: parts.append(clean) diff --git a/javascript/package-lock.json b/javascript/package-lock.json index bc76d8cba..995000ac1 100644 --- a/javascript/package-lock.json +++ b/javascript/package-lock.json @@ -2333,6 +2333,348 @@ "node": ">= 8" } }, + "node_modules/@parcel/watcher": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher/-/watcher-2.5.6.tgz", + "integrity": "sha512-tmmZ3lQxAe/k/+rNnXQRawJ4NjxO2hqiOLTHvWchtGZULp4RyFeh6aU4XdOYBFe2KE1oShQTv4AblOs2iOrNnQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "dependencies": { + "detect-libc": "^2.0.3", + "is-glob": "^4.0.3", + "node-addon-api": "^7.0.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "@parcel/watcher-android-arm64": "2.5.6", + "@parcel/watcher-darwin-arm64": "2.5.6", + "@parcel/watcher-darwin-x64": "2.5.6", + "@parcel/watcher-freebsd-x64": "2.5.6", + "@parcel/watcher-linux-arm-glibc": "2.5.6", + "@parcel/watcher-linux-arm-musl": "2.5.6", + "@parcel/watcher-linux-arm64-glibc": "2.5.6", + "@parcel/watcher-linux-arm64-musl": "2.5.6", + "@parcel/watcher-linux-x64-glibc": "2.5.6", + "@parcel/watcher-linux-x64-musl": "2.5.6", + "@parcel/watcher-win32-arm64": "2.5.6", + "@parcel/watcher-win32-ia32": "2.5.6", + "@parcel/watcher-win32-x64": "2.5.6" + } + }, + "node_modules/@parcel/watcher-android-arm64": { + "version": "2.5.6", + "resolved": 
"https://registry.npmjs.org/@parcel/watcher-android-arm64/-/watcher-android-arm64-2.5.6.tgz", + "integrity": "sha512-YQxSS34tPF/6ZG7r/Ih9xy+kP/WwediEUsqmtf0cuCV5TPPKw/PQHRhueUo6JdeFJaqV3pyjm0GdYjZotbRt/A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-darwin-arm64": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-darwin-arm64/-/watcher-darwin-arm64-2.5.6.tgz", + "integrity": "sha512-Z2ZdrnwyXvvvdtRHLmM4knydIdU9adO3D4n/0cVipF3rRiwP+3/sfzpAwA/qKFL6i1ModaabkU7IbpeMBgiVEA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-darwin-x64": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-darwin-x64/-/watcher-darwin-x64-2.5.6.tgz", + "integrity": "sha512-HgvOf3W9dhithcwOWX9uDZyn1lW9R+7tPZ4sug+NGrGIo4Rk1hAXLEbcH1TQSqxts0NYXXlOWqVpvS1SFS4fRg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-freebsd-x64": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-freebsd-x64/-/watcher-freebsd-x64-2.5.6.tgz", + "integrity": "sha512-vJVi8yd/qzJxEKHkeemh7w3YAn6RJCtYlE4HPMoVnCpIXEzSrxErBW5SJBgKLbXU3WdIpkjBTeUNtyBVn8TRng==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", 
+ "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm-glibc": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm-glibc/-/watcher-linux-arm-glibc-2.5.6.tgz", + "integrity": "sha512-9JiYfB6h6BgV50CCfasfLf/uvOcJskMSwcdH1PHH9rvS1IrNy8zad6IUVPVUfmXr+u+Km9IxcfMLzgdOudz9EQ==", + "cpu": [ + "arm" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm-musl": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm-musl/-/watcher-linux-arm-musl-2.5.6.tgz", + "integrity": "sha512-Ve3gUCG57nuUUSyjBq/MAM0CzArtuIOxsBdQ+ftz6ho8n7s1i9E1Nmk/xmP323r2YL0SONs1EuwqBp2u1k5fxg==", + "cpu": [ + "arm" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm64-glibc": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm64-glibc/-/watcher-linux-arm64-glibc-2.5.6.tgz", + "integrity": "sha512-f2g/DT3NhGPdBmMWYoxixqYr3v/UXcmLOYy16Bx0TM20Tchduwr4EaCbmxh1321TABqPGDpS8D/ggOTaljijOA==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm64-musl": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm64-musl/-/watcher-linux-arm64-musl-2.5.6.tgz", + "integrity": 
"sha512-qb6naMDGlbCwdhLj6hgoVKJl2odL34z2sqkC7Z6kzir8b5W65WYDpLB6R06KabvZdgoHI/zxke4b3zR0wAbDTA==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-x64-glibc": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-x64-glibc/-/watcher-linux-x64-glibc-2.5.6.tgz", + "integrity": "sha512-kbT5wvNQlx7NaGjzPFu8nVIW1rWqV780O7ZtkjuWaPUgpv2NMFpjYERVi0UYj1msZNyCzGlaCWEtzc+exjMGbQ==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-x64-musl": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-x64-musl/-/watcher-linux-x64-musl-2.5.6.tgz", + "integrity": "sha512-1JRFeC+h7RdXwldHzTsmdtYR/Ku8SylLgTU/reMuqdVD7CtLwf0VR1FqeprZ0eHQkO0vqsbvFLXUmYm/uNKJBg==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-arm64": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-arm64/-/watcher-win32-arm64-2.5.6.tgz", + "integrity": "sha512-3ukyebjc6eGlw9yRt678DxVF7rjXatWiHvTXqphZLvo7aC5NdEgFufVwjFfY51ijYEWpXbqF5jtrK275z52D4Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": 
"https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-ia32": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-ia32/-/watcher-win32-ia32-2.5.6.tgz", + "integrity": "sha512-k35yLp1ZMwwee3Ez/pxBi5cf4AoBKYXj00CZ80jUz5h8prpiaQsiRPKQMxoLstNuqe2vR4RNPEAEcjEFzhEz/g==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-x64": { + "version": "2.5.6", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-x64/-/watcher-win32-x64-2.5.6.tgz", + "integrity": "sha512-hbQlYcCq5dlAX9Qx+kFb0FHue6vbjlf0FrNzSKdYK2APUf7tGfGxQCk2ihEREmbR6ZMc0MVAD5RIX/41gpUzTw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher/node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "optional": true, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/@pinojs/redact": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz", @@ -4537,6 +4879,17 @@ "node": ">=8" } }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "dev": true, + "license": "Apache-2.0", 
+ "optional": true, + "engines": { + "node": ">=8" + } + }, "node_modules/dir-glob": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", @@ -5845,6 +6198,14 @@ "tslib": "^2.0.3" } }, + "node_modules/node-addon-api": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", + "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/node-domexception": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",