diff --git a/openspec/changes/add-odf-comments/design.md b/openspec/changes/add-odf-comments/design.md new file mode 100644 index 0000000..ff605a8 --- /dev/null +++ b/openspec/changes/add-odf-comments/design.md @@ -0,0 +1,58 @@ +# Design notes — ODF `add_comment` + `get_comments` + +## Markup: `office:annotation` (inline in `content.xml`) +ODF comments are inline annotations, not a separate part. A whole-paragraph comment brackets the +paragraph's inline content; a ranged comment brackets a substring: +```xml + + + Jane Doe + 2026-06-06T00:00:00 + The comment body. + Hello world + +``` +The basic markup (annotation parent + `dc:creator`/`dc:date`/`text:p` body, paired +`office:annotation-end` by `office:name`) is conformant under the ODF 1.2/1.3 reference and is what +LibreOffice writes/reads. + +## B1 — annotation body must not leak into the paragraph stream (peer-review BLOCKER) +`OdfDocument.collectBlocks` and `buildSegments` recurse into every child. An `office:annotation` +contains a `` body, so without a guard that body both (a) inflates the anchor paragraph's +visible text and (b) registers as a phantom `pN` block. Reproduced on built `dist`: +`Hello Comment bodyworld` → +`[{p0,"Hello A…Comment bodyworld"},{p1,"Comment body"}]`. Fix: both traversals skip +`office:annotation` / `office:annotation-end` subtrees via a shared `isAnnotationSubtree` guard. +This is covered by an explicit no-leak regression test. + +## B2 — two insertion paths, not one (peer-review BLOCKER) +The Phase-1 `replaceTextById` single-`#text`-node contract is right for `anchor_text` but wrong for +whole-paragraph anchoring (it would fail on paragraphs with `text:span`/`text:s`/`text:tab`/multiple +text nodes). So: +- **Whole-paragraph** (`addWholeBlockAnnotation`): purely structural — annotation as the block's + first inline child, `office:annotation-end` after its last inline child. Independent of text + segmentation. Empty paragraph → a single point annotation (no end). +- **Ranged** (`addRangedAnnotation`): resolve the host `#text` node via `buildSegments`; cross-node + match → `MATCH_SPANS_MULTIPLE_NODES`. Split the host at `end` then at `start`; insert the + annotation before the middle text node and `office:annotation-end` after it. + +## `office:name` / comment ids +Generated names are allocated by scanning ALL existing `office:name` values: pick the smallest `N` +where `__Annot__N` collides with none and `N` exceeds every numeric suffix present. `commentId = N`. +`get_comments` parses `__Annot__` for the numeric id; annotations whose names don't match are +assigned ids sequentially after the max parsed value, deterministically by document order (a +documented limitation — real `.odt`s from LibreOffice use the `__Annot__N` convention). + +## Replies deferred +ODF has no first-class reply graph (DOCX links replies via `commentsExtended.xml`). Inventing a +thread convention now would be fragile, so `parent_comment_id` on a `.odt` returns +`UNSUPPORTED_FOR_ODF`; `get_comments` returns `replies: []` for every ODF comment. Threading is a +later phase. + +## Parity & dispatch +Handlers mirror the DOCX `add_comment`/`get_comments` param + response shapes (`author` stays +required; comment body param is `text`; `get_comments` returns the `McpComment` shape). Inserting an +annotation does NOT shift positional paragraph IDs (annotations are inline children and are skipped +by `collectBlocks`), so no `invalidates_paragraph_ids_after` field is emitted. `isOdfRequest` keys on +the `.odt` extension and is checked after the gdocs branch; the `resolveSessionForTool` chokepoint +still returns `UNSUPPORTED_FOR_ODF` for any tool not in the ODF set before a DocxSession cast. diff --git a/openspec/changes/add-odf-comments/proposal.md b/openspec/changes/add-odf-comments/proposal.md new file mode 100644 index 0000000..ba58e40 --- /dev/null +++ b/openspec/changes/add-odf-comments/proposal.md @@ -0,0 +1,44 @@ +# Change: Extend the ODF (.odt) lane with `add_comment` and `get_comments` + +## Why +Phases 1/2a (`add-odf-core`, `add-odf-grep-insert`) wired a provider-aware ODF lane so an agent +can `open → read_file → grep → replace_text → insert_paragraph → save` a real `.odt`. The next +capability an editing/reviewing agent reaches for is comments. ODF comments are a real +simplification versus DOCX: they live **inline in `content.xml`** as `office:annotation` +elements — there is no separate `comments.xml` part, no rels, and no `commentsExtended.xml`. This +makes comments the lighter of the two remaining Phase-2 tools (the heavier `compare_documents` +tracked-changes atomizer stays deferred to a later phase). + +## What Changes +- `@usejunior/odf-core`: + - Extract the private `buildSegments` / `Segment` from `document.ts` into a shared + `shared/odf/text_segments.ts` so comments reuse the visible-text↔node mapping without an + import cycle. The extracted walk and `collectBlocks` SHALL skip `office:annotation` / + `office:annotation-end` subtrees so an annotation's body `text:p` never leaks into the anchor + paragraph's visible text nor registers as a phantom block. + - New `comments.ts`: read all `office:annotation`s into a structured list; insert an annotation + either over a whole paragraph (structural: first inline child … last inline child) or over a + `anchor_text` substring (single-`#text`-node split). Cross-node ranged matches return + `MATCH_SPANS_MULTIPLE_NODES`. + - `OdfDocument.addComment(...)` / `OdfDocument.getComments()` delegating to `comments.ts`; + export `OdfComment`. +- `@usejunior/docx-mcp`: add `add_comment` and `get_comments` to the ODF supported-tool set; add + `tools/odf/{add_comment,get_comments}.ts` handlers mirroring the DOCX param/response shapes; add + `isOdfRequest` dispatch branches; update `tool_catalog.ts` provider text + regenerated docs. +- Comment **replies** (`parent_comment_id`) on a `.odt` return `UNSUPPORTED_FOR_ODF` — ODF has no + first-class reply graph and a thread convention is deliberately deferred. + +## Impact +- Affected specs: `mcp-server` (ADDED: ODF comment support, OPCM-01..05); `odf-core` (ADDED: ODF + annotations read/write, OANN-01..05). +- Affected code: `packages/odf-core/src/{document,index}.ts`, + `packages/odf-core/src/shared/odf/{namespaces,text_segments}.ts`, + `packages/odf-core/src/comments.ts`; `packages/docx-mcp/src/tools/{provider_guard,session_resolution}.ts`, + `packages/docx-mcp/src/tools/odf/{add_comment,get_comments}.ts`, + `packages/docx-mcp/src/server.ts`, `tool_catalog.ts` + regenerated tool docs. DOCX and Google + Docs paths unchanged. +- Amends the sibling active `add-odf-grep-insert` spec wording (drops `add_comment` from its + "still-unsupported" example, since it becomes supported here). +- `odf-core` stays `private: true` (optional-lazy provider, not a published dependency of docx-mcp). +- Out of scope: `compare_documents`, comment replies/threads, `delete_comment` for ODF, durable + injected `xml:id` anchors, `.ods`/`.odp`. diff --git a/openspec/changes/add-odf-comments/specs/mcp-server/spec.md b/openspec/changes/add-odf-comments/specs/mcp-server/spec.md new file mode 100644 index 0000000..db9ad22 --- /dev/null +++ b/openspec/changes/add-odf-comments/specs/mcp-server/spec.md @@ -0,0 +1,41 @@ +## ADDED Requirements + +### Requirement: ODF Comment Support (`add_comment`, `get_comments`) + +The MCP server SHALL service `add_comment` and `get_comments` for ODF sessions via the +provider-aware `.odt` lane, in addition to the existing ODF tools. Both SHALL route to the ODF +handler when the `file_path` ends in `.odt` (or resolves to an existing `OdfSession`); the DOCX and +Google Docs paths SHALL remain unchanged, and every still-unsupported tool SHALL continue to return +`UNSUPPORTED_FOR_ODF`. + +ODF `add_comment` SHALL insert an `office:annotation` carrying `dc:creator` (the required `author`), +`dc:date`, and a `text:p` comment body. When `anchor_text` is omitted the annotation SHALL bracket +the whole anchor paragraph; when `anchor_text` is provided the annotation SHALL bracket the matched +substring, returning `TEXT_NOT_FOUND` / `MULTIPLE_MATCHES` when the substring is absent or ambiguous +and `MATCH_SPANS_MULTIPLE_NODES` when the match crosses inline node boundaries. Inserting an +annotation SHALL NOT alter the document's positional paragraph IDs. + +ODF `get_comments` SHALL return every annotation in document order in the same shape as the DOCX +tool (`id`, `author`, `date`, `initials`, `text`, `anchored_paragraph_id`, `replies`), with +`replies` always empty for ODF. Comment **replies** are not supported for ODF: an `add_comment` +invocation carrying `parent_comment_id` against a `.odt` SHALL return `UNSUPPORTED_FOR_ODF`. + +#### Scenario: [OPCM-01] `add_comment` annotates a whole ODF paragraph +- **WHEN** `add_comment` is invoked with a `.odt` `file_path`, a `target_paragraph_id`, an `author`, and `text` (no `anchor_text`) +- **THEN** the ODF handler inserts an `office:annotation` bracketing that paragraph and returns `mode: 'root'` with the anchor paragraph id, and the DOCX/gdocs handlers are not invoked + +#### Scenario: [OPCM-02] `add_comment` annotates a substring via `anchor_text` +- **WHEN** `add_comment` is invoked with a `.odt` `file_path`, `target_paragraph_id`, `anchor_text`, `author`, and `text` +- **THEN** the annotation brackets the matched substring (`office:annotation` … `office:annotation-end`) and the response echoes the `anchor_text` + +#### Scenario: [OPCM-03] `get_comments` returns ODF annotations +- **WHEN** `get_comments` is invoked with a `.odt` `file_path` after a comment has been added +- **THEN** the response lists the comment with its author, date, body text, and anchored paragraph id, and `replies` is empty + +#### Scenario: [OPCM-04] Replies are unsupported for ODF +- **WHEN** `add_comment` is invoked with a `.odt` `file_path` and a `parent_comment_id` +- **THEN** an `UNSUPPORTED_FOR_ODF` error is returned and no DOCX logic runs + +#### Scenario: [OPCM-05] Missing or ambiguous `anchor_text` is rejected +- **WHEN** `add_comment` is invoked with an `anchor_text` that is absent from (or matches multiple times in) the target paragraph +- **THEN** a `TEXT_NOT_FOUND` or `MULTIPLE_MATCHES` error is returned and no annotation is inserted diff --git a/openspec/changes/add-odf-comments/specs/odf-core/spec.md b/openspec/changes/add-odf-comments/specs/odf-core/spec.md new file mode 100644 index 0000000..3501834 --- /dev/null +++ b/openspec/changes/add-odf-comments/specs/odf-core/spec.md @@ -0,0 +1,43 @@ +## ADDED Requirements + +### Requirement: ODF Annotations (read/write) + +`OdfDocument` SHALL provide `addComment(params)` and `getComments()` backed by ODF +`office:annotation` markup inline in `content.xml`. + +`addComment` SHALL insert an `office:annotation` carrying `dc:creator`, `dc:date`, and a `text:p` +body, with an `office:name` allocated so it collides with no existing annotation name. When a +visible `start`/`end` range is omitted, the annotation SHALL bracket the whole anchor paragraph by +structural insertion (annotation as the first inline child, `office:annotation-end` after the last +inline child) independent of text segmentation; an empty paragraph SHALL receive a single point +annotation. When a range is given, the annotation SHALL bracket exactly that range by splitting the +host `#text` node, and a range that crosses inline node boundaries SHALL return a +`MATCH_SPANS_MULTIPLE_NODES` result rather than throwing. + +`getComments` SHALL return every annotation in document order with its id (parsed from +`office:name`), author (`dc:creator`), date (`dc:date` or null), body text, and the positional id of +the anchor paragraph. + +An annotation's body SHALL NOT appear in `getParagraphs()` visible text and SHALL NOT register as a +paragraph block: both `collectBlocks` and the visible-text walk SHALL skip `office:annotation` / +`office:annotation-end` subtrees. + +#### Scenario: [OANN-01] addComment brackets a range +- **WHEN** `addComment` is called with a visible `start`/`end` range inside a single text node +- **THEN** an `office:annotation` is inserted at `start` and an `office:annotation-end` with the same `office:name` at `end` + +#### Scenario: [OANN-02] getComments reads annotation metadata +- **WHEN** `getComments` is called on a document containing an annotation +- **THEN** the returned record carries the `dc:creator` author, the `dc:date` date, the body text, and the anchor paragraph's positional id + +#### Scenario: [OANN-03] Whole-paragraph anchoring survives spans and spaces +- **WHEN** `addComment` is called with no range on a paragraph containing `text:span` / `text:s` content +- **THEN** the annotation brackets the entire paragraph via structural insertion without a `MATCH_SPANS_MULTIPLE_NODES` failure + +#### Scenario: [OANN-04] Cross-node ranged match is rejected +- **WHEN** `addComment` is called with a range that crosses inline node boundaries +- **THEN** an `{ ok: false, code: 'MATCH_SPANS_MULTIPLE_NODES' }` result is returned (no throw) + +#### Scenario: [OANN-05] Annotation body does not leak into the paragraph stream +- **WHEN** a paragraph carries an `office:annotation` whose body is a `text:p` +- **THEN** `getParagraphs()` returns only the host paragraph's visible text (without the comment body) and creates no phantom block for the annotation body diff --git a/openspec/changes/add-odf-comments/tasks.md b/openspec/changes/add-odf-comments/tasks.md new file mode 100644 index 0000000..1ad6d34 --- /dev/null +++ b/openspec/changes/add-odf-comments/tasks.md @@ -0,0 +1,22 @@ +# Tasks + +## odf-core +- [x] Add `DC` namespace to `ODF_NS` (`http://purl.org/dc/elements/1.1/`) +- [x] Extract `Segment` + `buildSegments` into `shared/odf/text_segments.ts`; skip `office:annotation` / `office:annotation-end` subtrees (B1) +- [x] `collectBlocks` in `document.ts` skips annotation subtrees (no phantom blocks) (B1) +- [x] New `comments.ts`: `addWholeBlockAnnotation` (structural) + `addRangedAnnotation` (single-text-node split, B2); `readAnnotations`; `office:name` id allocation scanning all existing names +- [x] `OdfDocument.addComment` / `getComments` delegating to `comments.ts`; export `OdfComment` +- [x] odf-core `comments.test.ts`: whole-block (incl. spans), ranged, point/empty, id allocation, round-trip read, MATCH_SPANS_MULTIPLE_NODES, B1 no-leak regression + +## docx-mcp +- [x] `tools/odf/add_comment.ts` (root + ranged; replies → UNSUPPORTED_FOR_ODF; `author` required; `text` param) +- [x] `tools/odf/get_comments.ts` (maps to McpComment shape; `replies: []`) +- [x] Add `add_comment`, `get_comments` to `ODF_SUPPORTED_TOOLS`; update guard hint + both `session_resolution.ts` hints +- [x] Register handlers in `loadOdfHandlers`; add `isOdfRequest` dispatch branches +- [x] Update `tool_catalog.ts` provider text + regenerate `tool-reference.generated.md` +- [x] Switch `odf_grep_insert.test.ts` OPLR-08 + "two unsupported tools" cases to `compare_documents`; amend `add-odf-grep-insert` spec wording (drop `add_comment` from the unsupported example) + +## Tests & verification +- [x] docx-mcp `odf_comments.test.ts`: OPCM-01..05 scenarios + branch tests; `TEST_FEATURE='add-odf-comments'` +- [x] Full CI gate locally + document-shaped `.odt` smoke (add_comment + get_comments on real NVCA .odt, reopen in LibreOffice) +- [x] Coverage ratchet not regressed diff --git a/openspec/changes/add-odf-grep-insert/specs/mcp-server/spec.md b/openspec/changes/add-odf-grep-insert/specs/mcp-server/spec.md index 4619d12..486086e 100644 --- a/openspec/changes/add-odf-grep-insert/specs/mcp-server/spec.md +++ b/openspec/changes/add-odf-grep-insert/specs/mcp-server/spec.md @@ -31,5 +31,5 @@ re-reads before its next edit. - **THEN** a new paragraph is inserted before/after the anchor, the response returns the new positional paragraph ID(s) and ID-invalidation fields, and re-reading reflects the inserted text #### Scenario: [OPLR-08] Still-unsupported tools remain guarded -- **WHEN** a tool outside the ODF supported set (e.g. `compare_documents`, `add_comment`) is invoked against a `.odt` path or ODF session +- **WHEN** a tool outside the ODF supported set (e.g. `compare_documents`) is invoked against a `.odt` path or ODF session - **THEN** an `UNSUPPORTED_FOR_ODF` error is returned and no DOCX logic runs diff --git a/packages/docx-mcp/docs/tool-reference.generated.md b/packages/docx-mcp/docs/tool-reference.generated.md index 05c50cf..606acc3 100644 --- a/packages/docx-mcp/docs/tool-reference.generated.md +++ b/packages/docx-mcp/docs/tool-reference.generated.md @@ -220,7 +220,7 @@ Close an open file session, or close all sessions with explicit confirmation. Su ## `add_comment` -Add a comment or threaded reply to a document. Provide target_paragraph_id + anchor_text for root comments, or parent_comment_id for replies. +Add a comment or threaded reply to a document. Provide target_paragraph_id + anchor_text for root comments, or parent_comment_id for replies. Supports DOCX and ODT (ODT backs comments with office:annotation; threaded replies are DOCX-only). - readOnly: `false` - destructive: `true` @@ -237,7 +237,7 @@ Add a comment or threaded reply to a document. Provide target_paragraph_id + anc ## `get_comments` -Get all comments from the document with IDs, authors, dates, text, and anchored paragraph IDs. Includes threaded replies. Read-only. +Get all comments from the document with IDs, authors, dates, text, and anchored paragraph IDs. Includes threaded replies (DOCX). Supports DOCX and ODT. Read-only. - readOnly: `true` - destructive: `false` diff --git a/packages/docx-mcp/src/server.ts b/packages/docx-mcp/src/server.ts index 9471bb4..728b2b0 100644 --- a/packages/docx-mcp/src/server.ts +++ b/packages/docx-mcp/src/server.ts @@ -89,11 +89,13 @@ async function loadGDocsHandlers(): Promise { async function loadOdfHandlers(): Promise { if (odfHandlers) return odfHandlers; - const [readFile, replaceText, grep, insertParagraph, save, getFileStatus, closeFile] = await Promise.all([ + const [readFile, replaceText, grep, insertParagraph, addComment, getComments, save, getFileStatus, closeFile] = await Promise.all([ import('./tools/odf/read_file.js'), import('./tools/odf/replace_text.js'), import('./tools/odf/grep.js'), import('./tools/odf/insert_paragraph.js'), + import('./tools/odf/add_comment.js'), + import('./tools/odf/get_comments.js'), import('./tools/odf/save.js'), import('./tools/odf/get_file_status.js'), import('./tools/odf/close_file.js'), @@ -103,6 +105,8 @@ async function loadOdfHandlers(): Promise { replace_text: replaceText.odfReplaceText, grep: grep.odfGrep, insert_paragraph: insertParagraph.odfInsertParagraph, + add_comment: addComment.odfAddComment, + get_comments: getComments.odfGetComments, save: save.odfSave, get_file_status: getFileStatus.odfGetFileStatus, close_file: closeFile.odfCloseFile, @@ -190,8 +194,10 @@ export async function dispatchToolCall( if (isOdfRequest(args)) return await dispatchOdf(sessions, args, 'close_file'); return await closeFile(sessions, args as Parameters[1]); case 'add_comment': + if (isOdfRequest(args)) return await dispatchOdf(sessions, args, 'add_comment'); return await addComment(sessions, args as Parameters[1]); case 'get_comments': + if (isOdfRequest(args)) return await dispatchOdf(sessions, args, 'get_comments'); return await getComments(sessions, args as Parameters[1]); case 'delete_comment': return await deleteComment(sessions, args as Parameters[1]); diff --git a/packages/docx-mcp/src/tool_catalog.ts b/packages/docx-mcp/src/tool_catalog.ts index bdf70e6..65bc053 100644 --- a/packages/docx-mcp/src/tool_catalog.ts +++ b/packages/docx-mcp/src/tool_catalog.ts @@ -278,7 +278,7 @@ export const SAFE_DOCX_TOOL_CATALOG = [ { name: 'add_comment', description: - 'Add a comment or threaded reply to a document. Provide target_paragraph_id + anchor_text for root comments, or parent_comment_id for replies.', + 'Add a comment or threaded reply to a document. Provide target_paragraph_id + anchor_text for root comments, or parent_comment_id for replies. Supports DOCX and ODT (ODT backs comments with office:annotation; threaded replies are DOCX-only).', input: z.object({ ...FILE_FIELD, target_paragraph_id: z.string().optional().describe('Paragraph ID to anchor the comment to (for root comments).'), @@ -293,7 +293,7 @@ export const SAFE_DOCX_TOOL_CATALOG = [ { name: 'get_comments', description: - 'Get all comments from the document with IDs, authors, dates, text, and anchored paragraph IDs. Includes threaded replies. Read-only.', + 'Get all comments from the document with IDs, authors, dates, text, and anchored paragraph IDs. Includes threaded replies (DOCX). Supports DOCX and ODT. Read-only.', input: z.object({ ...FILE_FIELD, }), diff --git a/packages/docx-mcp/src/tools/odf/add_comment.ts b/packages/docx-mcp/src/tools/odf/add_comment.ts new file mode 100644 index 0000000..bcee1d9 --- /dev/null +++ b/packages/docx-mcp/src/tools/odf/add_comment.ts @@ -0,0 +1,102 @@ +import { findUniqueSubstringMatch } from '@usejunior/docx-core'; +import { type OdfSession, SessionManager } from '../../session/manager.js'; +import { errorMessage } from '../../error_utils.js'; +import { err, ok, type ToolResponse } from '../types.js'; + +/** + * ODF (.odt) `add_comment`. Inserts an `office:annotation` either over the whole anchor paragraph + * (no `anchor_text`) or bracketing a substring (`anchor_text`). Mirrors the DOCX tool's param and + * response shapes. + * + * Replies (`parent_comment_id`) are NOT supported for ODF — ODF has no first-class reply graph, so a + * reply request returns `UNSUPPORTED_FOR_ODF` (a documented Phase-2b limitation). Annotations are + * inline children, so positional paragraph IDs do not shift; no ID-invalidation fields are emitted. + */ +export async function odfAddComment( + manager: SessionManager, + session: OdfSession, + params: { + file_path?: string; + target_paragraph_id?: string; + anchor_text?: string; + parent_comment_id?: number; + author: string; + text: string; + initials?: string; + }, + metadata: Record, +): Promise { + try { + if (params.parent_comment_id != null) { + return err( + 'UNSUPPORTED_FOR_ODF', + 'Comment replies are not supported for ODF (.odt) files.', + 'ODF has no first-class reply graph; add a new root comment with target_paragraph_id instead.', + ); + } + + if (!params.target_paragraph_id) { + return err( + 'MISSING_PARAMETER', + 'target_paragraph_id is required for ODF comments.', + 'Provide target_paragraph_id (and optional anchor_text) to anchor the comment.', + ); + } + + const pid = params.target_paragraph_id; + const paraText = session.doc.getParagraphTextById(pid); + if (paraText == null) { + return err( + 'ANCHOR_NOT_FOUND', + `Paragraph ID ${pid} not found in document`, + 'Use grep or read_file to find valid paragraph IDs.', + ); + } + + let range: { start?: number; end?: number } = {}; + if (params.anchor_text) { + const match = findUniqueSubstringMatch(paraText, params.anchor_text); + if (match.status === 'not_found') { + return err( + 'TEXT_NOT_FOUND', + `anchor_text '${params.anchor_text}' not found in paragraph ${pid}`, + 'Verify anchor_text is present in the target paragraph.', + ); + } + if (match.status === 'multiple') { + return err( + 'MULTIPLE_MATCHES', + `Found ${match.matchCount} matches for anchor_text in paragraph ${pid}`, + 'Provide more specific anchor_text for a unique match.', + ); + } + range = { start: match.start, end: match.end }; + } + + const result = session.doc.addComment({ + paragraphId: pid, + ...range, + author: params.author, + text: params.text, + initials: params.initials, + }); + if (!result.ok) { + return err(result.code, result.message); + } + manager.markEdited(session); + + return ok({ + success: true, + comment_id: result.commentId, + anchor_paragraph_id: pid, + anchor_text: params.anchor_text ?? null, + mode: 'root', + provider: 'odf', + file_path: session.originalPath, + edit_count: session.editCount, + ...metadata, + }); + } catch (e: unknown) { + return err('COMMENT_ERROR', `Failed to add comment to ODF document: ${errorMessage(e)}`); + } +} diff --git a/packages/docx-mcp/src/tools/odf/get_comments.ts b/packages/docx-mcp/src/tools/odf/get_comments.ts new file mode 100644 index 0000000..88f6f58 --- /dev/null +++ b/packages/docx-mcp/src/tools/odf/get_comments.ts @@ -0,0 +1,54 @@ +import { type OdfSession, SessionManager } from '../../session/manager.js'; +import { errorMessage } from '../../error_utils.js'; +import { err, ok, type ToolResponse } from '../types.js'; + +type McpComment = { + id: number; + author: string; + date: string | null; + initials: string; + text: string; + anchored_paragraph_id: string | null; + replies: McpComment[]; +}; + +/** + * ODF (.odt) `get_comments`. Walks `content.xml` for `office:annotation` elements and returns them + * in document order in the same shape as the DOCX tool. ODF has no reply graph, so `replies` is + * always empty. + */ +export async function odfGetComments( + _manager: SessionManager, + session: OdfSession, + _params: { file_path?: string }, + metadata: Record, +): Promise { + try { + const comments = session.doc.getComments() as Array<{ + id: number; + author: string; + date: string | null; + initials: string; + text: string; + anchoredParagraphId: string | null; + }>; + const mapped: McpComment[] = comments.map((c) => ({ + id: c.id, + author: c.author, + date: c.date || null, + initials: c.initials, + text: c.text, + anchored_paragraph_id: c.anchoredParagraphId, + replies: [], + })); + return ok({ + success: true, + comments: mapped, + provider: 'odf', + file_path: session.originalPath, + ...metadata, + }); + } catch (e: unknown) { + return err('COMMENT_ERROR', `Failed to read comments from ODF document: ${errorMessage(e)}`); + } +} diff --git a/packages/docx-mcp/src/tools/odf/odf_comments.test.ts b/packages/docx-mcp/src/tools/odf/odf_comments.test.ts new file mode 100644 index 0000000..76c1d11 --- /dev/null +++ b/packages/docx-mcp/src/tools/odf/odf_comments.test.ts @@ -0,0 +1,250 @@ +import { afterEach, describe, expect } from 'vitest'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { testAllure as it, type AllureBddContext } from '../../testing/allure-test.js'; +import { dispatchToolCall } from '../../server.js'; +import { SessionManager } from '../../session/manager.js'; + +const TEST_FEATURE = 'add-odf-comments'; +const test = it.epic('Document Editing').withLabels({ feature: TEST_FEATURE }); +const FIXTURE = path.join( + path.dirname(fileURLToPath(import.meta.url)), + '../../../../odf-core/src/__fixtures__/sample.odt', +); + +const tmpDirs: string[] = []; + +afterEach(async () => { + for (const dir of tmpDirs.splice(0)) { + await fs.rm(dir, { recursive: true, force: true }).catch(() => {}); + } +}); + +async function copyFixture(name = 'sample.odt'): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'safe-docx-odf-cm-')); + tmpDirs.push(dir); + const filePath = path.join(dir, name); + await fs.copyFile(FIXTURE, filePath); + return filePath; +} + +type ErrorResult = { success: false; error: { code: string; message: string; hint?: string } }; + +function assertSuccess(result: Record, label: string): asserts result is { success: true; [k: string]: unknown } { + expect(result.success, `${label} failed: ${JSON.stringify((result as ErrorResult).error)}`).toBe(true); +} +function assertError(result: Record, code: string): asserts result is ErrorResult { + expect(result.success).toBe(false); + expect((result as ErrorResult).error.code).toBe(code); +} + +type McpComment = { + id: number; + author: string; + date: string | null; + initials: string; + text: string; + anchored_paragraph_id: string | null; + replies: McpComment[]; +}; + +describe('ODF add_comment + get_comments lane', () => { + test.openspec('[OPCM-01] `add_comment` annotates a whole ODF paragraph')( + 'add_comment routes to the ODF handler and anchors a whole-paragraph comment', + async ({ given, when, then }: AllureBddContext) => { + let manager: SessionManager; + let filePath: string; + let result: Awaited>; + + await given('a file-first ODF session', async () => { + manager = new SessionManager(); + filePath = await copyFixture(); + }); + await when('add_comment is called with a .odt path and no anchor_text', async () => { + result = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, + target_paragraph_id: 'p1', + author: 'Jane Doe', + text: 'Whole-paragraph note', + }); + }); + await then('the ODF handler inserts a root annotation on that paragraph', () => { + assertSuccess(result, 'add_comment'); + expect(result.provider).toBe('odf'); + expect(result.mode).toBe('root'); + expect(result.anchor_paragraph_id).toBe('p1'); + expect(result.anchor_text).toBeNull(); + expect(typeof result.comment_id).toBe('number'); + }); + }, + ); + + test.openspec('[OPCM-02] `add_comment` annotates a substring via `anchor_text`')( + 'add_comment brackets the matched substring and echoes the anchor_text', + async ({ given, when, then }: AllureBddContext) => { + let manager: SessionManager; + let filePath: string; + let result: Awaited>; + + await given('a file-first ODF session', async () => { + manager = new SessionManager(); + filePath = await copyFixture(); + }); + await when('add_comment is called with an anchor_text present once', async () => { + result = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, + target_paragraph_id: 'p0', + anchor_text: 'quick brown', + author: 'Jane Doe', + text: 'on the phrase', + }); + }); + await then('the response echoes the anchor_text and the comment is anchored', () => { + assertSuccess(result, 'add_comment ranged'); + expect(result.provider).toBe('odf'); + expect(result.anchor_text).toBe('quick brown'); + expect(result.anchor_paragraph_id).toBe('p0'); + }); + }, + ); + + test.openspec('[OPCM-03] `get_comments` returns ODF annotations')( + 'get_comments returns the added comment with author, body, anchor, and empty replies', + async ({ given, when, then }: AllureBddContext) => { + let manager: SessionManager; + let filePath: string; + let result: Awaited>; + + await given('an ODF session with one comment added', async () => { + manager = new SessionManager(); + filePath = await copyFixture(); + const add = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, + target_paragraph_id: 'p2', + author: 'Jane Doe', + text: 'Check this figure', + }); + assertSuccess(add, 'add_comment'); + }); + await when('get_comments is called on the .odt path', async () => { + result = await dispatchToolCall(manager, 'get_comments', { file_path: filePath }); + }); + await then('the comment is returned with metadata and no replies', () => { + assertSuccess(result, 'get_comments'); + expect(result.provider).toBe('odf'); + const comments = result.comments as McpComment[]; + expect(comments).toHaveLength(1); + expect(comments[0]!.author).toBe('Jane Doe'); + expect(comments[0]!.text).toBe('Check this figure'); + expect(comments[0]!.anchored_paragraph_id).toBe('p2'); + expect(comments[0]!.replies).toEqual([]); + expect(comments[0]!.date).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$/); + }); + }, + ); + + test.openspec('[OPCM-04] Replies are unsupported for ODF')( + 'add_comment with parent_comment_id on a .odt returns UNSUPPORTED_FOR_ODF', + async ({ given, when, then }: AllureBddContext) => { + let manager: SessionManager; + let filePath: string; + let result: Awaited>; + + await given('a file-first ODF session', async () => { + manager = new SessionManager(); + filePath = await copyFixture(); + }); + await when('add_comment is called with a parent_comment_id', async () => { + result = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, + parent_comment_id: 1, + author: 'Jane Doe', + text: 'a reply', + }); + }); + await then('the ODF handler rejects the reply', () => { + assertError(result, 'UNSUPPORTED_FOR_ODF'); + }); + }, + ); + + test.openspec('[OPCM-05] Missing or ambiguous `anchor_text` is rejected')( + 'add_comment returns TEXT_NOT_FOUND for an absent anchor_text and MULTIPLE_MATCHES for an ambiguous one', + async ({ given, when, then }: AllureBddContext) => { + let manager: SessionManager; + let filePath: string; + let notFound: Awaited>; + let multiple: Awaited>; + + await given('a file-first ODF session', async () => { + manager = new SessionManager(); + filePath = await copyFixture(); + }); + await when('add_comment is called with absent then ambiguous anchor_text', async () => { + notFound = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, target_paragraph_id: 'p0', anchor_text: 'zebra', author: 'A', text: 'x', + }); + multiple = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, target_paragraph_id: 'p0', anchor_text: 'o', author: 'A', text: 'x', + }); + }); + await then('each rejection carries the right error code', () => { + assertError(notFound, 'TEXT_NOT_FOUND'); + assertError(multiple, 'MULTIPLE_MATCHES'); + }); + }, + ); +}); + +// Branch-coverage tests for the ODF comment handlers. +describe('ODF comments branch coverage', () => { + it('add_comment requires target_paragraph_id for root comments', async () => { + const manager = new SessionManager(); + const filePath = await copyFixture(); + const res = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, author: 'A', text: 'orphan', + }); + assertError(res, 'MISSING_PARAMETER'); + }); + + it('add_comment returns ANCHOR_NOT_FOUND for an unknown paragraph id', async () => { + const manager = new SessionManager(); + const filePath = await copyFixture(); + const res = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, target_paragraph_id: 'p99', author: 'A', text: 'x', + }); + assertError(res, 'ANCHOR_NOT_FOUND'); + }); + + it('comments survive a save → reopen round trip', async () => { + const manager = new SessionManager(); + const filePath = await copyFixture(); + const add = await dispatchToolCall(manager, 'add_comment', { + file_path: filePath, target_paragraph_id: 'p0', anchor_text: 'lazy dog', author: 'Jane Doe', text: 'persisted', + }); + assertSuccess(add, 'add_comment'); + const savePath = path.join(path.dirname(filePath), 'with-comment.odt'); + const save = await dispatchToolCall(manager, 'save', { file_path: filePath, save_to_local_path: savePath }); + assertSuccess(save, 'save'); + + // Reopen the saved file in a fresh manager and confirm the comment reads back. + const fresh = new SessionManager(); + const got = await dispatchToolCall(fresh, 'get_comments', { file_path: savePath }); + assertSuccess(got, 'get_comments after reopen'); + const comments = got.comments as McpComment[]; + expect(comments).toHaveLength(1); + expect(comments[0]!.text).toBe('persisted'); + expect(comments[0]!.author).toBe('Jane Doe'); + expect(comments[0]!.anchored_paragraph_id).toBe('p0'); + }); + + it('get_comments returns an empty list when there are no annotations', async () => { + const manager = new SessionManager(); + const filePath = await copyFixture(); + const res = await dispatchToolCall(manager, 'get_comments', { file_path: filePath }); + assertSuccess(res, 'get_comments empty'); + expect(res.comments).toEqual([]); + }); +}); diff --git a/packages/docx-mcp/src/tools/odf/odf_grep_insert.test.ts b/packages/docx-mcp/src/tools/odf/odf_grep_insert.test.ts index c8449ce..a3398e0 100644 --- a/packages/docx-mcp/src/tools/odf/odf_grep_insert.test.ts +++ b/packages/docx-mcp/src/tools/odf/odf_grep_insert.test.ts @@ -126,11 +126,9 @@ describe('ODF grep + insert_paragraph lane', () => { await firstId(manager, filePath); }); await when('a still-unsupported tool targets the .odt path', async () => { - result = await dispatchToolCall(manager, 'add_comment', { + result = await dispatchToolCall(manager, 'compare_documents', { file_path: filePath, - target_paragraph_id: 'p0', - comment_text: 'should be rejected', - author: 'Jane Doe', + save_to_local_path: '/tmp/should-be-rejected.odt', }); }); await then('the provider guard returns UNSUPPORTED_FOR_ODF', () => { @@ -246,13 +244,11 @@ describe('ODF grep + insert branch coverage', () => { it('two unsupported tools on the same .odt path name the correct tool (no stale pending replay)', async () => { const manager = new SessionManager(); const filePath = await copyFixture(); - const one = await dispatchToolCall(manager, 'add_comment', { - file_path: filePath, target_paragraph_id: 'p0', comment_text: 'x', author: 'Jane Doe', - }); + const one = await dispatchToolCall(manager, 'extract_revisions', { file_path: filePath }); const two = await dispatchToolCall(manager, 'delete_comment', { file_path: filePath, comment_id: 'c1' }); assertError(one, 'UNSUPPORTED_FOR_ODF'); assertError(two, 'UNSUPPORTED_FOR_ODF'); - expect(one.error.message).toContain("'add_comment'"); + expect(one.error.message).toContain("'extract_revisions'"); expect(two.error.message).toContain("'delete_comment'"); // A supported tool on the same path must still work (pending map not poisoned). const read = await dispatchToolCall(manager, 'read_file', { file_path: filePath, format: 'simple', limit: 50 }); diff --git a/packages/docx-mcp/src/tools/provider_guard.ts b/packages/docx-mcp/src/tools/provider_guard.ts index 2e9d594..59c3d86 100644 --- a/packages/docx-mcp/src/tools/provider_guard.ts +++ b/packages/docx-mcp/src/tools/provider_guard.ts @@ -7,7 +7,8 @@ const GDOCS_SUPPORTED_TOOLS = new Set([ ]); const ODF_SUPPORTED_TOOLS = new Set([ - 'read_file', 'replace_text', 'grep', 'insert_paragraph', 'save', 'get_file_status', 'close_file', + 'read_file', 'replace_text', 'grep', 'insert_paragraph', 'add_comment', 'get_comments', + 'save', 'get_file_status', 'close_file', ]); export function checkGDocsSupport(toolName: string): ToolResponse | null { @@ -26,7 +27,7 @@ export function checkOdfSupport(toolName: string): ToolResponse | null { return err( 'UNSUPPORTED_FOR_ODF', `'${toolName}' is not supported for ODF (.odt) files.`, - 'Use read_file, replace_text, grep, insert_paragraph, save, get_file_status, or close_file for .odt files.', + 'Use read_file, replace_text, grep, insert_paragraph, add_comment, get_comments, save, get_file_status, or close_file for .odt files.', ); } return null; diff --git a/packages/docx-mcp/src/tools/session_resolution.ts b/packages/docx-mcp/src/tools/session_resolution.ts index 8149492..37fe6a5 100644 --- a/packages/docx-mcp/src/tools/session_resolution.ts +++ b/packages/docx-mcp/src/tools/session_resolution.ts @@ -266,7 +266,7 @@ export async function resolveSessionForTool( response: err( 'UNSUPPORTED_FOR_ODF', `Tool '${opts.toolName}' is not supported for ODF (.odt) files.`, - 'Use read_file, replace_text, grep, insert_paragraph, save, get_file_status, or close_file for .odt files.', + 'Use read_file, replace_text, grep, insert_paragraph, add_comment, get_comments, save, get_file_status, or close_file for .odt files.', ), }; } @@ -302,7 +302,7 @@ export async function resolveSessionForTool( response: err( 'UNSUPPORTED_FOR_ODF', `Tool '${opts.toolName}' is not supported for ODF (.odt) files.`, - 'Use read_file, replace_text, grep, insert_paragraph, save, get_file_status, or close_file for .odt files.', + 'Use read_file, replace_text, grep, insert_paragraph, add_comment, get_comments, save, get_file_status, or close_file for .odt files.', ), }; } diff --git a/packages/odf-core/src/comments.test.ts b/packages/odf-core/src/comments.test.ts new file mode 100644 index 0000000..f5667f2 --- /dev/null +++ b/packages/odf-core/src/comments.test.ts @@ -0,0 +1,160 @@ +import { describe, it, expect } from 'vitest'; + +import { OdfDocument } from './document.js'; + +/** Wrap a content fragment in a minimal, namespace-complete content.xml (incl. dc). */ +function contentXml(bodyInner: string): string { + return ` + + ${bodyInner} +`; +} + +describe('OdfDocument — comments (office:annotation)', () => { + it('[OANN-01] addComment brackets a substring range with annotation + annotation-end', () => { + const doc = OdfDocument.fromContentXml(contentXml('The quick brown fox')); + const res = doc.addComment({ paragraphId: 'p0', start: 4, end: 9, author: 'A', text: 'on quick' }); + expect(res.ok).toBe(true); + if (res.ok) expect(res.commentId).toBe(1); + const xml = doc.toXml(); + // Annotation opens before "quick" and the paired end closes after it. + expect(xml).toMatch(/The .*<\/office:annotation>quick brown fox/); + // Visible text is unchanged. + expect(doc.getParagraphs().map((p) => p.text)).toEqual(['The quick brown fox']); + }); + + it('[OANN-02] getComments reads dc:creator, dc:date, body, and anchor paragraph id', () => { + const doc = OdfDocument.fromContentXml( + contentXml('FirstSecond'), + ); + doc.addComment({ paragraphId: 'p1', author: 'Jane Doe', text: 'Comment body' }); + const comments = doc.getComments(); + expect(comments).toHaveLength(1); + expect(comments[0]!.author).toBe('Jane Doe'); + expect(comments[0]!.text).toBe('Comment body'); + expect(comments[0]!.anchoredParagraphId).toBe('p1'); + expect(comments[0]!.initials).toBe(''); + // dc:date is an ISO-8601-ish string with no fractional seconds. + expect(comments[0]!.date).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}$/); + }); + + it('[OANN-03] whole-paragraph anchoring survives spans and spaces (structural path)', () => { + const doc = OdfDocument.fromContentXml( + contentXml('Hello braveworld'), + ); + const res = doc.addComment({ paragraphId: 'p0', author: 'A', text: 'whole' }); + expect(res.ok).toBe(true); // no MATCH_SPANS_MULTIPLE_NODES on the structural path + const xml = doc.toXml(); + // Annotation is the first inline child; the end is the last child of the paragraph. + expect(xml).toMatch(/<\/text:p>/); + expect(doc.getParagraphs().map((p) => p.text)).toEqual(['Hello brave world']); + }); + + it('[OANN-04] a cross-node ranged match returns MATCH_SPANS_MULTIPLE_NODES (no throw)', () => { + const doc = OdfDocument.fromContentXml( + contentXml('Hello brave world'), + ); + // visible "Hello brave world"; [4,8) = "o br" crosses into the span. + const res = doc.addComment({ paragraphId: 'p0', start: 4, end: 8, author: 'A', text: 'x' }); + expect(res.ok).toBe(false); + if (!res.ok) expect(res.code).toBe('MATCH_SPANS_MULTIPLE_NODES'); + }); + + it('[OANN-05] annotation body does not leak into getParagraphs visible text or block ordinals', () => { + const doc = OdfDocument.fromContentXml( + contentXml('Hello worldSecond'), + ); + doc.addComment({ paragraphId: 'p0', author: 'Reviewer', text: 'a long comment body here' }); + // Re-parse from the serialized form to prove the markup itself is inert to the view. + const reparsed = OdfDocument.fromContentXml(doc.toXml()); + expect(reparsed.getParagraphs()).toEqual([ + { id: 'p0', text: 'Hello world' }, + { id: 'p1', text: 'Second' }, + ]); + }); + + it('inserts a point annotation (no end) on an empty paragraph', () => { + const doc = OdfDocument.fromContentXml(contentXml('')); + const res = doc.addComment({ paragraphId: 'p0', author: 'A', text: 'on empty' }); + expect(res.ok).toBe(true); + const xml = doc.toXml(); + expect(xml).toContain(''); + expect(xml).not.toContain('office:annotation-end'); + }); + + it('allocates office:name past an existing __Annot__ id', () => { + const doc = OdfDocument.fromContentXml( + contentXml( + 'xZ' + + 'old' + + '', + ), + ); + const res = doc.addComment({ paragraphId: 'p0', author: 'A', text: 'new' }); + expect(res.ok).toBe(true); + if (res.ok) expect(res.commentId).toBe(6); + // Both comments read back, the pre-existing one keeping its parsed id. + expect(doc.getComments().map((c) => c.id).sort((a, b) => a - b)).toEqual([5, 6]); + }); + + it('returns ANCHOR_NOT_FOUND for an unknown paragraph id (no throw)', () => { + const doc = OdfDocument.fromContentXml(contentXml('Only')); + const res = doc.addComment({ paragraphId: 'p9', author: 'A', text: 'x' }); + expect(res.ok).toBe(false); + if (!res.ok) expect(res.code).toBe('ANCHOR_NOT_FOUND'); + }); + + it('rejects a reversed/out-of-bounds/one-sided range with INVALID_RANGE and does not mutate text', () => { + const make = () => OdfDocument.fromContentXml(contentXml('abcdef')); + for (const range of [ + { start: 4, end: 2 }, // reversed + { start: 2, end: 2 }, // empty + { start: -1, end: 3 }, // negative + { start: 0, end: 99 }, // out of bounds + { start: 1 }, // one-sided + { end: 3 }, // one-sided + ]) { + const doc = make(); + const res = doc.addComment({ paragraphId: 'p0', author: 'A', text: 'x', ...range }); + expect(res.ok).toBe(false); + if (!res.ok) expect(res.code).toBe('INVALID_RANGE'); + // The paragraph text is untouched (no duplication / corruption). + expect(doc.getParagraphs()[0]!.text).toBe('abcdef'); + } + }); + + it('round-trips a multi-line comment body (line breaks preserved, not collapsed to spaces)', () => { + const doc = OdfDocument.fromContentXml(contentXml('Anchor')); + const res = doc.addComment({ paragraphId: 'p0', author: 'A', text: 'line one\nline two' }); + expect(res.ok).toBe(true); + // Written as text:line-break, not a literal newline in one text node. + expect(doc.toXml()).toContain('line oneline two'); + // Reads back with the newline intact (via a serialize → reparse round trip). + const reparsed = OdfDocument.fromContentXml(doc.toXml()); + expect(reparsed.getComments()[0]!.text).toBe('line one\nline two'); + }); + + it('allocates new ids past the synthetic ids readAnnotations assigns to custom-named annotations', () => { + // A LibreOffice-style annotation with a non-`__Annot__N` name is read as a synthetic id. + const doc = OdfDocument.fromContentXml( + contentXml('BodyBobhi'), + ); + const before = doc.getComments(); + expect(before).toHaveLength(1); + const customId = before[0]!.id; // synthetic id for "Bob" + const res = doc.addComment({ paragraphId: 'p0', author: 'A', text: 'new' }); + expect(res.ok).toBe(true); + const after = doc.getComments(); + // No two comments share an id, and the new comment's id does not steal the custom one's. + const ids = after.map((c) => c.id); + expect(new Set(ids).size).toBe(ids.length); + if (res.ok) expect(ids).toContain(res.commentId); + // The newly added comment's id is distinct from the custom annotation's original id. + if (res.ok) expect(res.commentId).not.toBe(customId); + }); +}); diff --git a/packages/odf-core/src/comments.ts b/packages/odf-core/src/comments.ts new file mode 100644 index 0000000..f986db8 --- /dev/null +++ b/packages/odf-core/src/comments.ts @@ -0,0 +1,263 @@ +/** + * ODF comment (`office:annotation`) read/write over a parsed `content.xml` DOM. + * + * ODF comments are inline: an `office:annotation` (carrying `dc:creator`, `dc:date`, and a + * `text:p` body) marks the anchor point, and a paired `office:annotation-end` (same + * `office:name`) closes a ranged comment. There is no separate comments part. + * + * Two insertion paths (a deliberate split — see the `add-odf-comments` design notes): + * - whole-paragraph: structural (annotation as the block's first inline child, end after its + * last), independent of text segmentation — robust to spans/spaces/tabs/multiple text nodes; + * - ranged: split a single host `#text` node at the visible `[start,end)` offsets; a match that + * crosses node boundaries returns `MATCH_SPANS_MULTIPLE_NODES`. + * + * All element matching is by `namespaceURI` + `localName` (ODF prefixes are not guaranteed). + */ + +import { ODF_NS } from './shared/odf/namespaces.js'; +import { ELEMENT_NODE, buildSegments } from './shared/odf/text_segments.js'; + +/** A comment read back from the document. */ +export type OdfComment = { + id: number; + author: string; + date: string | null; + initials: string; + text: string; + anchoredParagraphId: string | null; +}; + +export type AddAnnotationParams = { + /** Visible offset of the range start; omit (with `end`) to bracket the whole paragraph. */ + start?: number; + /** Visible offset of the range end (exclusive). */ + end?: number; + author: string; + text: string; + initials?: string; +}; + +export type AddAnnotationResult = + | { ok: true; commentId: number; name: string } + | { ok: false; code: 'MATCH_SPANS_MULTIPLE_NODES' | 'INVALID_RANGE'; message: string }; + +const ANNOT_NAME_RE = /^__Annot__(\d+)$/; + +function attrNS(el: Element, ns: string, local: string, prefixed: string): string | null { + return el.getAttributeNS(ns, local) ?? el.getAttribute(prefixed); +} + +/** ODF `dc:date` value: ISO 8601 local-ish, no fractional seconds or trailing `Z`. */ +function nowOdfDate(): string { + return new Date().toISOString().replace(/\.\d{3}Z$/, ''); +} + +/** Collect every existing `office:name` value (on annotations and annotation-ends) in the doc. */ +function existingAnnotationNames(doc: Document): Set { + const names = new Set(); + for (const local of ['annotation', 'annotation-end']) { + const els = doc.getElementsByTagNameNS(ODF_NS.OFFICE, local); + for (let i = 0; i < els.length; i++) { + const name = attrNS(els[i] as Element, ODF_NS.OFFICE, 'name', 'office:name'); + if (name) names.add(name); + } + } + return names; +} + +/** + * Allocate `{ id, name }` for a new annotation, in the SAME id space `readAnnotations` derives. + * `readAnnotations` assigns parsed `__Annot__` suffixes, then hands every annotation whose name + * does NOT match that pattern a synthetic id starting at `maxParsed + 1`. So the next free id must + * clear both: `maxParsed + (count of non-matching annotation elements) + 1`. Still guard against a + * literal `office:name` collision. This keeps the returned `commentId` from coinciding with the + * synthetic id a custom-named (e.g. LibreOffice) annotation would otherwise receive. + */ +function allocateName(doc: Document): { id: number; name: string } { + const names = existingAnnotationNames(doc); + let maxParsed = 0; + let nonMatching = 0; + const annots = doc.getElementsByTagNameNS(ODF_NS.OFFICE, 'annotation'); + for (let i = 0; i < annots.length; i++) { + const name = attrNS(annots[i] as Element, ODF_NS.OFFICE, 'name', 'office:name'); + const m = name ? ANNOT_NAME_RE.exec(name) : null; + if (m) maxParsed = Math.max(maxParsed, Number.parseInt(m[1]!, 10)); + else nonMatching += 1; + } + let id = maxParsed + nonMatching + 1; + while (names.has(`__Annot__${id}`)) id += 1; + return { id, name: `__Annot__${id}` }; +} + +function makeAnnotation(doc: Document, name: string, params: AddAnnotationParams): Element { + const annot = doc.createElementNS(ODF_NS.OFFICE, 'office:annotation'); + annot.setAttributeNS(ODF_NS.OFFICE, 'office:name', name); + const creator = doc.createElementNS(ODF_NS.DC, 'dc:creator'); + creator.appendChild(doc.createTextNode(params.author)); + annot.appendChild(creator); + const date = doc.createElementNS(ODF_NS.DC, 'dc:date'); + date.appendChild(doc.createTextNode(nowOdfDate())); + annot.appendChild(date); + // Body: blank lines split into separate `text:p`; single newlines become `text:line-break` + // (parity with insertParagraph). A literal `\n` in one text node would otherwise render as a + // space in LibreOffice and round-trip lossily through getComments. + const blockTexts = params.text.replace(/\r\n/g, '\n').split(/\n{2,}/); + for (const blockText of blockTexts) { + const body = doc.createElementNS(ODF_NS.TEXT, 'text:p'); + blockText.split('\n').forEach((line, i) => { + if (i > 0) body.appendChild(doc.createElementNS(ODF_NS.TEXT, 'text:line-break')); + if (line.length > 0) body.appendChild(doc.createTextNode(line)); + }); + annot.appendChild(body); + } + return annot; +} + +function makeAnnotationEnd(doc: Document, name: string): Element { + const end = doc.createElementNS(ODF_NS.OFFICE, 'office:annotation-end'); + end.setAttributeNS(ODF_NS.OFFICE, 'office:name', name); + return end; +} + +/** + * Insert an annotation on `block`. With no `start`/`end` the whole paragraph is bracketed; with a + * range, a single host `#text` node is split. Returns the allocated comment id, or + * `MATCH_SPANS_MULTIPLE_NODES` if a ranged match crosses node boundaries. + */ +export function addAnnotation(doc: Document, block: Element, params: AddAnnotationParams): AddAnnotationResult { + const hasStart = params.start != null; + const hasEnd = params.end != null; + // A range is all-or-nothing; a one-sided range is a caller error, not a whole-paragraph comment. + if (hasStart !== hasEnd) { + return { + ok: false, + code: 'INVALID_RANGE', + message: 'A ranged comment requires both start and end; provide neither for a whole-paragraph comment.', + }; + } + const ranged = hasStart && hasEnd; + + if (!ranged) { + // Whole-paragraph: structural bracket, independent of text segmentation. + const { id, name } = allocateName(doc); + const annot = makeAnnotation(doc, name, params); + if (!block.firstChild) { + // Empty paragraph → a single point annotation (no end marker). + block.appendChild(annot); + } else { + block.insertBefore(annot, block.firstChild); + block.appendChild(makeAnnotationEnd(doc, name)); + } + return { ok: true, commentId: id, name }; + } + + const start = params.start!; + const end = params.end!; + const { segments, visible } = buildSegments(block); + // Fail closed on a malformed range before mutating the DOM (reversed/oob ranges duplicate text). + if (!Number.isInteger(start) || !Number.isInteger(end) || start < 0 || start >= end || end > visible.length) { + return { + ok: false, + code: 'INVALID_RANGE', + message: `Invalid annotation range [${start}, ${end}) for a paragraph of visible length ${visible.length}.`, + }; + } + const { id, name } = allocateName(doc); + const annot = makeAnnotation(doc, name, params); + const host = segments.find( + (seg) => seg.kind === 'text' && start >= seg.visStart && end <= seg.visStart + seg.length, + ); + if (!host || host.kind !== 'text') { + return { + ok: false, + code: 'MATCH_SPANS_MULTIPLE_NODES', + message: + `Annotation range [${start}, ${end}) in this paragraph crosses node boundaries ` + + `(spans, spaces, or tabs). Ranged comments must lie within a single text run.`, + }; + } + + // `host.node` is the live DOM text node (typed loosely by buildSegments). + const textNode = host.node as unknown as Node & { data: string }; + const parent = textNode.parentNode!; + const next = textNode.nextSibling; + const localStart = start - host.visStart; + const localEnd = end - host.visStart; + const data = textNode.data; + const mid = data.slice(localStart, localEnd); + const after = data.slice(localEnd); + + // Keep the leading slice in the original node; insert annotation, middle text, end, trailing text. + textNode.data = data.slice(0, localStart); + parent.insertBefore(annot, next); + if (mid.length > 0) parent.insertBefore(doc.createTextNode(mid), next); + parent.insertBefore(makeAnnotationEnd(doc, name), next); + if (after.length > 0) parent.insertBefore(doc.createTextNode(after), next); + + return { ok: true, commentId: id, name }; +} + +/** + * Visible text of an annotation body (its child `text:p` elements, joined by newlines). Uses + * `buildSegments` rather than `textContent` so `text:line-break` → `\n` and `text:s` → spaces are + * preserved (a LibreOffice multi-line comment round-trips instead of collapsing to one line). + */ +function annotationBodyText(annot: Element): string { + const parts: string[] = []; + for (let child = annot.firstChild; child; child = child.nextSibling) { + if (child.nodeType !== ELEMENT_NODE) continue; + const el = child as Element; + if (el.namespaceURI === ODF_NS.TEXT && el.localName === 'p') { + parts.push(buildSegments(el).visible); + } + } + return parts.join('\n'); +} + +function childText(el: Element, ns: string, local: string): string | null { + for (let child = el.firstChild; child; child = child.nextSibling) { + if (child.nodeType !== ELEMENT_NODE) continue; + const c = child as Element; + if (c.namespaceURI === ns && c.localName === local) return c.textContent ?? ''; + } + return null; +} + +/** + * Read every `office:annotation` across `blocks` (document order) into structured comments. + * Numeric ids are parsed from `__Annot__` names; annotations whose names don't match the + * convention are assigned ids sequentially after the max parsed value (a documented limitation). + */ +export function readAnnotations(blocks: Element[]): OdfComment[] { + type Raw = { parsedId: number | null; author: string; date: string | null; text: string; anchor: string }; + const raw: Raw[] = []; + let maxParsed = 0; + + blocks.forEach((block, i) => { + const annots = block.getElementsByTagNameNS(ODF_NS.OFFICE, 'annotation'); + for (let a = 0; a < annots.length; a++) { + const annot = annots[a] as Element; + const name = attrNS(annot, ODF_NS.OFFICE, 'name', 'office:name'); + const m = name ? ANNOT_NAME_RE.exec(name) : null; + const parsedId = m ? Number.parseInt(m[1]!, 10) : null; + if (parsedId != null) maxParsed = Math.max(maxParsed, parsedId); + raw.push({ + parsedId, + author: childText(annot, ODF_NS.DC, 'creator') ?? '', + date: childText(annot, ODF_NS.DC, 'date'), + text: annotationBodyText(annot), + anchor: `p${i}`, + }); + } + }); + + let next = maxParsed + 1; + return raw.map((r) => ({ + id: r.parsedId ?? next++, + author: r.author, + date: r.date, + initials: '', + text: r.text, + anchoredParagraphId: r.anchor, + })); +} diff --git a/packages/odf-core/src/document.ts b/packages/odf-core/src/document.ts index 0488a07..9a1a495 100644 --- a/packages/odf-core/src/document.ts +++ b/packages/odf-core/src/document.ts @@ -14,9 +14,8 @@ import { parseXml, serializeXml } from '@usejunior/docx-core'; import { ODF_NS } from './shared/odf/namespaces.js'; - -const TEXT_NODE = 3; -const ELEMENT_NODE = 1; +import { ELEMENT_NODE, buildSegments, isAnnotationSubtree, isTextBlock } from './shared/odf/text_segments.js'; +import { addAnnotation, readAnnotations, type AddAnnotationResult, type OdfComment } from './comments.js'; export type OdfParagraph = { /** Deterministic structural ID (document-order ordinal), stable for identical bytes. */ @@ -33,14 +32,19 @@ export type InsertResult = | { ok: true; newIds: string[] } | { ok: false; code: 'ANCHOR_NOT_FOUND'; message: string }; -/** A contiguous slice of a paragraph's visible text and where it came from. */ -type Segment = - | { kind: 'text'; node: { data: string }; visStart: number; length: number } - | { kind: 'virtual'; visStart: number; length: number }; +/** Parameters for {@link OdfDocument.addComment}. A `start`/`end` range is optional; omit for whole-paragraph. */ +export type AddCommentParams = { + paragraphId: string; + start?: number; + end?: number; + author: string; + text: string; + initials?: string; +}; -function isTextBlock(el: { namespaceURI?: string | null; localName?: string | null }): boolean { - return el.namespaceURI === ODF_NS.TEXT && (el.localName === 'p' || el.localName === 'h'); -} +export type AddCommentResult = + | { ok: true; commentId: number } + | { ok: false; code: 'ANCHOR_NOT_FOUND' | 'MATCH_SPANS_MULTIPLE_NODES' | 'INVALID_RANGE'; message: string }; export class OdfDocument { private doc: Document; @@ -66,6 +70,8 @@ export class OdfDocument { for (let child = node.firstChild; child; child = child.nextSibling) { if (child.nodeType !== ELEMENT_NODE) continue; const el = child as Element; + // An annotation carries its own `text:p` comment body; never enumerate it as a block. + if (isAnnotationSubtree(el)) continue; if (isTextBlock(el)) { out.push(el); // Block-level text elements are not nested inside one another in ODF, but @@ -90,7 +96,7 @@ export class OdfDocument { getParagraphs(): OdfParagraph[] { return this.blocks.map((el, i) => ({ id: this.idForIndex(i), - text: this.buildSegments(el).visible, + text: buildSegments(el).visible, })); } @@ -98,56 +104,7 @@ export class OdfDocument { getParagraphTextById(id: string): string | null { const el = this.blockForId(id); if (!el) return null; - return this.buildSegments(el).visible; - } - - /** - * Build the ordered segment list and concatenated visible string for a block. - * `text:s` (count via `text:c`) expands to spaces, `text:tab` to a tab, and - * `text:line-break` to a newline — each a "virtual" segment with no single host - * `#text` node (so a match landing on one cannot be edited in place in Phase 1). - */ - private buildSegments(block: Element): { segments: Segment[]; visible: string } { - const segments: Segment[] = []; - let visible = ''; - - const walk = (node: Node): void => { - for (let child = node.firstChild; child; child = child.nextSibling) { - if (child.nodeType === TEXT_NODE) { - const data = (child as unknown as { data: string }).data ?? ''; - if (data.length === 0) continue; - segments.push({ kind: 'text', node: child as unknown as { data: string }, visStart: visible.length, length: data.length }); - visible += data; - continue; - } - if (child.nodeType !== ELEMENT_NODE) continue; - const el = child as Element; - if (el.namespaceURI === ODF_NS.TEXT && el.localName === 's') { - const countRaw = el.getAttributeNS(ODF_NS.TEXT, 'c') ?? el.getAttribute('text:c'); - const count = Math.max(1, Number.parseInt(countRaw ?? '1', 10) || 1); - const spaces = ' '.repeat(count); - segments.push({ kind: 'virtual', visStart: visible.length, length: spaces.length }); - visible += spaces; - continue; - } - if (el.namespaceURI === ODF_NS.TEXT && el.localName === 'tab') { - segments.push({ kind: 'virtual', visStart: visible.length, length: 1 }); - visible += '\t'; - continue; - } - if (el.namespaceURI === ODF_NS.TEXT && el.localName === 'line-break') { - segments.push({ kind: 'virtual', visStart: visible.length, length: 1 }); - visible += '\n'; - continue; - } - // Other elements (text:span, hyperlink, etc.): recurse so their inner - // #text nodes are recorded as separate segments. - walk(el); - } - }; - - walk(block); - return { segments, visible }; + return buildSegments(el).visible; } /** @@ -159,7 +116,7 @@ export class OdfDocument { if (!el) { return { ok: false, code: 'ANCHOR_NOT_FOUND', message: `Paragraph not found: ${id}` }; } - const { segments, visible } = this.buildSegments(el); + const { segments, visible } = buildSegments(el); const matchStart = visible.indexOf(findText); if (matchStart < 0) { return { ok: false, code: 'TEXT_NOT_FOUND', message: `Text not found in paragraph ${id}: ${JSON.stringify(findText)}` }; @@ -243,6 +200,34 @@ export class OdfDocument { return { ok: true, newIds }; } + /** + * Insert an `office:annotation` comment on the paragraph identified by `paragraphId`. + * Omit `start`/`end` to bracket the whole paragraph (structural insertion, independent of + * text segmentation); supply a visible `start`/`end` range to bracket a substring (which must + * lie within a single `#text` node, else `MATCH_SPANS_MULTIPLE_NODES`). Annotations are inline + * children, so positional paragraph IDs do NOT shift. + */ + addComment(params: AddCommentParams): AddCommentResult { + const block = this.blockForId(params.paragraphId); + if (!block) { + return { ok: false, code: 'ANCHOR_NOT_FOUND', message: `Paragraph not found: ${params.paragraphId}` }; + } + const result: AddAnnotationResult = addAnnotation(this.doc, block, { + start: params.start, + end: params.end, + author: params.author, + text: params.text, + initials: params.initials, + }); + if (!result.ok) return result; + return { ok: true, commentId: result.commentId }; + } + + /** All `office:annotation` comments in document order. */ + getComments(): OdfComment[] { + return readAnnotations(this.blocks); + } + /** Serialize the (possibly edited) document back to a `content.xml` string. */ toXml(): string { return serializeXml(this.doc); diff --git a/packages/odf-core/src/index.ts b/packages/odf-core/src/index.ts index 798a154..84630c7 100644 --- a/packages/odf-core/src/index.ts +++ b/packages/odf-core/src/index.ts @@ -1,4 +1,12 @@ export { OdfArchive } from './shared/odf/OdfArchive.js'; -export { OdfDocument, type OdfParagraph, type ReplaceResult, type InsertResult } from './document.js'; +export { + OdfDocument, + type OdfParagraph, + type ReplaceResult, + type InsertResult, + type AddCommentParams, + type AddCommentResult, +} from './document.js'; +export { type OdfComment } from './comments.js'; export { validateOdfArchiveSafety, type OdfArchiveSafetyResult } from './odf_archive_safety.js'; export { ODF_NS, ODF_PATHS, ODT_MIMETYPE } from './shared/odf/namespaces.js'; diff --git a/packages/odf-core/src/shared/odf/namespaces.ts b/packages/odf-core/src/shared/odf/namespaces.ts index 3c7a74c..03bba4c 100644 --- a/packages/odf-core/src/shared/odf/namespaces.ts +++ b/packages/odf-core/src/shared/odf/namespaces.ts @@ -11,6 +11,8 @@ export const ODF_NS = { STYLE: 'urn:oasis:names:tc:opendocument:xmlns:style:1.0', TABLE: 'urn:oasis:names:tc:opendocument:xmlns:table:1.0', MANIFEST: 'urn:oasis:names:tc:opendocument:xmlns:manifest:1.0', + // Dublin Core — carries annotation/change author (`dc:creator`) and date (`dc:date`). + DC: 'http://purl.org/dc/elements/1.1/', } as const; /** The mimetype value an OpenDocument text package declares. */ diff --git a/packages/odf-core/src/shared/odf/text_segments.ts b/packages/odf-core/src/shared/odf/text_segments.ts new file mode 100644 index 0000000..9b74152 --- /dev/null +++ b/packages/odf-core/src/shared/odf/text_segments.ts @@ -0,0 +1,84 @@ +/** + * Shared visible-text ↔ DOM-node mapping for ODF block-level text elements. + * + * Extracted from `document.ts` so both the document view and the comments module use one + * segmentation of a `text:p`/`text:h`'s visible text without an import cycle. + * + * All element matching is by `namespaceURI` + `localName` (ODF prefixes are not guaranteed). + */ + +import { ODF_NS } from './namespaces.js'; + +export const TEXT_NODE = 3; +export const ELEMENT_NODE = 1; + +/** True for the block-level text elements that carry a paragraph's visible content. */ +export function isTextBlock(el: { namespaceURI?: string | null; localName?: string | null }): boolean { + return el.namespaceURI === ODF_NS.TEXT && (el.localName === 'p' || el.localName === 'h'); +} + +/** + * True for an `office:annotation` / `office:annotation-end` element. Annotation subtrees carry + * their own `text:p` comment body, which must NOT be walked as part of the host paragraph's + * visible text nor enumerated as a paragraph block — callers skip these subtrees. + */ +export function isAnnotationSubtree(el: { namespaceURI?: string | null; localName?: string | null }): boolean { + return el.namespaceURI === ODF_NS.OFFICE && (el.localName === 'annotation' || el.localName === 'annotation-end'); +} + +/** A contiguous slice of a paragraph's visible text and where it came from. */ +export type Segment = + | { kind: 'text'; node: { data: string }; visStart: number; length: number } + | { kind: 'virtual'; visStart: number; length: number }; + +/** + * Build the ordered segment list and concatenated visible string for a block. + * `text:s` (count via `text:c`) expands to spaces, `text:tab` to a tab, and + * `text:line-break` to a newline — each a "virtual" segment with no single host + * `#text` node (so a match landing on one cannot be edited in place in Phase 1). + * `office:annotation` / `office:annotation-end` subtrees are skipped entirely. + */ +export function buildSegments(block: Element): { segments: Segment[]; visible: string } { + const segments: Segment[] = []; + let visible = ''; + + const walk = (node: Node): void => { + for (let child = node.firstChild; child; child = child.nextSibling) { + if (child.nodeType === TEXT_NODE) { + const data = (child as unknown as { data: string }).data ?? ''; + if (data.length === 0) continue; + segments.push({ kind: 'text', node: child as unknown as { data: string }, visStart: visible.length, length: data.length }); + visible += data; + continue; + } + if (child.nodeType !== ELEMENT_NODE) continue; + const el = child as Element; + // Skip annotation subtrees: their body text is a comment, not the host paragraph's content. + if (isAnnotationSubtree(el)) continue; + if (el.namespaceURI === ODF_NS.TEXT && el.localName === 's') { + const countRaw = el.getAttributeNS(ODF_NS.TEXT, 'c') ?? el.getAttribute('text:c'); + const count = Math.max(1, Number.parseInt(countRaw ?? '1', 10) || 1); + const spaces = ' '.repeat(count); + segments.push({ kind: 'virtual', visStart: visible.length, length: spaces.length }); + visible += spaces; + continue; + } + if (el.namespaceURI === ODF_NS.TEXT && el.localName === 'tab') { + segments.push({ kind: 'virtual', visStart: visible.length, length: 1 }); + visible += '\t'; + continue; + } + if (el.namespaceURI === ODF_NS.TEXT && el.localName === 'line-break') { + segments.push({ kind: 'virtual', visStart: visible.length, length: 1 }); + visible += '\n'; + continue; + } + // Other elements (text:span, hyperlink, etc.): recurse so their inner + // #text nodes are recorded as separate segments. + walk(el); + } + }; + + walk(block); + return { segments, visible }; +}