// stripLeadingDuplicateTitleJSONML is the JSONML-path counterpart of
// stripLeadingDuplicateTitleHeading. It removes the first content node of
// jsonmlBody when that node is an h1 whose concatenated text equals name
// (compared after trimming surrounding whitespace, case-insensitively), so a
// document whose title is rendered separately does not show that title twice.
//
// jsonmlBody must be a JSON array: either a bare node list or a list wrapped
// as ["root", {attrs}, ...nodes]. The attributes object after "root" is
// treated as optional and is skipped only when it really is a JSON object.
//
// Only the top-level array and the candidate node are decoded. Every other
// node is carried through as raw JSON, so large integers and attribute key
// order in untouched nodes are preserved instead of being round-tripped
// through float64 and map[string]any.
//
// It returns the rewritten body and true when a heading was removed. For an
// empty name, a parse failure, or a non-match it returns jsonmlBody unchanged
// and false, so it never blocks an otherwise valid write.
func stripLeadingDuplicateTitleJSONML(jsonmlBody, name string) (string, bool) {
	name = strings.TrimSpace(name)
	if name == "" {
		return jsonmlBody, false
	}

	var body []json.RawMessage
	if err := json.Unmarshal([]byte(jsonmlBody), &body); err != nil {
		return jsonmlBody, false
	}

	first := jsonmlFirstContentIndex(body)
	if first >= len(body) {
		return jsonmlBody, false
	}

	// Decode just the candidate node; a non-array (or null) element is not a
	// heading and leaves the body untouched.
	var node []any
	if err := json.Unmarshal(body[first], &node); err != nil || len(node) == 0 {
		return jsonmlBody, false
	}
	if tag, ok := node[0].(string); !ok || tag != "h1" {
		return jsonmlBody, false
	}
	if !strings.EqualFold(strings.TrimSpace(jsonmlNodeText(node)), name) {
		return jsonmlBody, false
	}

	body = append(body[:first], body[first+1:]...)
	out, err := json.Marshal(body)
	if err != nil {
		return jsonmlBody, false
	}
	return string(out), true
}

// jsonmlFirstContentIndex returns the index of the first content node in a
// top-level JSONML array: 0 for a bare node list, or the position after the
// "root" tag (and its attributes object, when one is present) for a wrapped
// body. The result may equal len(body) when the wrapper has no content.
func jsonmlFirstContentIndex(body []json.RawMessage) int {
	if len(body) == 0 {
		return 0
	}
	var tag string
	if err := json.Unmarshal(body[0], &tag); err != nil || tag != "root" {
		return 0
	}
	if len(body) > 1 && strings.HasPrefix(strings.TrimSpace(string(body[1])), "{") {
		return 2
	}
	return 1
}

// jsonmlNodeText concatenates all string leaves of a JSONML node, skipping the
// leading tag name and any attributes object, so ["h1", {..}, ["span", {..},
// ["span", {..}, "Title"]]] yields "Title". Non-string, non-array leaves
// (numbers, booleans, null) contribute nothing.
func jsonmlNodeText(node any) string {
	switch v := node.(type) {
	case string:
		return v
	case []any:
		var b strings.Builder
		for i, child := range v {
			switch c := child.(type) {
			case string:
				// A string in position 0 is the element's tag name, not text.
				if i == 0 {
					continue
				}
				b.WriteString(c)
			case map[string]any:
				// Attribute objects never carry visible text.
			case []any:
				b.WriteString(jsonmlNodeText(c))
			}
		}
		return b.String()
	default:
		return ""
	}
}
+func TestDocCreateStripsDuplicateTitleJSONML(t *testing.T) { + runner := &docCommandRunner{responses: []map[string]any{{"nodeId": "NODE_X"}}} + root := newDocTestRoot(runner) + + body := `{"jsonml":[["h1",{},"命令树参考"],["p",{},"正文"]]}` + _, errOut, err := executeDocCommand(t, root, + "create", "--name", "命令树参考", + "--content-format", "jsonml", "--content", body) + if err != nil { + t.Fatalf("execute: %v\nstderr:\n%s", err, errOut) + } + if len(runner.all) != 2 { + t.Fatalf("calls = %d, want 2 (create + update)", len(runner.all)) + } + if runner.all[1].Tool != "update_document" { + t.Fatalf("second tool = %q, want update_document", runner.all[1].Tool) + } + got, _ := runner.all[1].Params["jsonml"].(string) + if strings.Contains(got, "命令树参考") { + t.Errorf("jsonml = %q, want duplicate h1 stripped", got) + } + if !strings.Contains(got, "正文") { + t.Errorf("jsonml = %q, want body content kept", got) + } + if !strings.Contains(errOut, "已自动移除") { + t.Errorf("stderr = %q, want a note about the removed heading", errOut) + } +} + +// TestStripLeadingDuplicateTitleJSONML covers the JSONML-path counterpart: a +// leading h1 node whose text equals the document name is removed; everything +// else is left untouched. 
+func TestStripLeadingDuplicateTitleJSONML(t *testing.T) { + tests := []struct { + name string + body string + docName string + want string + stripped bool + }{ + { + name: "root-wrapped duplicate h1 stripped", + body: `["root",{},["h1",{},"晚会简报"],["p",{},"正文"]]`, + docName: "晚会简报", + want: `["root",{},["p",{},"正文"]]`, + stripped: true, + }, + { + name: "nested leaf text matches and stripped", + body: `["root",{},["h1",{"uuid":"x"},["span",{"data-type":"text"},["span",{"data-type":"leaf"},"晚会简报"]]],["p",{},"正文"]]`, + docName: "晚会简报", + want: `["root",{},["p",{},"正文"]]`, + stripped: true, + }, + { + name: "bare body without root wrapper", + body: `[["h1",{},"标题"],["p",{},"正文"]]`, + docName: "标题", + want: `[["p",{},"正文"]]`, + stripped: true, + }, + { + name: "case insensitive match", + body: `["root",{},["h1",{},"Weekly REPORT"],["p",{},"x"]]`, + docName: "weekly report", + want: `["root",{},["p",{},"x"]]`, + stripped: true, + }, + { + name: "distinct heading kept", + body: `["root",{},["h1",{},"背景"],["p",{},"正文"]]`, + docName: "晚会简报", + want: `["root",{},["h1",{},"背景"],["p",{},"正文"]]`, + stripped: false, + }, + { + name: "non-h1 leading node kept", + body: `["root",{},["h2",{},"晚会简报"],["p",{},"正文"]]`, + docName: "晚会简报", + want: `["root",{},["h2",{},"晚会简报"],["p",{},"正文"]]`, + stripped: false, + }, + { + name: "invalid json untouched", + body: `not json`, + docName: "x", + want: `not json`, + stripped: false, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got, ok := stripLeadingDuplicateTitleJSONML(tc.body, tc.docName) + if ok != tc.stripped { + t.Fatalf("stripped = %v, want %v", ok, tc.stripped) + } + if got != tc.want { + t.Fatalf("body = %q, want %q", got, tc.want) + } + }) + } +}