From e2423e2794f9dbaf54594cb2c166af015473537a Mon Sep 17 00:00:00 2001 From: Mateusz Baginski Date: Mon, 6 Oct 2025 09:16:51 +0200 Subject: [PATCH 1/2] Add MS footnotes support to paste from office. --- .../src/filters/replacemsfootnotes.ts | 254 +++++++++ .../src/normalizers/mswordnormalizer.ts | 2 + .../tests/filters/replacemsfootnotes.js | 489 ++++++++++++++++++ 3 files changed, 745 insertions(+) create mode 100644 packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts create mode 100644 packages/ckeditor5-paste-from-office/tests/filters/replacemsfootnotes.js diff --git a/packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts b/packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts new file mode 100644 index 00000000000..304b6071393 --- /dev/null +++ b/packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts @@ -0,0 +1,254 @@ +/** + * @license Copyright (c) 2003-2025, CKSource Holding sp. z o.o. All rights reserved. + * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-licensing-options + */ + +/** + * @module paste-from-office/filters/replacemsfootnotes + */ + +import type { ViewDocumentFragment, ViewElement, ViewText, ViewUpcastWriter } from 'ckeditor5/src/engine.js'; + +/** + * Replaces MS Word specific footnotes references and definitions with proper elements. + * + * Things to know about MS Word footnotes: + * + * * Footnote references in Word are marked with `mso-footnote-id` style. + * * Word does not support nested footnotes, so references within definitions are ignored. + * * Word appends extra spaces after footnote references within definitions, which are trimmed. + * * Footnote definitions list is marked with `mso-element: footnote-list` style and contains `mso-element: footnote` elements. + * * Footnote definition might contain tables, lists and other elements, not only text. 
They are placed directly within the `li` element, + * without any wrapper (as opposed to the text content of the definition, which is placed within the `MsoFootnoteText` element). + * + * Example pseudo document showing MS Word footnote structure: + * + * ```html + *

Text with footnote[1] reference.

+ * + *
+ *
+ *

[1] Footnote content

+ * ...
+ *
+ *
+ * ``` + * + * Will be transformed into: + * + * ```html + *

Text with footnote1 reference.

+ * + *
    + *
  1. + * ^ + *
    + *

    Footnote content

    + * ...
    + *
    + *
  2. + *
+ * ``` + * + * @param documentFragment element `data.content` obtained from clipboard + * @param writer The view writer instance. + * @internal + */ +export function replaceMSFootnotes( documentFragment: ViewDocumentFragment, writer: ViewUpcastWriter ): void { + const msFootnotesRefs = new Map(); + const msFootnotesDefs = new Map(); + let msFootnotesDefinitionsList: ViewElement | null = null; + + // Phase 1: Collect all footnotes references and definitions. Find the footnotes definitions list element. + for ( const { item } of writer.createRangeIn( documentFragment ) ) { + if ( !item.is( 'element' ) ) { + continue; + } + + // If we spot a footnotes definitions list element, let's store it. It'll be replaced later. + // There should be only one such element in the document. + if ( item.getStyle( 'mso-element' ) === 'footnote-list' ) { + msFootnotesDefinitionsList = item; + continue; + } + + // If we spot a footnote reference or definition, store it in the corresponding map. + if ( item.hasStyle( 'mso-footnote-id' ) ) { + const msFootnoteDef = item.findAncestor( 'element', el => el.getStyle( 'mso-element' ) === 'footnote' ); + + if ( msFootnoteDef ) { + // If it's a reference within a definition, ignore it and track only the definition. + // MS Word does not support nested footnotes, so it's safe to assume that all references within + // a definition point to the same definition. + const msFootnoteDefId = msFootnoteDef.getAttribute( 'id' )!; + + msFootnotesDefs.set( msFootnoteDefId, msFootnoteDef ); + } else { + // If it's a reference outside of a definition, track it as a reference. + const msFootnoteRefId = item.getStyle( 'mso-footnote-id' )!; + + msFootnotesRefs.set( msFootnoteRefId, item ); + } + + continue; + } + } + + // If there are no footnote references or no definitions list, there's nothing to normalize. + if ( !msFootnotesRefs.size || !msFootnotesDefinitionsList ) { + return; + } + + // Phase 2: Replace footnotes definitions list with proper element. 
+ const footnotesDefinitionsList = createFootnotesListViewElement( writer ); + + writer.replace( msFootnotesDefinitionsList, footnotesDefinitionsList ); + + // Phase 3: Replace all footnotes references and add matching definitions to the definitions list. + for ( const [ footnoteId, msFootnoteRef ] of msFootnotesRefs ) { + const msFootnoteDef = msFootnotesDefs.get( footnoteId ); + + if ( !msFootnoteDef ) { + continue; + } + + // Replace footnote reference. + writer.replace( msFootnoteRef, createFootnoteRefViewElement( writer, footnoteId ) ); + + // Append found matching definition to the definitions list. + // Order doesn't matter here, as it'll be fixed in the post-fixer. + const defElements = createFootnoteDefViewElement( writer, footnoteId ); + + removeMSReferences( writer, msFootnoteDef ); + + // Append the definition's children (usually `MsoFootnoteText` paragraphs) to the content container, cloning elements. + for ( const child of msFootnoteDef.getChildren() ) { + let clonedChild = child; + + if ( child.is( 'element' ) ) { + clonedChild = writer.clone( child, true ); + } + + writer.appendChild( clonedChild, defElements.content ); + } + + writer.appendChild( defElements.listItem, footnotesDefinitionsList ); + } +} + +/** + * Removes all MS Office specific references from the given element. + * + * It also removes leading space from text nodes following the references, as MS Word adds + * them to separate the reference from the rest of the text. + * + * @param writer The view writer. + * @param element The element to trim. + * @returns The trimmed element. + */ +function removeMSReferences( writer: ViewUpcastWriter, element: ViewElement ): ViewElement { + const elementsToRemove: Array = []; + const textNodesToTrim: Array = []; + + for ( const { item } of writer.createRangeIn( element ) ) { + if ( item.is( 'element' ) && item.getStyle( 'mso-footnote-id' ) ) { + elementsToRemove.unshift( item ); + + // MS Word used to add spaces after footnote references within definitions. 
Let's check if there's a space after + // the footnote reference and mark it for trimming. + const { nextSibling } = item; + + if ( nextSibling?.is( '$text' ) && nextSibling.data.startsWith( ' ' ) ) { + textNodesToTrim.unshift( nextSibling ); + } + } + } + + for ( const element of elementsToRemove ) { + writer.remove( element ); + } + + // Remove only the leading space from text nodes following reference within definition, preserve the rest of the text. + for ( const textNode of textNodesToTrim ) { + const trimmedData = textNode.data.substring( 1 ); + + if ( trimmedData.length > 0 ) { + // Create a new text node and replace the old one. + const parent = textNode.parent!; + const index = parent.getChildIndex( textNode ); + const newTextNode = writer.createText( trimmedData ); + + writer.remove( textNode ); + writer.insertChild( index, newTextNode, parent ); + } else { + // If the text node contained only a space, remove it entirely. + writer.remove( textNode ); + } + } + + return element; +} + +/** + * Creates a footnotes list view element. + * + * @param writer The view writer instance. + * @returns The footnotes list view element. + */ +function createFootnotesListViewElement( writer: ViewUpcastWriter ): ViewElement { + return writer.createElement( 'ol', { class: 'footnotes' } ); +} + +/** + * Creates a footnote reference view element. + * + * @param writer The view writer instance. + * @param footnoteId The footnote ID. + * @returns The footnote reference view element. + */ +function createFootnoteRefViewElement( writer: ViewUpcastWriter, footnoteId: string ): ViewElement { + const sup = writer.createElement( 'sup', { class: 'footnote' } ); + const link = writer.createElement( 'a', { + id: `ref-${ footnoteId }`, + href: `#${ footnoteId }` + } ); + + writer.appendChild( link, sup ); + + return sup; +} + +/** + * Creates a footnote definition view element with a backlink and a content container. + * + * @param writer The view writer instance. 
+ * @param footnoteId The footnote ID. + * @returns An object containing the list item element and the content container. + */ +function createFootnoteDefViewElement( writer: ViewUpcastWriter, footnoteId: string ): { + listItem: ViewElement; + content: ViewElement; +} { + const listItem = writer.createElement( 'li', { + id: footnoteId, + class: 'footnote-definition' + } ); + + const backLink = writer.createElement( 'a', { + href: `#ref-${ footnoteId }`, + class: 'footnote-backlink' + } ); + + const content = writer.createElement( 'div', { + class: 'footnote-content' + } ); + + writer.appendChild( writer.createText( '^' ), backLink ); + writer.appendChild( backLink, listItem ); + writer.appendChild( content, listItem ); + + return { + listItem, + content + }; +} diff --git a/packages/ckeditor5-paste-from-office/src/normalizers/mswordnormalizer.ts b/packages/ckeditor5-paste-from-office/src/normalizers/mswordnormalizer.ts index 1013e334aa9..55c5b6925a7 100644 --- a/packages/ckeditor5-paste-from-office/src/normalizers/mswordnormalizer.ts +++ b/packages/ckeditor5-paste-from-office/src/normalizers/mswordnormalizer.ts @@ -13,6 +13,7 @@ import { replaceImagesSourceWithBase64 } from '../filters/image.js'; import { removeMSAttributes } from '../filters/removemsattributes.js'; import { transformTables } from '../filters/table.js'; import { removeInvalidTableWidth } from '../filters/removeinvalidtablewidth.js'; +import { replaceMSFootnotes } from '../filters/replacemsfootnotes.js'; import { ViewUpcastWriter, type ViewDocument } from 'ckeditor5/src/engine.js'; import type { PasteFromOfficeNormalizer, PasteFromOfficeNormalizerData } from '../normalizer.js'; @@ -56,6 +57,7 @@ export class PasteFromOfficeMSWordNormalizer implements PasteFromOfficeNormalize replaceImagesSourceWithBase64( documentFragment, data.dataTransfer.getData( 'text/rtf' ) ); transformTables( documentFragment, writer ); removeInvalidTableWidth( documentFragment, writer ); + replaceMSFootnotes( 
documentFragment, writer ); removeMSAttributes( documentFragment ); data.content = documentFragment; diff --git a/packages/ckeditor5-paste-from-office/tests/filters/replacemsfootnotes.js b/packages/ckeditor5-paste-from-office/tests/filters/replacemsfootnotes.js new file mode 100644 index 00000000000..460495acc90 --- /dev/null +++ b/packages/ckeditor5-paste-from-office/tests/filters/replacemsfootnotes.js @@ -0,0 +1,489 @@ +/** + * @license Copyright (c) 2003-2025, CKSource Holding sp. z o.o. All rights reserved. + * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-licensing-options + */ + +import { HtmlDataProcessor, ViewUpcastWriter, ViewDocument, StylesProcessor } from '@ckeditor/ckeditor5-engine'; +import { replaceMSFootnotes } from '../../src/filters/replacemsfootnotes.js'; + +describe( 'PasteFromOffice - filters', () => { + const htmlDataProcessor = new HtmlDataProcessor( new ViewDocument( new StylesProcessor() ) ); + + describe( 'replaceMSFootnotes', () => { + let writer, viewDocument; + + before( () => { + viewDocument = new ViewDocument(); + writer = new ViewUpcastWriter( viewDocument ); + } ); + + it( 'should transform single MS footnote if footnotes list present', () => { + const inputData = ` +

+ Hello World + + + + + [1] + + + + + 213213 + +

+
+ +
+
+ +
+

+ + + + + [1] + + + + + Test footnote + +

+
+
+ `; + + const documentFragment = htmlDataProcessor.toView( inputData ); + + replaceMSFootnotes( documentFragment, writer ); + + expect( htmlDataProcessor.toData( documentFragment ) ).to.equal( + '

' + + 'Hello World ' + + '' + + '' + + '' + + '213213 ' + + '

' + + '
    ' + + '
  1. ' + + '^' + + '
    ' + + '

    Test footnote 

    ' + + '
    ' + + '
  2. ' + + '
' + ); + } ); + + it( 'should transform multiple MS footnotes if footnotes list present', () => { + const inputData = ` +

+ Hello World + + + + + [1] + + + + + 213213 + + + + + [2] + + + + + +

+
+ +
+
+ +
+

+ + + + + [1] + + + + + Test foot note 1 + +

+
+
+

+ + + + + [2] + + + + + Test foot note 2 + +

+
+
+ `; + + const documentFragment = htmlDataProcessor.toView( inputData ); + + replaceMSFootnotes( documentFragment, writer ); + + expect( htmlDataProcessor.toData( documentFragment ) ).to.equal( + '

' + + 'Hello World ' + + '' + + '' + + '' + + '213213 ' + + '' + + '' + + '' + + '' + + '

' + + '
    ' + + '
  1. ' + + '^' + + '
    ' + + '

    Test foot note 1 

    ' + + '
    ' + + '
  2. ' + + '
  3. ' + + '^' + + '
    ' + + '

    Test foot note 2 

    ' + + '
    ' + + '
  4. ' + + '
' + ); + } ); + + it( 'should not transform MS footnotes if footnotes list is missing', () => { + const inputData = ` +

+ Hello World + + + + + [1] + + + + + 213213 + +

+ `; + + const documentFragment = htmlDataProcessor.toView( inputData ); + + replaceMSFootnotes( documentFragment, writer ); + + expect( htmlDataProcessor.toData( documentFragment ) ).to.equal( + '

' + + 'Hello World ' + + '' + + '' + + '' + + '[1] ' + + '' + + '' + + '' + + '213213 ' + + '

' + ); + } ); + + it( 'should handle scenario when there is footnote reference that does not have corresponding definition', () => { + const inputData = ` +

+ Hello World + + + + + [1] + + + + + 213213 + + + + + [2] + + + + + +

+
+ +
+
+ +
+

+ + + + + [1] + + + + + Test foot note 1 + +

+
+
+ `; + + const documentFragment = htmlDataProcessor.toView( inputData ); + + replaceMSFootnotes( documentFragment, writer ); + + expect( htmlDataProcessor.toData( documentFragment ) ).to.equal( + '

' + + 'Hello World ' + + '' + + '' + + '' + + '213213 ' + + '' + + '' + + '' + + '[2] ' + + '' + + '' + + '' + + '' + + '

' + + '
    ' + + '
  1. ' + + '^' + + '
    ' + + '

    Test foot note 1 

    ' + + '
    ' + + '
  2. ' + + '
' + ); + } ); + + it( 'should remove single space text nodes following footnote references in definitions', () => { + const inputData = ` +

+ Hello World + + + + + [1] + + + + + +

+
+ +
+
+ +
+

+ + + + + [1] + + + +  Footnote content + +

+
+
+ `; + + const documentFragment = htmlDataProcessor.toView( inputData ); + + replaceMSFootnotes( documentFragment, writer ); + + expect( htmlDataProcessor.toData( documentFragment ) ).to.equal( + '

' + + 'Hello World ' + + '' + + '' + + '' + + '' + + '

' + + '
    ' + + '
  1. ' + + '^' + + '
    ' + + '

    Footnote content 

    ' + + '
    ' + + '
  2. ' + + '
' + ); + } ); + + it( 'should work properly if there are no text in footnote definition except the reference', () => { + const inputData = ` +

+ Hello World + + + + + [1] + + + + + +

+
+ +
+
+ + +
+ `; + + const documentFragment = htmlDataProcessor.toView( inputData ); + + replaceMSFootnotes( documentFragment, writer ); + + expect( htmlDataProcessor.toData( documentFragment ) ).to.equal( + '

' + + 'Hello World ' + + '' + + '' + + '' + + '' + + '

' + + '
    ' + + '
  1. ' + + '^' + + '
    ' + + '

    ' + + '
    ' + + '
  2. ' + + '
' + ); + } ); + + it( 'should handle scenario when there is table in footnote definition', () => { + const inputData = ` +

+ Hello World + + + + + [1] + + + + + +

+
+ +
+
+ +
+

+ + + + + [1] + + + +   + + + + + +
Cell 1Cell 2
+ +

+
+
+ `; + + const documentFragment = htmlDataProcessor.toView( inputData ); + + replaceMSFootnotes( documentFragment, writer ); + + expect( htmlDataProcessor.toData( documentFragment ) ).to.equal( + '

' + + 'Hello World ' + + '' + + '' + + '' + + '' + + '

' + + '
    ' + + '
  1. ' + + '^' + + '
    ' + + '

    ' + + '' + + '' + + '' + + '' + + '' + + '' + + '' + + '
    Cell 1Cell 2
    ' + + '' + + '

    ' + + '
    ' + + '
  2. ' + + '
' + ); + } ); + } ); +} ); From 3c646f9fa2bf4ef6d1a5114658bc47d41549e59d Mon Sep 17 00:00:00 2001 From: Kuba Niegowski <1232187+niegowski@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:22:40 +0200 Subject: [PATCH 2/2] Apply suggestions from code review. --- .../src/filters/replacemsfootnotes.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts b/packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts index 304b6071393..d6d5643e259 100644 --- a/packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts +++ b/packages/ckeditor5-paste-from-office/src/filters/replacemsfootnotes.ts @@ -50,7 +50,7 @@ import type { ViewDocumentFragment, ViewElement, ViewText, ViewUpcastWriter } fr * * ``` * - * @param documentFragment element `data.content` obtained from clipboard + * @param documentFragment `data.content` obtained from clipboard. * @param writer The view writer instance. * @internal */