diff --git a/README.md b/README.md index 3caf6ec..a163d4c 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ const content = seeMarkReactParse(markdown); | Option Name | Type | Default Value | Description | | ------------------------- | ------- | -------------- | ------------------------------------------------------------------ | +| latexOnly | boolean | false | When true, disables AsciiMath support. Only LaTeX math expressions are parsed; backtick-delimited text is no longer tokenized as AsciiMath and is left to regular markdown handling. | | latexDelimiter | string | 'bracket' | The delimiter for LaTeX expressions. Options: 'bracket', 'dollar'. | | documentFormat | string | 'inline' | The format of the document. Options: 'inline', 'block'. | | imageFiles | object | { [ID]: File } | A map of image IDs to File objects for image rendering. | @@ -123,6 +124,13 @@ const toc = createTableOfContents(markdown); `createTableOfContents` parses a markdown string and returns a flat array of all h1–h6 headings in document order. The `id` of each entry is generated with the same slugify logic used by the seemark's markdown parser, so IDs are guaranteed to match the `id` prop on rendered heading components. +### Options + +| Option Name | Type | Default Value | Description | +| -------------- | ------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------- | +| latexOnly | boolean | false | When true, disables AsciiMath support. Must match the value used by the markdown renderer so that math tokens in headings are parsed consistently. | +| latexDelimiter | string | 'bracket' | The delimiter for LaTeX expressions. Must match the value used by the markdown renderer. Options: 'bracket', 'dollar'. 
| + ### Return value Each entry in the returned array has the following shape: diff --git a/package.json b/package.json index 47ee76d..fde1c7c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@coseeing/see-mark", - "version": "1.8.0", + "version": "1.9.0", "description": "A markdown parser for a11y", "main": "./lib/see-mark.cjs", "files": [ diff --git a/src/markdown-processor/markdown-processor.js b/src/markdown-processor/markdown-processor.js index d763989..d00e80c 100644 --- a/src/markdown-processor/markdown-processor.js +++ b/src/markdown-processor/markdown-processor.js @@ -24,6 +24,7 @@ export const createMarkdownProcessor = (options = {}) => { documentFormat: options.documentFormat, imageFiles: options.imageFiles, shouldBuildImageObjectURL: options.shouldBuildImageObjectURL, + latexOnly: options.latexOnly, extensions: [ math, nemeth, diff --git a/src/markdown-processor/markdown-processor.test.js b/src/markdown-processor/markdown-processor.test.js index a8774d0..c014a2a 100644 --- a/src/markdown-processor/markdown-processor.test.js +++ b/src/markdown-processor/markdown-processor.test.js @@ -308,6 +308,51 @@ describe('markdownProcessor', () => { expect(container).toMatchSnapshot(); }); + it('should handle asciimath expressions when latexOnly is false', () => { + const markdownContent = '`x^2`'; + const options = { + latexDelimiter: 'bracket', + documentFormat: 'inline', + imageFiles: {}, + latexOnly: false, + }; + + const result = markdownProcessor(markdownContent, options); + + const container = createDOMFromHTML(result); + + const mathEl = getElementByType(container, SUPPORTED_COMPONENT_TYPES.MATH); + + expect(mathEl).toBeTruthy(); + + const payload = JSON.parse( + mathEl.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES) + ); + + expect(payload.typed).toBe('asciimath'); + expect(payload.math).toBe('x^2'); + expect(payload.mathMl).toBeTruthy(); + expect(payload.svg).toBeTruthy(); + }); + + it('should not process asciimath expressions when 
latexOnly is true', () => { + const markdownContent = '`x^2`'; + const options = { + latexDelimiter: 'bracket', + documentFormat: 'inline', + imageFiles: {}, + latexOnly: true, + }; + + const result = markdownProcessor(markdownContent, options); + + const container = createDOMFromHTML(result); + + const mathEl = getElementByType(container, SUPPORTED_COMPONENT_TYPES.MATH); + + expect(mathEl).toBeNull(); + }); + it('should process image link', () => { const markdownContent = `![alt text](image-123)((https://example.com/target))`; const options = { diff --git a/src/markdown-processor/marked-extentions/math.js b/src/markdown-processor/marked-extentions/math.js index 9fc3b8b..751b284 100644 --- a/src/markdown-processor/marked-extentions/math.js +++ b/src/markdown-processor/marked-extentions/math.js @@ -49,29 +49,42 @@ const AsciiMath_delimiter_dict = { * @param {string} options.latexDelimiter - LaTeX delimiter style ('bracket', 'dollar', 'latex') * @param {string} options.asciimathDelimiter - AsciiMath delimiter style ('graveaccent', 'asciimath') * @param {string} options.documentFormat - Document format for MathML display ('inline' or 'block') + * @param {boolean} [options.latexOnly=false] - When true, disables AsciiMath support * @returns {Object} Marked extension object with math tokenizer and renderer */ -const markedMath = ({ latexDelimiter, asciimathDelimiter, documentFormat }) => { - const asciimath2mml = asciimath2mmlFactory({ - htmlMathDisplay: documentFormat, - }); +const markedMath = ({ + latexDelimiter, + asciimathDelimiter, + documentFormat, + latexOnly = false, +}) => { const latex2mml = latex2mmlFactory({ htmlMathDisplay: documentFormat }); + const asciimath2mml = latexOnly + ? 
null + : asciimath2mmlFactory({ htmlMathDisplay: documentFormat }); const LaTeX_delimiter = LaTeX_delimiter_dict[latexDelimiter]; - const AsciiMath_delimiter = AsciiMath_delimiter_dict[asciimathDelimiter]; const latex_restring = `(?<=[^\\\\]?)${LaTeX_delimiter.start}(.*?[^\\\\])?${LaTeX_delimiter.end}`; - const asciimath_restring = `(?<=[^\\\\]?)${AsciiMath_delimiter.start}(.*?[^\\\\])?${AsciiMath_delimiter.end}`; - const reTexMath = new RegExp( - `(.*?)(${latex_restring}|${asciimath_restring})`, - 's' - ); - const latex_start_restring = `(?<=[^\\\\]?)${LaTeX_delimiter.start}`; - const asciimath_start_restring = `(?<=[^\\\\]?)${AsciiMath_delimiter.start}`; - const reTexMath_start = new RegExp( - `${latex_start_restring}|${asciimath_start_restring}` - ); + + let reTexMath, reTexMath_start; + + if (latexOnly) { + reTexMath = new RegExp(`(.*?)(${latex_restring})`, 's'); + reTexMath_start = new RegExp(latex_start_restring); + } else { + const AsciiMath_delimiter = AsciiMath_delimiter_dict[asciimathDelimiter]; + const asciimath_restring = `(?<=[^\\\\]?)${AsciiMath_delimiter.start}(.*?[^\\\\])?${AsciiMath_delimiter.end}`; + const asciimath_start_restring = `(?<=[^\\\\]?)${AsciiMath_delimiter.start}`; + reTexMath = new RegExp( + `(.*?)(${latex_restring}|${asciimath_restring})`, + 's' + ); + reTexMath_start = new RegExp( + `${latex_start_restring}|${asciimath_start_restring}` + ); + } return { extensions: [ @@ -86,13 +99,17 @@ const markedMath = ({ latexDelimiter, asciimathDelimiter, documentFormat }) => { const match = reTexMath.exec(src); if (match) { const math = match[3] || match[4]; - const AsciiMath_delimiter_raw_start = - AsciiMath_delimiter.start.replace(/\\\\\\/g, '\\'); let typed; - if (match[2].startsWith(AsciiMath_delimiter_raw_start)) { - typed = 'asciimath'; - } else { + if (latexOnly) { typed = 'latex'; + } else { + const AsciiMath_delimiter = + AsciiMath_delimiter_dict[asciimathDelimiter]; + const AsciiMath_delimiter_raw_start = + 
AsciiMath_delimiter.start.replace(/\\\\\\/g, '\\'); + typed = match[2].startsWith(AsciiMath_delimiter_raw_start) + ? 'asciimath' + : 'latex'; } return { type: 'math', diff --git a/src/markdown-processor/marked-wrapper/marked-wrapper.js b/src/markdown-processor/marked-wrapper/marked-wrapper.js index f400fcd..decb4ce 100644 --- a/src/markdown-processor/marked-wrapper/marked-wrapper.js +++ b/src/markdown-processor/marked-wrapper/marked-wrapper.js @@ -8,6 +8,7 @@ const markedProcessorFactory = ({ documentFormat, imageFiles = {}, shouldBuildImageObjectURL = false, + latexOnly = false, extensions = [], }) => { const marked = new Marked(); @@ -30,6 +31,7 @@ const markedProcessorFactory = ({ documentFormat, imageFiles, shouldBuildImageObjectURL, + latexOnly, }) ); }); diff --git a/src/parsers/options.js b/src/parsers/options.js index 39fca46..56dea86 100644 --- a/src/parsers/options.js +++ b/src/parsers/options.js @@ -3,6 +3,7 @@ const DEFAULT_OPTINOS = { documentFormat: 'inline', imageFiles: null, shouldBuildImageObjectURL: false, + latexOnly: false, }; export const createMarkdownParserOptions = (options = DEFAULT_OPTINOS) => { diff --git a/src/table-of-contents/create-table-of-contents.js b/src/table-of-contents/create-table-of-contents.js index f86a365..77af3b4 100644 --- a/src/table-of-contents/create-table-of-contents.js +++ b/src/table-of-contents/create-table-of-contents.js @@ -34,6 +34,8 @@ function extractPlainText(inlineTokens = []) { * Must match the value used by the markdown renderer so that math tokens * inside headings are recognised and their text is correctly extracted. * Defaults to 'bracket'. + * @param {boolean} [options.latexOnly=false] - When true, disables AsciiMath support. + * Must match the value used by the markdown renderer. 
* @returns {{ level: number, id: string, text: string }[]} * * @example @@ -46,6 +48,7 @@ function extractPlainText(inlineTokens = []) { const createTableOfContents = (markdown, options = {}) => { const { lexer } = createMarkdownProcessor({ latexDelimiter: options.latexDelimiter ?? 'bracket', + latexOnly: options.latexOnly ?? false, }); const tokens = lexer(markdown); const usedIds = new Map();