diff --git a/doc/main.html b/doc/main.html index f45670e..bb206a0 100644 --- a/doc/main.html +++ b/doc/main.html @@ -618,12 +618,12 @@
sectionsectiondt element, in which case all the terms, abbreviations and symbols in the immediately
preceding dt elements are synonyms.
dd elements following the dt element(s) shall appear in the following
- order — each group is optional (except the definition), but if present, it shall appear in this sequence:
+ order - each group is optional (except the definition), but if present, it shall appear in this sequence:
dd element, if present, shall have a class
attribute containing deprecated, indicating the term is deprecated. At most one such element is permitted.sectionTables may include footnotes. Footnotes shall appear in a tfoot element at the foot of the table,
- each as a p element with class="footnote" and a unique id attribute. Footnotes are automatically assigned superscript lowercase letters (a, b, c, …) in the order they appear in the tfoot element.
+ each as a p element with class="footnote" and a unique id attribute. Footnotes are automatically assigned superscript lowercase letters (a, b, c, etc.) in the order they appear in the tfoot element.
References to footnotes within the table body are marked with an a element whose href
attribute references the footnote's id.
Tools such as https://temml.org/ can be used to convert from LaTeX to MathML.
+The Unicode minus sign (−) is used in math expressions instead of the hyphen-minus, e.g. −42 renders as −42, and 6x − 103y renders as 6x − 103y.
Characters should be encoded as UTF-8-encoded Unicode codepoints, e.g. あ, instead of HTML entities, e.g. &#x3042;, except as needed for usage in pre or examples.
The characters listed in the table below are an exception to this guidance. They are commonly inserted by word processors (e.g. curly quotes substituted for straight quotes, en/em dashes substituted for hyphens) and shall not appear as literal codepoints in the document source. When one of these characters is genuinely needed, it shall be written as a Unicode character reference (e.g. &#x201C;) so that author intent is explicit.
| Codepoint | Name | Preview | Reference |
|---|---|---|---|
| U+2018 | Left Single Quotation Mark | ‘ | &#x2018; |
| U+2019 | Right Single Quotation Mark | ’ | &#x2019; |
| U+201C | Left Double Quotation Mark | “ | &#x201C; |
| U+201D | Right Double Quotation Mark | ” | &#x201D; |
| U+2013 | En Dash | – | &#x2013; |
| U+2014 | Em Dash | — | &#x2014; |
| U+2011 | Non-Breaking Hyphen | ‑ | &#x2011; |
| U+2026 | Horizontal Ellipsis | … | &#x2026; |
| U+00A0 | No-Break Space | [ ] | &#xA0; |
| U+00AD | Soft Hyphen | [­] | &#xAD; |
| U+00A9 | Copyright Sign | © | &#xA9; |
| U+00AE | Registered Sign | ® | &#xAE; |
The validator reports each literal occurrence with line and column information. Numeric or named character references (e.g. &#x201C;, &ldquo;) are recognized as deliberate and are not flagged.
class attributesp or div elements to create a formatted and automatically numbered example. See .footnotep element inside a tfoot element to create a table footnote. Footnotes are only permitted inside a tfoot element — use outside of tables is not allowed. See .p element inside a tfoot element to create a table footnote. Footnotes are only permitted inside a tfoot element - use outside of tables is not allowed. See .center-cell;right-cell;
- Alignment classes are available for columns 1–8 using col-x-center, col-x-right, col-x-left, and col-x-top. Multiple classes may be combined on the same table to align different columns independently.
+ Alignment classes are available for columns 1-8 using col-x-center, col-x-right, col-x-left, and col-x-top. Multiple classes may be combined on the same table to align different columns independently.
Tables over 8 columns wide should not be used and instead split into multiple smaller tables. diff --git a/js/validate.mjs b/js/validate.mjs index 1145f57..d63eb37 100644 --- a/js/validate.mjs +++ b/js/validate.mjs @@ -115,15 +115,179 @@ function validateFootnoteReferences(root, logger) { } } -export function validateDataIncludes(doc, logger, fileExists = null) { +export function validateDataIncludes(doc, logger, readFile = null) { + if (readFile === null) return; for (const el of doc.querySelectorAll("pre[data-include]")) { const src = el.getAttribute("data-include"); - if (fileExists === null || !fileExists(src)) + const content = readFile(src); + if (content === null) { logger.error(`data-include file not found: ${src}`, el); + continue; + } + for (const m of content.matchAll(ILLEGAL_CHARS_RE)) { + const before = content.slice(0, m.index); + const line = before.split("\n").length; + const lastNl = before.lastIndexOf("\n"); + const col = lastNl === -1 ? m.index + 1 : m.index - lastNl; + const message = `Illegal character ${_formatChar(m[0])} in ${src} at line ${line}, column ${col}`; + if (typeof logger.errorList === "function") + logger.error(message, el); + else + logger.error(message); + } + } +} + +const ILLEGAL_CHARS_RE = /[‘’“”–—‑… ©®]/g; + +function _formatChar(ch) { + return `U+${ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0")}`; +} + +const ILLEGAL_CHARS_SET = new Set("‘’“”–—‑… ©®"); +const NAMED_ENTITIES = { + ldquo: "“", rdquo: "”", lsquo: "‘", rsquo: "’", + ndash: "–", mdash: "—", hellip: "…", NonBreakingHyphen: "‑", + nbsp: " ", shy: "", copy: "©", reg: "®", +}; +const ID_ATTR_RE = /\bid\s*=\s*["']([^"']+)["']/g; + +function _resolveEntity(body) { + if (body[0] === "#") { + const isHex = body[1] === "x" || body[1] === "X"; + const num = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10); + return Number.isFinite(num) ? 
String.fromCodePoint(num) : null; + } + return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, body) ? NAMED_ENTITIES[body] : null; +} + +function _scanSource(source) { + const textTokens = []; + const attrLiterals = []; + let inTag = false; + let inComment = false; + let inRawText = null; + let i = 0; + while (i < source.length) { + if (inComment) { + if (source.startsWith("-->", i)) { inComment = false; i += 3; continue; } + i++; + continue; + } + if (inRawText) { + const closeRe = new RegExp(`^${inRawText}\\s*>`, "i"); + const m = source.slice(i).match(closeRe); + if (m) { inRawText = null; i += m[0].length; continue; } + i++; + continue; + } + if (inTag) { + if (source[i] === ">") { inTag = false; i++; continue; } + if (ILLEGAL_CHARS_SET.has(source[i])) + attrLiterals.push({ char: source[i], srcIdx: i }); + i++; + continue; + } + if (source.startsWith("