diff --git a/doc/main.html b/doc/main.html
index f45670e..bb206a0 100644
--- a/doc/main.html
+++ b/doc/main.html
@@ -618,12 +618,12 @@ <h4>Normative references <code>section</code></h4>
       &lt;a&gt;https://html.spec.whatwg.org/multipage/&lt;/a&gt;
     &lt;/li&gt;
     &lt;li&gt;
-      &lt;cite id=&quot;bib-SMPTE-st429-18&quot;&gt;SMPTE ST 429-18&lt;/cite&gt;, D-Cinema Packaging — Immersive Audio Track 
+      &lt;cite id=&quot;bib-SMPTE-st429-18&quot;&gt;SMPTE ST 429-18&lt;/cite&gt;, D-Cinema Packaging - Immersive Audio Track 
       File 
       &lt;span class=&quot;doi&quot;&gt;10.5594/SMPTE.ST429-18&lt;/span&gt;
     &lt;/li&gt;
     &lt;li&gt;
-      &lt;cite id=&quot;bib-SMPTE-st429-18-2023&quot;&gt;SMPTE ST 429-18:2023&lt;/cite&gt;, D-Cinema Packaging — Immersive 
+      &lt;cite id=&quot;bib-SMPTE-st429-18-2023&quot;&gt;SMPTE ST 429-18:2023&lt;/cite&gt;, D-Cinema Packaging - Immersive 
       Audio Track File (2023 Edition)
       &lt;span class=&quot;doi&quot;&gt;10.5594/SMPTE.ST429-18.2023&lt;/span&gt;
     &lt;/li&gt;
@@ -665,7 +665,7 @@ <h4>Terms and definitions <code>section</code></h4>
               can follow the first <code>dt</code> element, in which case all the terms, abbreviations and symbols in the immediately
               preceding <code>dt</code> elements are synonyms.</li>
               <li>The <code>dd</code> elements following the <code>dt</code> element(s) shall appear in the following
-              order — each group is optional (except the definition), but if present, it shall appear in this sequence:
+              order - each group is optional (except the definition), but if present, it shall appear in this sequence:
                 <ol>
                   <li><strong>Deprecated:</strong> the first <code>dd</code> element, if present, shall have a <code>class</code>
                   attribute containing <code>deprecated</code>, indicating the term is deprecated. At most one such element is permitted.</li>
@@ -700,7 +700,7 @@ <h4>Terms and definitions <code>section</code></h4>
     manufacture at regular intervals, typically one foot&lt;/dd&gt;
     &lt;dd class="example"&gt;An example of a key number is 12345.&lt;/dd&gt;
     &lt;dd class="note"&gt;Key number shall not be confused with key frame.&lt;/dd&gt;
-    &lt;dd class="source"&gt;&lt;a href=&quot;#bib-key-number-spec&quot;&gt;&lt;/a&gt;, modified — definition has been updated.&lt;/dd&gt;
+    &lt;dd class="source"&gt;&lt;a href=&quot;#bib-key-number-spec&quot;&gt;&lt;/a&gt;, modified - definition has been updated.&lt;/dd&gt;
   &lt;/dl&gt;
 &lt;/section&gt;
 </pre>
@@ -1156,7 +1156,7 @@ <h4>Tables</h4>
         </p>
 
         <p>Tables may include footnotes. Footnotes shall appear in a <code>tfoot</code> element at the foot of the table,
-        each as a <code>p</code> element with <code>class="footnote"</code> and a unique <code>id</code> attribute. Footnotes are automatically assigned superscript lowercase letters (a, b, c, …) in the order they appear in the <code>tfoot</code> element. 
+        each as a <code>p</code> element with <code>class="footnote"</code> and a unique <code>id</code> attribute. Footnotes are automatically assigned superscript lowercase letters (a, b, c, etc.) in the order they appear in the <code>tfoot</code> element. 
         References to footnotes within the table body are marked with an <code>a</code> element whose <code>href</code>
         attribute references the footnote's <code>id</code>.</p>
 
@@ -1434,13 +1434,40 @@ <h4>Formulae</h4>
 
         <p class="note">Tools such as <a>https://temml.org/</a> can be used to convert from Latex to MathML.</p>
 
+        <p>The Unicode minus sign (<code>&amp;#x2212;</code>) is used in math expressions instead of the hyphen-minus, e.g. <code>&amp;#x2212;42</code> renders as &#x2212;42, and <code>6x &amp;#x2212; 103y</code> renders as 6x &#x2212; 103y.</p>
+
       </section>
 
       <section id="sec-special-characters">
         <h4>Special Characters</h4>
         <p>Characters should be encoded as UTF-8-encoded Unicode codepoints, e.g. あ, instead of HTML entities, e.g. <code>&amp;#x3042;</code>, except as needed for usage in <code>pre</code> or examples.
         </p>
-    
+
+        <p>The characters listed in <a href="#tab-illegal-characters"></a> are an exception to this guidance. They are commonly inserted by word processors (e.g. curly quotes substituted for straight quotes, en/em dashes substituted for hyphens) and shall not appear as literal codepoints in the document source. When one of these characters is genuinely needed, it shall be written as a Unicode character reference (e.g. <code>&amp;#x201C;</code>) so that author intent is explicit.</p>
+
+        <table id="tab-illegal-characters" class="col-3-center">
+          <caption>Characters not permitted as literals in document source</caption>
+          <thead>
+            <tr><th>Codepoint</th><th>Name</th><th>Preview</th><th>Reference</th></tr>
+          </thead>
+          <tbody>
+            <tr><td>U+2018</td><td>Left Single Quotation Mark</td><td>&#x2018;</td><td><code>&amp;#x2018;</code></td></tr>
+            <tr><td>U+2019</td><td>Right Single Quotation Mark</td><td>&#x2019;</td><td><code>&amp;#x2019;</code></td></tr>
+            <tr><td>U+201C</td><td>Left Double Quotation Mark</td><td>&#x201C;</td><td><code>&amp;#x201C;</code></td></tr>
+            <tr><td>U+201D</td><td>Right Double Quotation Mark</td><td>&#x201D;</td><td><code>&amp;#x201D;</code></td></tr>
+            <tr><td>U+2013</td><td>En Dash</td><td>&#x2013;</td><td><code>&amp;#x2013;</code></td></tr>
+            <tr><td>U+2014</td><td>Em Dash</td><td>&#x2014;</td><td><code>&amp;#x2014;</code></td></tr>
+            <tr><td>U+2011</td><td>Non-Breaking Hyphen</td><td>&#x2011;</td><td><code>&amp;#x2011;</code></td></tr>
+            <tr><td>U+2026</td><td>Horizontal Ellipsis</td><td>&#x2026;</td><td><code>&amp;#x2026;</code></td></tr>
+            <tr><td>U+00A0</td><td>No-Break Space</td><td>[&#xA0;]</td><td><code>&amp;#xA0;</code></td></tr>
+            <tr><td>U+00AD</td><td>Soft Hyphen</td><td>[&#xAD;]</td><td><code>&amp;#xAD;</code></td></tr>
+            <tr><td>U+00A9</td><td>Copyright Sign</td><td>&#xA9;</td><td><code>&amp;#xA9;</code></td></tr>
+            <tr><td>U+00AE</td><td>Registered Sign</td><td>&#xAE;</td><td><code>&amp;#xAE;</code></td></tr>
+          </tbody>
+        </table>
+
+        <p class="note">The validator reports each literal occurrence with line and column information. Numeric and named character references (e.g. <code>&amp;#x201C;</code>, <code>&amp;ldquo;</code>) are recognized as deliberate and are not flagged.</p>
+
       </section>
       
       <section id="sec-code-element-usage">
@@ -1489,7 +1516,7 @@ <h4>Using <code>class</code> attributes</h4>
           <dd>Applied to <code>p</code> or <code>div</code> elements to create a formatted and automatically numbered example. See <a href="#sec-example-class"></a>.</dd>
 
           <dt><code>footnote</code></dt>
-          <dd>Applied to a <code>p</code> element inside a <code>tfoot</code> element to create a table footnote. Footnotes are only permitted inside a <code>tfoot</code> element — use outside of tables is not allowed. See <a href="#sec-table-element"></a>.</dd>
+          <dd>Applied to a <code>p</code> element inside a <code>tfoot</code> element to create a table footnote. Footnotes are only permitted inside a <code>tfoot</code> element - use outside of tables is not allowed. See <a href="#sec-table-element"></a>.</dd>
 
           <dt><code>center-cell</code>;</dt>
           <dt><code>right-cell</code>;</dt>
@@ -1953,7 +1980,7 @@ <h4>Column Alignment</h4>
         </table>
 
         <p class="note">
-          Alignment classes are available for columns 1–8 using <code>col-x-center</code>, <code>col-x-right</code>, <code>col-x-left</code>, and <code>col-x-top</code>. Multiple classes may be combined on the same <code>table</code> to align different columns independently.
+          Alignment classes are available for columns 1-8 using <code>col-x-center</code>, <code>col-x-right</code>, <code>col-x-left</code>, and <code>col-x-top</code>. Multiple classes may be combined on the same <code>table</code> to align different columns independently.
         </p>
         <p class="note">
           Tables over 8 columns wide should not be used and instead split into multiple smaller tables.
diff --git a/js/validate.mjs b/js/validate.mjs
index 1145f57..d63eb37 100644
--- a/js/validate.mjs
+++ b/js/validate.mjs
@@ -115,15 +115,179 @@ function validateFootnoteReferences(root, logger) {
   }
 }
 
-export function validateDataIncludes(doc, logger, fileExists = null) {
+// Reports each pre[data-include] whose file cannot be read, plus every illegal
+// literal character inside a readable include. readFile(src) returns the text or
+// null; a null readFile (the two-argument call in smpte.js after render) reports
+// every matched element as not found, exactly as the code being replaced did.
+export function validateDataIncludes(doc, logger, readFile = null) {
   for (const el of doc.querySelectorAll("pre[data-include]")) {
     const src = el.getAttribute("data-include");
-    if (fileExists === null || !fileExists(src))
+    const content = readFile === null ? null : readFile(src);
+    if (content === null) {
       logger.error(`data-include file not found: ${src}`, el);
+      continue;
+    }
+    for (const m of content.matchAll(ILLEGAL_CHARS_RE)) {
+      const { line, col } = _findLineCol(content, m.index);
+      const message = `Illegal character ${_formatChar(m[0])} in ${src} at line ${line}, column ${col}`;
+      if (typeof logger.errorList === "function")
+        logger.error(message, el);
+      else
+        logger.error(message);
+    }
+  }
+}
+
+// Characters rejected as literals; written as escapes because U+00A0 and U+00AD are invisible in an editor.
+const ILLEGAL_CHARS_RE = /[\u2018\u2019\u201C\u201D\u2013\u2014\u2011\u2026\u00A0\u00AD\u00A9\u00AE]/g;
+const ILLEGAL_CHARS_SET = new Set("\u2018\u2019\u201C\u201D\u2013\u2014\u2011\u2026\u00A0\u00AD\u00A9\u00AE");
+
+// Formats the first code point of ch as "U+" plus uppercase hex, zero-padded to four digits.
+function _formatChar(ch) {
+  return `U+${ch.codePointAt(0).toString(16).toUpperCase().padStart(4, "0")}`;
+}
+const NAMED_ENTITIES = { // named character references mapped to the character they stand for
+  ldquo: "\u201C", rdquo: "\u201D", lsquo: "\u2018", rsquo: "\u2019", ndash: "\u2013", mdash: "\u2014",
+  hellip: "\u2026", NonBreakingHyphen: "\u2011", nbsp: "\u00A0", shy: "\u00AD", copy: "\u00A9", reg: "\u00AE",
+};
+const ID_ATTR_RE = /\bid\s*=\s*["']([^"']+)["']/g;
+
+// Maps an entity body (the text between "&" and ";") to its character, or null if it cannot be resolved.
+function _resolveEntity(body) {
+  if (body[0] !== "#")
+    return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, body) ? NAMED_ENTITIES[body] : null;
+  const hex = body[1] === "x" || body[1] === "X";
+  const num = hex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
+  return num >= 0 && num <= 0x10FFFF ? String.fromCodePoint(num) : null; // fromCodePoint throws RangeError above U+10FFFF; NaN fails both tests
+}
+
+// Scans raw HTML for illegal characters. textTokens: occurrences in text content, as literals or as
+// character references; attrLiterals: literal occurrences inside a tag. Comments and script/style
+// elements (start tag included) are skipped. Sticky/global regexes avoid re-slicing the source.
+function _scanSource(source) {
+  const textTokens = [], attrLiterals = [];
+  const rawOpenRe = /<(script|style)\b[^>]*>/iy;
+  const entityRe = /&(#[xX]?[0-9A-Fa-f]+|[a-zA-Z]+);/y;
+  let i = 0;
+  while (i < source.length) {
+    if (source.startsWith("<!--", i)) {
+      const end = source.indexOf("-->", i + 4);
+      i = end === -1 ? source.length : end + 3;
+      continue;
+    }
+    if (source[i] === "<") {
+      rawOpenRe.lastIndex = i;
+      const open = rawOpenRe.exec(source);
+      if (open) {
+        const closeRe = new RegExp(`</${open[1]}\\s*>`, "ig");
+        closeRe.lastIndex = i + open[0].length;
+        const close = closeRe.exec(source);
+        i = close ? close.index + close[0].length : source.length;
+        continue;
+      }
+      let quote = null, prev = "<"; // open attribute-value quote (if any); last non-whitespace character in the tag
+      for (i++; i < source.length; i++) {
+        const c = source[i];
+        if (quote !== null) quote = c === quote ? null : quote;
+        else if (c === ">") break; // only an unquoted ">" ends the tag
+        else if ((c === '"' || c === "'") && prev === "=") quote = c;
+        if (ILLEGAL_CHARS_SET.has(c)) attrLiterals.push({ char: c, srcIdx: i });
+        if (!" \t\n\f\r".includes(c)) prev = c;
+      }
+      i++; // step past the closing ">"
+      continue;
+    }
+    entityRe.lastIndex = i;
+    const ent = source[i] === "&" ? entityRe.exec(source) : null;
+    if (ent) {
+      const resolved = _resolveEntity(ent[1]);
+      if (resolved && ILLEGAL_CHARS_SET.has(resolved))
+        textTokens.push({ char: resolved, form: "entity", srcIdx: i });
+      i += ent[0].length;
+      continue;
+    }
+    if (ILLEGAL_CHARS_SET.has(source[i]))
+      textTokens.push({ char: source[i], form: "literal", srcIdx: i });
+    i++;
+  }
+  return { textTokens, attrLiterals };
+}
+
+// Collects { textNode, offset, char } for each illegal character in the document's text nodes, skipping script/style content.
+function _enumDomTextIllegalChars(doc) {
+  const { NodeFilter } = doc.defaultView;
+  const walker = doc.createTreeWalker(doc.documentElement, NodeFilter.SHOW_TEXT);
+  const found = [];
+  for (let node; (node = walker.nextNode()); ) {
+    const tag = node.parentElement && node.parentElement.localName.toLowerCase();
+    if (tag !== "script" && tag !== "style")
+      for (const hit of node.nodeValue.matchAll(ILLEGAL_CHARS_RE))
+        found.push({ textNode: node, offset: hit.index, char: hit[0] });
+  }
+  return found;
+}
+
+// Converts a source offset into a 1-based { line, col } position; "\n" is the only line separator considered.
+function _findLineCol(source, idx) {
+  const head = source.slice(0, idx);
+  const nlAt = head.lastIndexOf("\n");
+  const line = (head.match(/\n/g) || []).length + 1;
+  return { line, col: nlAt === -1 ? idx + 1 : idx - nlAt };
+}
+
+// Best-effort locator: returns the element named by the last id attribute written before idx (it need not contain idx), else undefined.
+function _findEnclosingByClosestId(source, idx, doc) {
+  let lastId;
+  for (const m of source.slice(0, idx).matchAll(ID_ATTR_RE)) lastId = m[1];
+  if (lastId === undefined) return undefined;
+  return doc.getElementById(lastId) || undefined;
+}
+
+// Logs one illegal-character error with its line/column; the element is forwarded only when given and logger.errorList is a function.
+function _logIllegalChar(logger, source, char, srcIdx, element) {
+  const pos = _findLineCol(source, srcIdx);
+  const hex = char.codePointAt(0).toString(16).toUpperCase();
+  const message = `Illegal character ${_formatChar(char)} in source at line ${pos.line}, column ${pos.col}; use a unicode character reference (&#x${hex};) if intentional`;
+  const withElement = element !== undefined && typeof logger.errorList === "function";
+  if (withElement) logger.error(message, element);
+  else logger.error(message);
+}
+
+/**
+ * Reports every literal illegal character in source; character references are never reported.
+ * With a doc, an element is looked up for each error: the parent of the matching DOM text node
+ * when the source scan and the DOM text list the same characters in the same order, otherwise
+ * the result of the id-based lookup. Attribute occurrences always use the id-based lookup.
+ */
+export function validateIllegalCharactersInSource(source, logger, doc = null) {
+  const { textTokens, attrLiterals } = _scanSource(source);
+  const byId = (srcIdx) => (doc === null ? undefined : _findEnclosingByClosestId(source, srcIdx, doc));
+
+  // DOM positions are trusted only when both enumerations agree occurrence by occurrence.
+  let domOccs = null;
+  if (doc !== null) {
+    const collected = _enumDomTextIllegalChars(doc);
+    const aligned = collected.length === textTokens.length
+      && textTokens.every((t, k) => t.char === collected[k].char);
+    if (aligned) domOccs = collected;
+  }
+
+  textTokens.forEach((token, k) => {
+    if (token.form !== "literal") return;
+    const element = domOccs === null
+      ? byId(token.srcIdx)
+      : domOccs[k].textNode.parentElement || undefined;
+    _logIllegalChar(logger, source, token.char, token.srcIdx, element);
+  });
+
+  // Attribute occurrences are not matched against DOM text nodes.
+  for (const a of attrLiterals) {
+    const element = byId(a.srcIdx);
+    _logIllegalChar(logger, source, a.char, a.srcIdx, element);
   }
 }
 
-export function smpteValidate(doc, logger, fileExists = null) {
+export function smpteValidate(doc, logger, readFile = null, source = null) {
   const docMetadata = smpte.validateHead(doc.head, logger);
   validateDisallowedHeadLinks(doc.head, logger);
   validateDisallowedStyleAttributes(doc.documentElement, logger);
@@ -132,8 +296,10 @@ export function smpteValidate(doc, logger, fileExists = null) {
   validateTfootNoteOrder(doc.documentElement, logger);
   validateFootnoteReferences(doc.documentElement, logger);
   validateBody(doc.body, logger);
-  if (fileExists !== null)
-    validateDataIncludes(doc, logger, fileExists);
+  if (readFile !== null)
+    validateDataIncludes(doc, logger, readFile);
+  if (source !== null)
+    validateIllegalCharactersInSource(source, logger, doc);
   return docMetadata;
 }
 
diff --git a/scripts/validate.mjs b/scripts/validate.mjs
index 5dd1428..412f965 100644
--- a/scripts/validate.mjs
+++ b/scripts/validate.mjs
@@ -33,9 +33,13 @@ import {smpteValidate} from "../js/validate.mjs";
 
 async function main() {
   const filePath = path.resolve(process.argv[2]);
-  const dom = new jsdom.JSDOM(fs.readFileSync(filePath));
-  const fileExists = (src) => fs.existsSync(path.resolve(path.dirname(filePath), src));
-  smpteValidate(dom.window.document, console, fileExists);
+  const source = fs.readFileSync(filePath, "utf8");
+  const dom = new jsdom.JSDOM(source);
+  // Include text, or null when unreadable: a missing file, or a directory (e.g. data-include="").
+  const readFile = (src) => {
+    try { return fs.readFileSync(path.resolve(path.dirname(filePath), src), "utf8"); } catch (_e) { return null; }
+  };
+  smpteValidate(dom.window.document, console, readFile, source);
 }
 
 main().catch(e => { console.error(e); process.exitCode = 1; });
diff --git a/smpte.js b/smpte.js
index 6ad8422..4a18595 100644
--- a/smpte.js
+++ b/smpte.js
@@ -1605,7 +1605,13 @@ window._smpteRenderComplete = false;
 
 document.addEventListener('DOMContentLoaded', async () => {
    try {
-    smpteValidate(window.document, logger_);
+    let source = null;
+    try {
+      source = await asyncFetchLocal(window.location.href);
+    } catch (e) {
+      console.warn("Source-based validation skipped: could not fetch document source.", e);
+    }
+    smpteValidate(window.document, logger_, null, source);
     await render();
     validateDataIncludes(window.document, logger_);
     window._smpteRenderComplete = true;
diff --git a/test/resources/html/snippets/illegal-chars.txt b/test/resources/html/snippets/illegal-chars.txt
new file mode 100644
index 0000000..19d712d
--- /dev/null
+++ b/test/resources/html/snippets/illegal-chars.txt
@@ -0,0 +1 @@
+<Element attr="he said “hello”"/>
diff --git a/test/resources/html/validation/bibliography-valid.html b/test/resources/html/validation/bibliography-valid.html
index ab14e96..57a7204 100644
--- a/test/resources/html/validation/bibliography-valid.html
+++ b/test/resources/html/validation/bibliography-valid.html
@@ -19,7 +19,7 @@
       <ul>
         <li><cite id="bib-draft-bhutton-json-schema-00">IETF draft-bhutton-json-schema-00</cite>, Wright, A., H. Andrews; B. Hutton, G. Dennis, "JSON Schema: A Media Type for Describing JSON Documents", Work in Progress, December 8, 2020.
         <a>https://datatracker.ietf.org/doc/html/draft-bhutton-json-schema-00</a></li>
-        <li><cite id="bib-iso-80000-1">ISO 80000-1</cite>, Quantities and units — Part 1: General
+        <li><cite id="bib-iso-80000-1">ISO 80000-1</cite>, Quantities and units - Part 1: General
         <a>https://www.iso.org/standard/30669.html</a></li>
       </ul>
     </section>
diff --git a/test/resources/html/validation/illegal-characters-attr-invalid.html b/test/resources/html/validation/illegal-characters-attr-invalid.html
new file mode 100644
index 0000000..c5b0425
--- /dev/null
+++ b/test/resources/html/validation/illegal-characters-attr-invalid.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<html>
+  <head itemscope="itemscope" itemtype="http://smpte.org/standards/documents">
+    <meta charset="utf-8" />
+    <meta http-equiv="x-ua-compatible" content="ie=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <script type="module" src="../../../../smpte.js"></script>
+    <meta itemprop="test" content="invalid" />
+    <meta itemprop="pubType" content="AG" />
+    <meta itemprop="pubNumber" content="99" />
+    <meta itemprop="pubState" content="draft" />
+    <title>Title of the document</title>
+  </head>
+  <body>
+    <section id="sec-scope">
+      <p title="don’t">The text is clean but the title attribute carries an illegal character.</p>
+    </section>
+  </body>
+</html>
diff --git a/test/resources/html/validation/illegal-characters-curly-quotes-invalid.html b/test/resources/html/validation/illegal-characters-curly-quotes-invalid.html
new file mode 100644
index 0000000..465c874
--- /dev/null
+++ b/test/resources/html/validation/illegal-characters-curly-quotes-invalid.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<html>
+  <head itemscope="itemscope" itemtype="http://smpte.org/standards/documents">
+    <meta charset="utf-8" />
+    <meta http-equiv="x-ua-compatible" content="ie=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <script type="module" src="../../../../smpte.js"></script>
+    <meta itemprop="test" content="invalid" />
+    <meta itemprop="pubType" content="AG" />
+    <meta itemprop="pubNumber" content="99" />
+    <meta itemprop="pubState" content="draft" />
+    <title>Title of the document</title>
+  </head>
+  <body>
+    <section id="sec-scope">
+      <p>He said “hello” and it’s ‘done’.</p>
+    </section>
+  </body>
+</html>
diff --git a/test/resources/html/validation/illegal-characters-dashes-invalid.html b/test/resources/html/validation/illegal-characters-dashes-invalid.html
new file mode 100644
index 0000000..e33f90b
--- /dev/null
+++ b/test/resources/html/validation/illegal-characters-dashes-invalid.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<html>
+  <head itemscope="itemscope" itemtype="http://smpte.org/standards/documents">
+    <meta charset="utf-8" />
+    <meta http-equiv="x-ua-compatible" content="ie=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <script type="module" src="../../../../smpte.js"></script>
+    <meta itemprop="test" content="invalid" />
+    <meta itemprop="pubType" content="AG" />
+    <meta itemprop="pubNumber" content="99" />
+    <meta itemprop="pubState" content="draft" />
+    <title>Title of the document</title>
+  </head>
+  <body>
+    <section id="sec-scope">
+      <p>The range 1–2 — done…</p>
+    </section>
+  </body>
+</html>
diff --git a/test/resources/html/validation/illegal-characters-entities-valid.html b/test/resources/html/validation/illegal-characters-entities-valid.html
new file mode 100644
index 0000000..720509b
--- /dev/null
+++ b/test/resources/html/validation/illegal-characters-entities-valid.html
@@ -0,0 +1,26 @@
+<!doctype html>
+<html>
+  <head itemscope="itemscope" itemtype="http://smpte.org/standards/documents">
+    <meta charset="utf-8" />
+    <meta http-equiv="x-ua-compatible" content="ie=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <script type="module" src="../../../../smpte.js"></script>
+    <meta itemprop="test" content="valid" />
+    <meta itemprop="pubType" content="AG" />
+    <meta itemprop="pubNumber" content="99" />
+    <meta itemprop="pubState" content="draft" />
+    <title>Title of the document</title>
+  </head>
+  <body>
+    <section id="sec-scope">
+      <p>Unicode character references for the same characters are deliberate and must not be flagged: &#x201C;hello&#x201D;, &#x2019;s, range 1&#x2013;2 &#x2014; done&#x2026;</p>
+      <table id="tbl-illegal-characters">
+        <caption>Table of illegal characters, used correctly</caption>
+        <tbody>
+          <tr><td>Left Double Quotation Mark</td><td>U+201C</td><td>&#x201C;</td></tr>
+          <tr><td>Em Dash</td><td>U+2014</td><td>&#x2014;</td></tr>
+        </tbody>
+      </table>
+    </section>
+  </body>
+</html>
diff --git a/test/resources/html/validation/illegal-characters-valid.html b/test/resources/html/validation/illegal-characters-valid.html
new file mode 100644
index 0000000..ee088b3
--- /dev/null
+++ b/test/resources/html/validation/illegal-characters-valid.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<html>
+  <head itemscope="itemscope" itemtype="http://smpte.org/standards/documents">
+    <meta charset="utf-8" />
+    <meta http-equiv="x-ua-compatible" content="ie=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <script type="module" src="../../../../smpte.js"></script>
+    <meta itemprop="test" content="valid" />
+    <meta itemprop="pubType" content="AG" />
+    <meta itemprop="pubNumber" content="99" />
+    <meta itemprop="pubState" content="draft" />
+    <title>Title of the document</title>
+  </head>
+  <body>
+    <section id="sec-scope">
+      <p>This is the scope of the document. It uses only "straight" quotes, hyphens-minus, and three dots... no curly quotes, en/em dashes, ellipses, NBSPs, or soft hyphens.</p>
+    </section>
+  </body>
+</html>
diff --git a/test/resources/html/validation/norm-ref-valid.html b/test/resources/html/validation/norm-ref-valid.html
index 1dc730a..793cae6 100644
--- a/test/resources/html/validation/norm-ref-valid.html
+++ b/test/resources/html/validation/norm-ref-valid.html
@@ -22,12 +22,12 @@
         <a>https://html.spec.whatwg.org/multipage/</a></li>
     </li>
     <li>
-      <cite id="bib-SMPTE-st429-18">SMPTE ST 429-18</cite>, D-Cinema Packaging — Immersive Audio Track 
+      <cite id="bib-SMPTE-st429-18">SMPTE ST 429-18</cite>, D-Cinema Packaging - Immersive Audio Track
       File 
         <span class="doi">10.5594/SMPTE.ST429-18/</span></li>
     </li>
     <li>
-      <cite id="bib-SMPTE-st429-18-2023">SMPTE ST 429-18:2023</cite>, D-Cinema Packaging — Immersive 
+      <cite id="bib-SMPTE-st429-18-2023">SMPTE ST 429-18:2023</cite>, D-Cinema Packaging - Immersive
       Audio Track File (2023 Edition)
         <span class="doi">10.5594/SMPTE.ST429-18.2023/</span></li>
       </ul>
diff --git a/test/resources/html/validation/snippet-illegal-chars-invalid.html b/test/resources/html/validation/snippet-illegal-chars-invalid.html
new file mode 100644
index 0000000..16c938a
--- /dev/null
+++ b/test/resources/html/validation/snippet-illegal-chars-invalid.html
@@ -0,0 +1,30 @@
+<!doctype html>
+<html>
+  <head itemscope="itemscope" itemtype="http://smpte.org/standards/documents">
+    <meta charset="utf-8" />
+    <meta http-equiv="x-ua-compatible" content="ie=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <script type="module" src="../../../../smpte.js"></script>
+    <meta itemprop="test" content="invalid" />
+    <meta itemprop="pubType" content="ST" />
+    <meta itemprop="pubNumber" content="429" />
+    <meta itemprop="pubPart" content="6" />
+    <meta itemprop="pubSuiteTitle" content="Suite title" />
+    <meta itemprop="pubTC" content="27C" />
+    <meta itemprop="pubStage" content="WD" />
+    <meta itemprop="pubState" content="draft" />
+    <title>Title of the document</title>
+  </head>
+  <body>
+    <section id="sec-scope">
+      <p>This is the scope of the document.</p>
+    </section>
+
+    <section id="sec-snippets-element">
+      <h2>External code snippets</h2>
+      <p>External code snippet that contains illegal characters:</p>
+      <pre data-include="../snippets/illegal-chars.txt"></pre>
+    </section>
+
+  </body>
+</html>
diff --git a/test/src/testValidation.mjs b/test/src/testValidation.mjs
index ce99e29..5d525be 100644
--- a/test/src/testValidation.mjs
+++ b/test/src/testValidation.mjs
@@ -34,7 +34,8 @@ import {smpteValidate, ErrorLogger} from "../../js/validate.mjs";
 const testDirPath = "test/resources/html/validation";
 
 async function _test(filePath) {
-  const dom = new jsdom.JSDOM(fs.readFileSync(filePath));
+  const source = fs.readFileSync(filePath, "utf8");
+  const dom = new jsdom.JSDOM(source);
 
   const expectation = dom.window.document.head.querySelector("meta[itemprop='test']").getAttribute("content");
 
@@ -42,10 +43,13 @@ async function _test(filePath) {
 
   let hasThrown = false;
 
-  const fileExists = (src) => fs.existsSync(path.resolve(path.dirname(filePath), src));
+  const readFile = (src) => {
+    const p = path.resolve(path.dirname(filePath), src);
+    return fs.existsSync(p) ? fs.readFileSync(p, "utf8") : null;
+  };
 
   try {
-    smpteValidate(dom.window.document, logger, fileExists);
+    smpteValidate(dom.window.document, logger, readFile, source);
   } catch (e) {
     logger.error(`Exception: ${e.stack}`);
     hasThrown = true;

Codepoint	Name	Preview	Reference
U+2018	Left Single Quotation Mark	‘	`&#x2018;`
U+2019	Right Single Quotation Mark	’	`&#x2019;`
U+201C	Left Double Quotation Mark	“	`&#x201C;`
U+201D	Right Double Quotation Mark	”	`&#x201D;`
U+2013	En Dash	–	`&#x2013;`
U+2014	Em Dash	—	`&#x2014;`
U+2011	Non-Breaking Hyphen	‑	`&#x2011;`
U+2026	Horizontal Ellipsis	…	`&#x2026;`
U+00A0	No-Break Space	[ ]	`&#xA0;`
U+00AD	Soft Hyphen	[­]	`&#xAD;`
U+00A9	Copyright Sign	©	`&#xA9;`
U+00AE	Registered Sign	®	`&#xAE;`