diff --git a/src/html.js b/src/html.js
new file mode 100644
index 000000000..80f37d133
--- /dev/null
+++ b/src/html.js
@@ -0,0 +1,547 @@
+/**
+ * @fileoverview A simple HTML parser and helper functions for handling HTML nodes in Markdown rules.
+ * @author Jason Miller, Nate Moore, 루밀LuMir(lumirlumir)
+ * @license MIT
+ *
+ * ---
+ *
+ * Portions of this code were borrowed from `ultrahtml` - https://github.com/natemoo-re/ultrahtml
+ *
+ * MIT License Copyright (c) 2022 Nate Moore
+ *
+ * Permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * The above copyright notice and this permission notice
+ * (including the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
+ * EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * ---
+ *
+ * Portions of this code were borrowed from `htmlParser` - https://github.com/developit/htmlParser
+ *
+ * The MIT License (MIT) Copyright (c) 2013 Jason Miller
+ *
+ * Permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * The above copyright notice and this permission notice
+ * shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
+ * EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// TODO: if (selector.content === "*") return true;
+// TODO: getElementByTagName, getElementById, getElementsByClassName, getElementByName
+
+//-----------------------------------------------------------------------------
+// Type Definitions
+//-----------------------------------------------------------------------------
+
+/**
+ * @import { HtmlNode, HtmlParentNode, HtmlTextNode, HtmlCommentNode, HtmlDoctypeNode, HtmlDocumentNode } from "./types.js";
+ */
+
+//-----------------------------------------------------------------------------
+// Helpers: Constants
+//-----------------------------------------------------------------------------
+
+/**
+ * Tag names of void elements.
+ * NOTE(review): the code consuming this set is not part of this block; presumably
+ * these tags are handled as having neither children nor a closing tag - confirm.
+ * @type {Set<string>}
+ */
+const VOID_TAGS = new Set([
+	"area",
+	"base",
+	"br",
+	"col",
+	"embed",
+	"hr",
+	"img",
+	"input",
+	"keygen",
+	"link",
+	"meta",
+	"param",
+	"source",
+	"track",
+	"wbr",
+]);
+
+/**
+ * Tag names whose content is kept as raw text. While the parser's current parent
+ * is one of these tags, any token that does not name that same tag is appended to
+ * the parent's first child instead of being interpreted as markup.
+ * @type {Set<string>}
+ */
+const RAW_TAGS = new Set(["script", "style"]);
+
+/**
+ * Tokenizer pattern used by `parse()`. Each match is exactly one of:
+ *
+ * - an opening or closing tag:
+ *   group 1 = "/" for a closing tag, group 2 = tag name,
+ *   group 3 = raw attribute string, group 4 = optional self-closing slash;
+ * - a comment: groups 5, 6 and 7 = opener, body and closer;
+ * - any other `<!` declaration (such as a doctype): groups 8, 9 and 10 =
+ *   opener, body and closer.
+ *
+ * No alternative may match the empty string: `parse()` loops on `exec()`, and a
+ * zero-length match never advances `lastIndex`, so the loop would never finish.
+ * The pattern is global and therefore stateful; reset `lastIndex` before reuse.
+ */
+const DOM_PARSER_RE =
+	/(?:<(\/?)([a-zA-Z][a-zA-Z0-9:-]*)(?:\s([^>]*?))?((?:\s*\/)?)>|(<!--)([\s\S]*?)(-->)|(<!)([\s\S]*?)(>))/gmu;
+
+/** Matches a single character that may be part of an attribute key. */
+const ATTR_KEY_IDENTIFIER_RE = /[@.a-z0-9_:-]/iu;
+
+//-----------------------------------------------------------------------------
+// Helpers: Functions
+//-----------------------------------------------------------------------------
+
+/**
+ * Reports whether a childless node is nested, at any depth, inside a node named `svg`.
+ * The node itself is never inspected for the `svg` name; only its ancestors are.
+ * NOTE(review): the caller is not visible here; presumably the renderer uses this
+ * to decide whether an empty element may be written in self-closing form - confirm.
+ * @param {HtmlNode} node The node to inspect; it must expose `children` and `parent`.
+ * @returns {boolean} `true` when `node` has no children and some ancestor is named `svg`, otherwise `false`.
+ */
+function canSelfClose(node) {
+	if (node.children.length > 0) {
+		return false;
+	}
+
+	// Climb the parent chain until it runs out (the root has no parent).
+	for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
+		if (ancestor.name === "svg") {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+//-----------------------------------------------------------------------------
+// Exports
+//-----------------------------------------------------------------------------
+
+/**
+ * Parses a string of HTML attributes into an object.
+ *
+ * Values may be double-quoted, single-quoted or unquoted. A quoted value ends at
+ * the next matching quote that is not preceded by a backslash; an unquoted value
+ * ends at the next whitespace character or at the end of the string. A key
+ * without a value, and a quoted value that is never closed, map to `""`.
+ * When a key occurs more than once, the last occurrence wins.
+ * @param {string} str The string to parse.
+ * @returns {Record<string, string>} The parsed attributes as an object.
+ * @example
+ * ```js
+ * import { parseAttrs } from "path/to/html.js";
+ *
+ * const attrs = parseAttrs('id="my-id" class="my-class" data-custom=value');
+ * console.log(attrs);
+ * // { id: "my-id", class: "my-class", "data-custom": "value" }
+ * ```
+ *
+ */
+export function parseAttrs(str) {
+	/** @type {Record<string, string>} */
+	const obj = {};
+
+	if (!str) {
+		return obj;
+	}
+
+	const whitespacePattern = /\s/u;
+
+	/** @type {'none' | 'key' | 'value'} */
+	let state = "none";
+	/** @type {string | undefined} */
+	let currentKey;
+	/** @type {string} */
+	let currentValue = "";
+	/** @type {number | undefined} */
+	let tokenStartIndex;
+	/** @type {'"' | "'" | undefined} */
+	let valueDelimiter;
+	/** Whether an unquoted value is currently being read. */
+	let isUnquotedValue = false;
+
+	for (let currentIndex = 0; currentIndex < str.length; currentIndex++) {
+		const currentChar = str[currentIndex];
+
+		if (state === "none") {
+			if (ATTR_KEY_IDENTIFIER_RE.test(currentChar)) {
+				// A new key starts: flush the attribute collected so far.
+				if (currentKey) {
+					obj[currentKey] = currentValue;
+					currentKey = undefined;
+					currentValue = "";
+				}
+
+				tokenStartIndex = currentIndex;
+				state = "key";
+			} else if (currentKey && currentChar === "=") {
+				// Whitespace separated the key from its `=`.
+				state = "value";
+			}
+		} else if (state === "key") {
+			if (!ATTR_KEY_IDENTIFIER_RE.test(currentChar)) {
+				currentKey = str.slice(tokenStartIndex, currentIndex);
+				state = currentChar === "=" ? "value" : "none";
+			}
+		} else if (valueDelimiter) {
+			// Inside a quoted value: only an unescaped matching quote closes it.
+			if (
+				currentChar === valueDelimiter &&
+				str[currentIndex - 1] !== "\\"
+			) {
+				currentValue = str.slice(tokenStartIndex, currentIndex);
+				valueDelimiter = undefined;
+				state = "none";
+			}
+		} else if (isUnquotedValue) {
+			// Inside an unquoted value: whitespace closes it.
+			if (whitespacePattern.test(currentChar)) {
+				currentValue = str.slice(tokenStartIndex, currentIndex);
+				isUnquotedValue = false;
+				state = "none";
+			}
+		} else if (currentChar === '"' || currentChar === "'") {
+			tokenStartIndex = currentIndex + 1;
+			valueDelimiter = currentChar;
+		} else if (!whitespacePattern.test(currentChar)) {
+			// First character of an unquoted value; whitespace after `=` is skipped.
+			tokenStartIndex = currentIndex;
+			isUnquotedValue = true;
+		}
+	}
+
+	if (
+		state === "key" &&
+		tokenStartIndex !== undefined &&
+		tokenStartIndex < str.length
+	) {
+		// The string ended in the middle of a key.
+		currentKey = str.slice(tokenStartIndex);
+	} else if (state === "value" && isUnquotedValue) {
+		// The string ended in the middle of an unquoted value.
+		currentValue = str.slice(tokenStartIndex);
+	}
+
+	if (currentKey) {
+		obj[currentKey] = currentValue;
+	}
+
+	return obj;
+}
+
+/**
+ * Stringifies an object of HTML attributes into a string.
+ *
+ * Every attribute is written as ` key="value"`, with a leading space. A value
+ * that contains a double quote is wrapped in single quotes instead; if it also
+ * contains a single quote, it stays double-quoted and its double quotes are
+ * written as `&quot;`. Quotes preceded by a backslash are left untouched, which
+ * mirrors the backslash handling in `parseAttrs`.
+ * @param {Record<string, string>} attributes The attributes to stringify.
+ * @returns {string} The stringified attributes, or `""` when there are none.
+ */
+export function stringifyAttrs(attributes) {
+	let attrStr = "";
+
+	for (const [key, value] of Object.entries(attributes)) {
+		const text = String(value);
+
+		if (!/(?<!\\)"/u.test(text)) {
+			// No bare double quote: the default delimiter is safe.
+			attrStr += ` ${key}="${text}"`;
+		} else if (!/(?<!\\)'/u.test(text)) {
+			// Bare double quotes only: single-quote delimiters keep the value verbatim.
+			attrStr += ` ${key}='${text}'`;
+		} else {
+			// Both quote kinds occur: escape the double quotes.
+			attrStr += ` ${key}="${text.replace(/(?<!\\)"/gu, "&quot;")}"`;
+		}
+	}
+
+	return attrStr;
+}
+
+/**
+ * The `parse` function takes a string of HTML and returns an AST (Abstract Syntax Tree).
+ * @param {string} input TODO
+ * @returns {HtmlNode} TODO
+ * @example
+ * ```js
+ * import { parse } from "path/to/html.js";
+ *
+ * const ast = parse(`<h1>Hello world!</h1>`);
+ * console.log(ast);
+ * // {
+ * // type: "document",
+ * // children: [
+ * // ...
+ * // ],
+ * // }
+ * ```
+ *
+ */
+export function parse(input) {
+ /** @type {HtmlNode} */
+ let doc;
+ /** @type {HtmlNode} */
+ let parent;
+ /** @type {RegExpExecArray | null} */
+ let token;
+ /** @type {string} */
+ let text;
+ /** @type {number} */
+ let i;
+ /** @type {string} */
+ let bStart;
+ /** @type {string} */
+ let bText;
+ /** @type {string} */
+ let bEnd;
+ /** @type {HtmlNode} */
+ let tag;
+ /** @type {number} */
+ let lastIndex = 0;
+
+ /** @type {string} */
+ const str = input;
+ /** @type {HtmlNode[]} */
+ const tags = [];
+
+ /**
+ * TODO
+ * @returns {void}
+ */
+ function commitTextNode() {
+ // eslint-disable-next-line unicorn/prefer-string-slice, no-restricted-properties -- TODO
+ text = str.substring(
+ lastIndex,
+ DOM_PARSER_RE.lastIndex - token[0].length,
+ );
+
+ if (text) {
+ parent.children.push(
+ /** @type {HtmlTextNode} */ ({
+ type: "text",
+ value: text,
+ parent,
+ }),
+ );
+ }
+ }
+
+ DOM_PARSER_RE.lastIndex = 0;
+
+ parent = doc = /** @type {any} */ ({
+ type: "document",
+ children: [],
+ });
+
+ while ((token = DOM_PARSER_RE.exec(str))) {
+ bStart = token[5] || token[8];
+ bText = token[6] || token[9];
+ bEnd = token[7] || token[10];
+
+ if (RAW_TAGS.has(parent.name) && token[2] !== parent.name) {
+ // eslint-disable-next-line no-useless-assignment -- TODO
+ i = DOM_PARSER_RE.lastIndex - token[0].length;
+ if (parent.children.length > 0) {
+ parent.children[0].value += token[0];
+ }
+ continue;
+ } else if (bStart === "`;
+ case "doctype":
+ return ``;
+ default:
+ throw new Error(`Unknown node type: ${node}`); // TODO
+ }
+}
+
+/**
+ * Collects the element nodes that `walk` visits when started at `node`.
+ *
+ * Selector matching is not implemented yet: `selector` is ignored and every
+ * visited node whose `type` is `"element"` is returned.
+ * @param {HtmlNode} node The node at which the traversal starts.
+ * @param {string} selector The selector to match (currently unused).
+ * @returns {HtmlNode[]} The element nodes, in the order `walk` visited them.
+ */
+// eslint-disable-next-line no-unused-vars -- `selector` is reserved until matching is implemented.
+export function querySelectorAll(node, selector) {
+	/** @type {HtmlNode[]} */
+	const nodes = [];
+
+	walk(node, n => {
+		// Skip missing nodes as well as non-element nodes, so the result never holds a hole.
+		if (n && n.type === "element") {
+			nodes.push(n);
+		}
+	});
+
+	return nodes;
+}
diff --git a/src/types.ts b/src/types.ts
index c7b75adf3..0e69a7d93 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1,3 +1,8 @@
+/**
+ * @fileoverview Additional types for this package.
+ * @author Nicholas C. Zakas
+ */
+
//------------------------------------------------------------------------------
// Imports
//------------------------------------------------------------------------------
@@ -59,7 +64,7 @@ type WithExit = {
};
//------------------------------------------------------------------------------
-// Exports
+// Exports: Processor
//------------------------------------------------------------------------------
export interface RangeMap {
@@ -78,6 +83,72 @@ export interface Block extends Node, BlockBase {
meta: string | null;
}
+//------------------------------------------------------------------------------
+// Exports: HTML
+//------------------------------------------------------------------------------
+
+/**
+ * Any node of the HTML tree: the document root, an element, a text run,
+ * a comment or a doctype.
+ */
+export type HtmlNode =
+ | HtmlDocumentNode
+ | HtmlElementNode
+ | HtmlTextNode
+ | HtmlCommentNode
+ | HtmlDoctypeNode;
+
+/**
+ * Union of the `type` discriminants declared by the individual node interfaces.
+ */
+export type HtmlNodeType =
+ | HtmlDocumentNode["type"]
+ | HtmlElementNode["type"]
+ | HtmlTextNode["type"]
+ | HtmlCommentNode["type"]
+ | HtmlDoctypeNode["type"];
+
+/**
+ * A numeric start/end pair.
+ * NOTE(review): presumably offsets into the parsed source string - confirm the
+ * unit and whether `end` is inclusive.
+ */
+export interface HtmlLocation {
+ start: number;
+ end: number;
+}
+
+/**
+ * Members shared by the node interfaces that extend this one. Not exported.
+ * The index signature allows arbitrary extra properties of any type.
+ * NOTE(review): `loc` is a pair of locations; presumably one for the opening and
+ * one for the closing part of the node - confirm.
+ */
+interface HtmlBaseNode {
+ type: HtmlNodeType;
+ loc: [HtmlLocation, HtmlLocation];
+ parent: HtmlNode;
+ [key: string]: any;
+}
+
+/**
+ * Base for nodes that carry a string `value` (text, comment and doctype nodes).
+ */
+export interface HtmlLiteralNode extends HtmlBaseNode {
+ value: string;
+}
+
+/**
+ * Base for nodes that hold a list of child nodes.
+ */
+export interface HtmlParentNode extends HtmlBaseNode {
+ children: HtmlNode[];
+}
+
+/**
+ * The root of the tree. Unlike every other node it has no parent, so the
+ * inherited `parent` member is omitted and redeclared as `undefined`.
+ */
+export interface HtmlDocumentNode extends Omit<HtmlParentNode, "parent"> {
+	type: "document";
+	attributes: Record<string, string>;
+	parent: undefined;
+}
+
+/**
+ * An element: a named tag with string attributes and child nodes.
+ */
+export interface HtmlElementNode extends HtmlParentNode {
+	type: "element";
+	name: string;
+	attributes: Record<string, string>;
+}
+
+/**
+ * A literal node whose `type` is `"text"`.
+ */
+export interface HtmlTextNode extends HtmlLiteralNode {
+ type: "text";
+}
+
+/**
+ * A literal node whose `type` is `"comment"`.
+ */
+export interface HtmlCommentNode extends HtmlLiteralNode {
+ type: "comment";
+}
+
+/**
+ * A literal node whose `type` is `"doctype"`.
+ */
+export interface HtmlDoctypeNode extends HtmlLiteralNode {
+ type: "doctype";
+}
+
+//------------------------------------------------------------------------------
+// Exports: Front Matter
+//------------------------------------------------------------------------------
+
/**
* Markdown TOML.
*/
@@ -116,6 +187,10 @@ export interface Json extends Literal {
*/
export interface JsonData extends Data {}
+//------------------------------------------------------------------------------
+// Exports: Markdown Language
+//------------------------------------------------------------------------------
+
/**
* Language options provided for Markdown files.
*/
@@ -131,6 +206,10 @@ export interface MarkdownLanguageOptions extends LanguageOptions {
*/
export type MarkdownLanguageContext = LanguageContext;
+//------------------------------------------------------------------------------
+// Exports: Markdown Rule Definition
+//------------------------------------------------------------------------------
+
export interface MarkdownRuleVisitor
extends RuleVisitor,
WithExit<
diff --git a/tests/html.test.js b/tests/html.test.js
new file mode 100644
index 000000000..3cc990971
--- /dev/null
+++ b/tests/html.test.js
@@ -0,0 +1,443 @@
+/**
+ * @fileoverview Tests for the html.js
+ * @author Jason Miller, Nate Moore, 루밀LuMir(lumirlumir)
+ * @license MIT
+ *
+ * ---
+ *
+ * Portions of this code were borrowed from `ultrahtml` - https://github.com/natemoo-re/ultrahtml
+ *
+ * MIT License Copyright (c) 2022 Nate Moore
+ *
+ * Permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * The above copyright notice and this permission notice
+ * (including the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
+ * EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * ---
+ *
+ * Portions of this code were borrowed from `htmlParser` - https://github.com/developit/htmlParser
+ *
+ * The MIT License (MIT) Copyright (c) 2013 Jason Miller
+ *
+ * Permission is hereby granted, free of
+ * charge, to any person obtaining a copy of this software and associated
+ * documentation files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * The above copyright notice and this permission notice
+ * shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
+ * EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+//-----------------------------------------------------------------------------
+// Imports
+//-----------------------------------------------------------------------------
+
+import assert from "node:assert";
+import {
+ parseAttrs,
+ parse,
+ walk,
+ render,
+ querySelectorAll,
+} from "../src/html.js";
+
+//-----------------------------------------------------------------------------
+// Tests
+//-----------------------------------------------------------------------------
+
+describe("html", () => {
+ it("sanity", () => {
+ assert.strictEqual(typeof parseAttrs, "function");
+ assert.strictEqual(typeof parse, "function");
+ assert.strictEqual(typeof walk, "function");
+ assert.strictEqual(typeof render, "function");
+ assert.strictEqual(typeof querySelectorAll, "function");
+ });
+
+ describe("parseAttrs()", () => {
+ // Basic
+ it("works for empty string", () => {
+ const attrs = parseAttrs("");
+
+ assert.deepStrictEqual(attrs, {});
+ });
+ it('works for `key=""`', () => {
+ const attrs = parseAttrs('key=""');
+ assert.deepStrictEqual(attrs, { key: "" });
+ });
+ it('works for `key="value"`', () => {
+ const attrs = parseAttrs('key="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it("works for `key=''`", () => {
+ const attrs = parseAttrs("key=''");
+ assert.deepStrictEqual(attrs, { key: "" });
+ });
+ it("works for `key='value'`", () => {
+ const attrs = parseAttrs("key='value'");
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+
+ // Empty value
+ it("works for `key`", () => {
+ const attrs = parseAttrs("key");
+
+ assert.deepStrictEqual(attrs, { key: "" });
+ });
+ it("works for `key1 key2`", () => {
+ const attrs = parseAttrs("key1 key2");
+
+ assert.deepStrictEqual(attrs, { key1: "", key2: "" });
+ });
+
+ // Leading whitespaces
+ it('works for ` key="value"`', () => {
+ const attrs = parseAttrs(' key="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `\tkey="value"`', () => {
+ const attrs = parseAttrs('\tkey="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `\nkey="value"`', () => {
+ const attrs = parseAttrs('\nkey="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `\r\nkey="value"`', () => {
+ const attrs = parseAttrs('\r\nkey="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+
+ // Trailing whitespaces
+ it('works for `key="value" `', () => {
+ const attrs = parseAttrs('key="value" ');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key="value"\t`', () => {
+ const attrs = parseAttrs('key="value"\t');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key="value"\n`', () => {
+ const attrs = parseAttrs('key="value"\n');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key="value"\r\n`', () => {
+ const attrs = parseAttrs('key="value"\r\n');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+
+ // Leading whitespaces around the equal sign
+ it('works for `key ="value"`', () => {
+ const attrs = parseAttrs('key ="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key ="value"`', () => {
+ const attrs = parseAttrs('key ="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key\t="value"`', () => {
+ const attrs = parseAttrs('key\t="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key\n="value"`', () => {
+ const attrs = parseAttrs('key\n="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key\r\n="value"`', () => {
+ const attrs = parseAttrs('key\r\n="value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+
+ // Trailing whitespaces around the equal sign
+ it('works for `key= "value"`', () => {
+ const attrs = parseAttrs('key= "value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key= "value"`', () => {
+ const attrs = parseAttrs('key= "value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key=\t"value"`', () => {
+ const attrs = parseAttrs('key=\t"value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key=\n"value"`', () => {
+ const attrs = parseAttrs('key=\n"value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ it('works for `key=\r\n"value"`', () => {
+ const attrs = parseAttrs('key=\r\n"value"');
+
+ assert.deepStrictEqual(attrs, { key: "value" });
+ });
+ });
+
+ describe("parse(), renderSync()", () => {
+ describe("input === output", () => {
+ it("works for elements", async () => {
+ const input = `
Token CSS is a new tool that seamlessly integrates Design Tokens into your development workflow. Conceptually, it is similar to tools Tailwind, Styled System, and many CSS-in-JS libraries that provide tokenized constraints for your styles—but there\'s one big difference.