From bd1d23799f9e36fda2b3a2043e206b0c0b876265 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 08:19:05 +0200 Subject: [PATCH 1/4] fix: marshal Undefined and SafeString DOM values in Lua `domValue_push` handled `Null`, `Boolean`, `Integer`, `String`, `Array`, and `Object`, and aborted via `MRDOCS_UNREACHABLE` for any other kind. Reading a field whose value is `Undefined` or `SafeString` therefore crashed a Lua script. `Undefined` is common: a symbol with no name (the global namespace, for instance) has an `Undefined` name, so a Lua script that reads `symbol.name` aborted the build. So, map `Undefined` to `nil`, as `Null` already is, and push a `SafeString` as its bytes, the way `String` is handled. This matches the JavaScript bridge. --- src/lib/Support/Lua.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/lib/Support/Lua.cpp b/src/lib/Support/Lua.cpp index 985f229464..d641ec3c4a 100644 --- a/src/lib/Support/Lua.cpp +++ b/src/lib/Support/Lua.cpp @@ -643,11 +643,20 @@ domValue_push( { case dom::Kind::Null: return lua_pushnil(A); + case dom::Kind::Undefined: + // Lua has a single nullary value, so a missing field maps to + // `nil` just as `Null` does. A read of an absent field (for + // example the global namespace's name) yields `Undefined` and + // must not abort. + return lua_pushnil(A); case dom::Kind::Boolean: return lua_pushboolean(A, value.getBool()); case dom::Kind::Integer: return lua_pushnumber(A, value.getInteger()); case dom::Kind::String: + case dom::Kind::SafeString: + // A `SafeString` is a string already marked safe for an output + // format; to a Lua script it is just its bytes. 
return luaM_pushstring(A, value.getString()); case dom::Kind::Array: return domArray_push(A, value.getArray()); From df5be9bc8b968bba9488cc0256f434882ce0ff55 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 10:05:21 +0200 Subject: [PATCH 2/4] feat: support script-driven generators This adds a generator flavor backed by a user script. A directory under <addon>/generator/<name>/ whose mrdocs-generator.yml names a script entry installs a generator that hands the whole emit to a Lua or JavaScript `generate(corpus, output)` function: the script walks the corpus and writes files through the output object, so it can produce output shapes a per-page generator cannot, such as a single artifact aggregated across every symbol. The manifest parser moves into a shared `GeneratorManifest`, so the data-driven and script-driven discovery passes read the same file. A manifest that names a script is skipped by the data-driven pass and installed by the script pass. The output object exposes a single write method, resolved under the output directory and forbidden from escaping it. Both languages receive it as the second argument to generate; on the Lua side it is also bound as a global and passed from there, because the Lua bridge cannot carry a callable as a plain value. 
--- src/lib/Gen/GeneratorManifest.cpp | 264 ++++++++++++++++++++++ src/lib/Gen/GeneratorManifest.hpp | 118 ++++++++++ src/lib/Gen/hbs/DataDrivenGenerators.cpp | 182 +++------------ src/lib/Gen/hbs/DataDrivenGenerators.hpp | 4 + src/lib/Gen/script/OutputSink.cpp | 77 +++++++ src/lib/Gen/script/OutputSink.hpp | 63 ++++++ src/lib/Gen/script/ScriptGenerator.cpp | 142 ++++++++++++ src/lib/Gen/script/ScriptGenerator.hpp | 101 +++++++++ src/lib/Gen/script/ScriptGeneratorJs.cpp | 103 +++++++++ src/lib/Gen/script/ScriptGeneratorLua.cpp | 149 ++++++++++++ src/lib/Gen/script/ScriptRunner.hpp | 68 ++++++ src/test/TestRunner.cpp | 8 + src/tool/GenerateAction.cpp | 13 +- 13 files changed, 1136 insertions(+), 156 deletions(-) create mode 100644 src/lib/Gen/GeneratorManifest.cpp create mode 100644 src/lib/Gen/GeneratorManifest.hpp create mode 100644 src/lib/Gen/script/OutputSink.cpp create mode 100644 src/lib/Gen/script/OutputSink.hpp create mode 100644 src/lib/Gen/script/ScriptGenerator.cpp create mode 100644 src/lib/Gen/script/ScriptGenerator.hpp create mode 100644 src/lib/Gen/script/ScriptGeneratorJs.cpp create mode 100644 src/lib/Gen/script/ScriptGeneratorLua.cpp create mode 100644 src/lib/Gen/script/ScriptRunner.hpp diff --git a/src/lib/Gen/GeneratorManifest.cpp b/src/lib/Gen/GeneratorManifest.cpp new file mode 100644 index 0000000000..b5db538493 --- /dev/null +++ b/src/lib/Gen/GeneratorManifest.cpp @@ -0,0 +1,264 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include "GeneratorManifest.hpp" +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs { + +namespace { + +// Read a scalar node into an owned string. 
+std::string +scalarText(llvm::yaml::ScalarNode& node) +{ + llvm::SmallString<32> buf; + llvm::StringRef const text = node.getValue(buf); + return std::string(text.data(), text.size()); +} + +// Forward declaration for the recursive conversion below. +dom::Value yamlToDom(llvm::yaml::Node* node); + +// Add one key/value pair of a YAML mapping to `obj`, recursing on the +// value. A non-scalar key is skipped. +void +addMappingEntry(llvm::yaml::KeyValueNode& entry, dom::Object& obj) +{ + llvm::yaml::ScalarNode* const keyNode = + llvm::dyn_cast_or_null(entry.getKey()); + if (!keyNode) + { + return; + } + obj.set(scalarText(*keyNode), yamlToDom(entry.getValue())); +} + +// Convert a YAML node to a DOM value. A mapping becomes an object, a +// sequence becomes an array, and every scalar becomes a string: there +// are no type tags here, so a script coerces numeric or boolean +// parameters itself. +dom::Value +yamlToDom(llvm::yaml::Node* node) +{ + if (!node || llvm::isa(node)) + { + return dom::Value(nullptr); + } + if (llvm::yaml::ScalarNode* const scalar = + llvm::dyn_cast(node)) + { + return dom::Value(scalarText(*scalar)); + } + if (llvm::yaml::SequenceNode* const sequence = + llvm::dyn_cast(node)) + { + dom::Array array; + for (llvm::yaml::Node& element : *sequence) + { + array.emplace_back(yamlToDom(&element)); + } + return dom::Value(std::move(array)); + } + llvm::yaml::MappingNode* const mapping = + llvm::dyn_cast(node); + if (!mapping) + { + return dom::Value(nullptr); + } + dom::Object object; + for (llvm::yaml::KeyValueNode& entry : *mapping) + { + addMappingEntry(entry, object); + } + return dom::Value(std::move(object)); +} + +// Parse a YAML mapping whose entries are non-empty byte-sequence keys +// mapped to replacement strings. An empty key is a hard error. 
+Expected +parseEscape( + llvm::yaml::MappingNode& node, + GeneratorManifest& manifest, + std::string_view yamlPath) +{ + for (llvm::yaml::KeyValueNode& entry : node) + { + llvm::yaml::ScalarNode* const keyNode = + llvm::dyn_cast_or_null(entry.getKey()); + llvm::yaml::ScalarNode* const valNode = + llvm::dyn_cast_or_null(entry.getValue()); + if (!keyNode || !valNode) + { + return Unexpected(formatError( + "{}: each 'escape' entry must be a scalar->scalar mapping", + yamlPath)); + } + std::string key = scalarText(*keyNode); + if (key.empty()) + { + return Unexpected(formatError( + "{}: escape key must not be empty", yamlPath)); + } + manifest.escape.emplace_back( + std::move(key), scalarText(*valNode)); + } + return {}; +} + +// Dispatch a single top-level manifest key to its handler. Unknown keys +// are ignored so future schema additions stay non-breaking. +Expected +parseTopLevelEntry( + llvm::yaml::KeyValueNode& pair, + GeneratorManifest& manifest, + std::string_view yamlPath) +{ + llvm::yaml::ScalarNode* const keyNode = + llvm::dyn_cast_or_null(pair.getKey()); + if (!keyNode) + { + return {}; + } + llvm::SmallString<16> keyBuf; + llvm::StringRef const key = keyNode->getValue(keyBuf); + if (key == "escape") + { + llvm::yaml::MappingNode* const escNode = + llvm::dyn_cast_or_null(pair.getValue()); + if (!escNode) + { + return Unexpected(formatError( + "{}: 'escape' must be a mapping", yamlPath)); + } + return parseEscape(*escNode, manifest, yamlPath); + } + if (key == "script") + { + llvm::yaml::ScalarNode* const valNode = + llvm::dyn_cast_or_null(pair.getValue()); + if (!valNode) + { + return Unexpected(formatError( + "{}: 'script' must be a scalar", yamlPath)); + } + manifest.script = scalarText(*valNode); + } + if (key == "params") + { + llvm::yaml::MappingNode* const paramsNode = + llvm::dyn_cast_or_null(pair.getValue()); + if (!paramsNode) + { + return Unexpected(formatError( + "{}: 'params' must be a mapping", yamlPath)); + } + manifest.params = 
yamlToDom(paramsNode).getObject(); + } + return {}; +} + +} // (anon) + +Expected +loadGeneratorManifest(std::string_view yamlPath) +{ + MRDOCS_TRY(std::string text, files::getFileText(yamlPath)); + llvm::SourceMgr sm; + llvm::yaml::Stream stream(text, sm); + + GeneratorManifest manifest; + llvm::yaml::document_iterator docIt = stream.begin(); + if (docIt == stream.end()) + { + return manifest; + } + llvm::yaml::Node* const rootNode = docIt->getRoot(); + if (rootNode == nullptr || + llvm::isa(rootNode)) + { + // Empty document: a file with no content, only comments, or a + // literal `null`. All of these mean "no rules". + return manifest; + } + llvm::yaml::MappingNode* const root = + llvm::dyn_cast(rootNode); + if (!root) + { + return Unexpected(formatError( + "{}: top-level YAML node must be a mapping", yamlPath)); + } + for (llvm::yaml::KeyValueNode& pair : *root) + { + MRDOCS_TRY(parseTopLevelEntry(pair, manifest, yamlPath)); + } + return manifest; +} + +namespace { + +constexpr std::string_view metadataFileName = "mrdocs-generator.yml"; + +// Append every manifested subdirectory of `generatorDir` to `out`. 
+Expected +scanGeneratorDir( + std::string_view generatorDir, + std::vector& out) +{ + namespace fs = std::filesystem; + std::error_code iterEc; + fs::directory_iterator const end{}; + for (fs::directory_iterator it(generatorDir, iterEc); + !iterEc && it != end; + it.increment(iterEc)) + { + std::error_code typeEc; + if (!it->is_directory(typeEc)) + { + continue; + } + std::string const dir = it->path().string(); + std::string const yamlPath = files::appendPath( + dir, std::string(metadataFileName)); + if (!files::exists(yamlPath)) + { + continue; + } + MRDOCS_TRY(GeneratorManifest manifest, loadGeneratorManifest(yamlPath)); + out.push_back(DiscoveredManifest{ dir, std::move(manifest) }); + } + return {}; +} + +} // (anon) + +Expected> +discoverGeneratorManifests(std::vector const& roots) +{ + std::vector out; + for (std::string const& root : roots) + { + std::string const dir = files::appendPath(root, "generator"); + if (!files::exists(dir)) + { + continue; + } + MRDOCS_TRY(scanGeneratorDir(dir, out)); + } + return out; +} + +} // mrdocs diff --git a/src/lib/Gen/GeneratorManifest.hpp b/src/lib/Gen/GeneratorManifest.hpp new file mode 100644 index 0000000000..8f797b7e7f --- /dev/null +++ b/src/lib/Gen/GeneratorManifest.hpp @@ -0,0 +1,118 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#ifndef MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP +#define MRDOCS_LIB_GEN_GENERATORMANIFEST_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs { + +/** The parsed contents of a generator manifest. + + A manifest is the `mrdocs-generator.yml` that an addon directory + under /generator// ships to declare a generator. 
The two + generator flavors read disjoint fields of the same file: + + @li A data-driven (Handlebars) generator reads the escape rules. + + @li A script-driven generator reads the entry-file path and its + parameters. + + The presence of the `script` entry is what distinguishes the two: a + manifest that names a `script` is a script-driven generator, + otherwise it is data-driven. +*/ +struct GeneratorManifest +{ + /** The entry file of a script-driven generator. + + Holds the value of the manifest's optional `script` key, a path + relative to the generator directory. Empty when the manifest + declares no `script`, in which case the directory is a + data-driven generator. + */ + std::optional script; + + /** The escape rules of a data-driven generator. + + Each pair maps a byte-sequence source to its replacement string, + in manifest order. Empty when no escape rules are declared. + */ + std::vector> escape; + + /** The parameters of a script-driven generator. + + Holds the manifest's optional `params` mapping, passed to the + entry script as its `params` argument. Mapping values may be + nested objects or arrays; a scalar value is a string. Empty when + the manifest declares no `params`. A data-driven generator + ignores this field. + */ + dom::Object params; +}; + +/** Parse a generator manifest into plain data. + + Read the file at `yamlPath` and return its contents. The file is + expected to contain a top-level mapping. The optional `escape` key + holds a sub-mapping from byte-sequence keys to replacement strings; + keys may be one or more bytes long, and an empty key is a hard error. + The optional `script` key holds the entry-file path as a scalar. The + optional `params` key holds a mapping of generator-specific + parameters; its values may be nested, and a scalar value is read as a + string. Unknown top-level keys are ignored so future schema additions + are non-breaking. 
+ + An empty document (an empty file, comments only, or a literal `null`) + yields an empty manifest. +*/ +Expected +loadGeneratorManifest(std::string_view yamlPath); + +/** A generator directory paired with its parsed manifest. +*/ +struct DiscoveredManifest +{ + /** The generator directory, of the form /generator/. + */ + std::string dir; + + /** The parsed contents of the directory's manifest. + */ + GeneratorManifest manifest; +}; + +/** Find every addon generator directory that ships a manifest. + + For each addon root, walk the immediate subdirectories of + /generator/. A subdirectory is reported when it ships an + `mrdocs-generator.yml`; the manifest is parsed and returned alongside + its directory. Directories without a manifest (the built-in shared + common/ is the canonical example) are skipped. + + The presence of a `script` entry distinguishes the two generator + flavors, so a caller installs the flavor it owns and ignores the + other. Roots are searched in order, so the result preserves addon + precedence. +*/ +Expected> +discoverGeneratorManifests(std::vector const& roots); + +} // mrdocs + +#endif diff --git a/src/lib/Gen/hbs/DataDrivenGenerators.cpp b/src/lib/Gen/hbs/DataDrivenGenerators.cpp index 9dc20f2078..e0002bb9e8 100644 --- a/src/lib/Gen/hbs/DataDrivenGenerators.cpp +++ b/src/lib/Gen/hbs/DataDrivenGenerators.cpp @@ -11,116 +11,30 @@ #include "DataDrivenGenerators.hpp" #include "AddonPaths.hpp" #include "HandlebarsGenerator.hpp" +#include #include #include -#include -#include -#include -#include -#include #include #include #include +#include +#include namespace mrdocs::hbs { namespace { -constexpr std::string_view metadataFileName = "mrdocs-generator.yml"; - -// Populate `map` from a YAML mapping whose entries are non-empty -// byte-sequence keys mapped to replacement strings. An empty key -// is a hard error. 
-Expected -populateEscapeFromMapping( - llvm::yaml::MappingNode& node, - EscapeMap& map, - std::string_view yamlPath) -{ - for (llvm::yaml::KeyValueNode& entry : node) - { - llvm::yaml::ScalarNode* keyNode = - llvm::dyn_cast_or_null(entry.getKey()); - llvm::yaml::ScalarNode* valNode = - llvm::dyn_cast_or_null(entry.getValue()); - if (!keyNode || !valNode) - { - return Unexpected(formatError( - "{}: each 'escape' entry must be a scalar->scalar mapping", - yamlPath)); - } - llvm::SmallString<8> keyBuf; - llvm::SmallString<32> valBuf; - llvm::StringRef const keyStr = keyNode->getValue(keyBuf); - llvm::StringRef const valStr = valNode->getValue(valBuf); - if (keyStr.empty()) - { - return Unexpected(formatError( - "{}: escape key must not be empty", - yamlPath)); - } - map.set( - std::string_view(keyStr.data(), keyStr.size()), - std::string_view(valStr.data(), valStr.size())); - } - return {}; -} - -// Install a HandlebarsGenerator for the data-driven format in `dir`, -// when `dir` opts in by shipping an `mrdocs-generator.yml`. -// -// The presence of the manifest is the explicit opt-in: a directory -// under /generator/ becomes a generator only when it ships -// this file. Directories that hold shared assets (the built-in -// `common/` is the canonical example) simply don't declare a manifest, -// and discovery skips them. -// -// The generator registry is process-global and is not cleared between -// runs in the same process. `installGenerator` fails when the id is -// already taken, whether by a built-in or by a generator an earlier -// addon root installed under the same name. That is the -// first-writer-wins layering we want, so a duplicate id is a silent -// skip rather than an error (a null generator is the only other -// failure it reports, and we never pass one). In the test executable -// this also means the first test to install an id wins for the rest -// of the process; two fixtures cannot ship competing generators of -// the same name. 
-Expected -maybeRegister(std::filesystem::path const& dir) -{ - std::string const yamlPath = files::appendPath( - dir.string(), std::string(metadataFileName)); - if (!files::exists(yamlPath)) - { - return {}; - } - std::string const name = dir.filename().string(); - MRDOCS_TRY(EscapeMap escapeMap, loadGeneratorMetadata(yamlPath)); - (void)installGenerator( - std::make_unique( - name, name, name, std::move(escapeMap))); - return {}; -} - -// Scan a single /generator/ directory. -Expected -scanGeneratorDir(std::string_view generatorDir) +// Build an `EscapeMap` from the manifest's ordered `escape` rules. +EscapeMap +toEscapeMap( + std::vector> const& rules) { - namespace fs = std::filesystem; - std::error_code iterEc; - fs::directory_iterator const end{}; - for (fs::directory_iterator it(generatorDir, iterEc); - !iterEc && it != end; - it.increment(iterEc)) + EscapeMap map; + for (std::pair const& rule : rules) { - std::error_code typeEc; - if (!it->is_directory(typeEc)) - { - continue; - } - MRDOCS_TRY(maybeRegister(it->path())); + map.set(rule.first, rule.second); } - return {}; + return map; } } // (anon) @@ -128,69 +42,35 @@ scanGeneratorDir(std::string_view generatorDir) Expected loadGeneratorMetadata(std::string_view yamlPath) { - MRDOCS_TRY(std::string text, files::getFileText(yamlPath)); - llvm::SourceMgr sm; - llvm::yaml::Stream stream(text, sm); - - EscapeMap map; - llvm::yaml::document_iterator docIt = stream.begin(); - if (docIt == stream.end()) - { - return map; - } - llvm::yaml::Node* const rootNode = docIt->getRoot(); - if (rootNode == nullptr || - llvm::isa(rootNode)) - { - // Empty document: file with no content, only comments, or a - // literal `null`. All of these mean "no rules". 
- return map; - } - llvm::yaml::MappingNode* const root = - llvm::dyn_cast(rootNode); - if (!root) - { - return Unexpected(formatError( - "{}: top-level YAML node must be a mapping", yamlPath)); - } - - for (llvm::yaml::KeyValueNode& pair : *root) - { - llvm::yaml::ScalarNode* const keyNode = - llvm::dyn_cast_or_null(pair.getKey()); - if (!keyNode) - { - continue; - } - llvm::SmallString<16> keyBuf; - if (keyNode->getValue(keyBuf) != "escape") - { - continue; - } - llvm::yaml::MappingNode* const escNode = - llvm::dyn_cast_or_null(pair.getValue()); - if (!escNode) - { - return Unexpected(formatError( - "{}: 'escape' must be a mapping", yamlPath)); - } - MRDOCS_TRY(populateEscapeFromMapping(*escNode, map, yamlPath)); - } - return map; + MRDOCS_TRY(GeneratorManifest manifest, loadGeneratorManifest(yamlPath)); + return toEscapeMap(manifest.escape); } Expected discoverDataDrivenGenerators(Config::Settings const& settings) { - std::vector const roots = addon_paths::addonRoots(settings); - for (std::string const& root : roots) + MRDOCS_TRY( + std::vector found, + discoverGeneratorManifests(addon_paths::addonRoots(settings))); + for (DiscoveredManifest const& d : found) { - std::string const dir = files::appendPath(root, "generator"); - if (!files::exists(dir)) + // A manifest that names a `script` is a script-driven generator; + // that flavor is installed by its own discovery pass. + if (d.manifest.script) { continue; } - MRDOCS_TRY(scanGeneratorDir(dir)); + // The generator registry is process-global and is not cleared + // between runs in the same process. `installGenerator` fails when + // the id is already taken, whether by a built-in or by an + // earlier addon root's generator of the same name. That is the + // first-writer-wins layering we want, so a duplicate id is a + // silent skip rather than an error (a `null` generator is the only + // other failure it reports, and we never pass one). 
+ std::string const name(files::getFileName(d.dir)); + (void)installGenerator( + std::make_unique( + name, name, name, toEscapeMap(d.manifest.escape))); } return {}; } diff --git a/src/lib/Gen/hbs/DataDrivenGenerators.hpp b/src/lib/Gen/hbs/DataDrivenGenerators.hpp index 7146a1f2ed..533fa8238e 100644 --- a/src/lib/Gen/hbs/DataDrivenGenerators.hpp +++ b/src/lib/Gen/hbs/DataDrivenGenerators.hpp @@ -33,6 +33,10 @@ namespace mrdocs::hbs { (the built-in `common/` is the canonical example) don't declare a manifest and are skipped. + 3. Its manifest does not name a `script`. A manifest with a `script` + key declares a script-driven generator, which is installed by + `discoverScriptGenerators` instead, so it is skipped here. + For each accepted directory, a `HandlebarsGenerator` is constructed with id, file extension, and display name all set to ``, and installed into the global registry. Escape rules are read from diff --git a/src/lib/Gen/script/OutputSink.cpp b/src/lib/Gen/script/OutputSink.cpp new file mode 100644 index 0000000000..28ac585b81 --- /dev/null +++ b/src/lib/Gen/script/OutputSink.cpp @@ -0,0 +1,77 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include "OutputSink.hpp" +#include +#include +#include + +namespace mrdocs::script { + +OutputSink:: +OutputSink(std::string_view outputDir) + : root_(files::normalizePath(outputDir)) +{ +} + +Expected +OutputSink:: +resolveUnderRoot(std::string_view relPath) const +{ + if (relPath.empty()) + { + return Unexpected(formatError( + "output.write: path must not be empty")); + } + if (files::isAbsolute(relPath)) + { + return Unexpected(formatError( + "output.write: path '{}' must be relative", relPath)); + } + std::string const full = files::normalizePath( + files::appendPath(root_, relPath)); + // `startsWith` enforces a component boundary after the prefix, so the + // root is passed without a trailing separator: a sibling directory + // whose name merely begins with the root (root vs root-x) is not a + // false match. 
+ if (!files::startsWith(full, root_)) + { + return Unexpected(formatError( + "output.write: path '{}' escapes the output directory", + relPath)); + } + return full; +} + +Expected +OutputSink:: +write(std::string_view relPath, std::string_view contents) +{ + MRDOCS_TRY(std::string full, resolveUnderRoot(relPath)); + MRDOCS_TRY(files::createDirectory(files::getParentDir(full))); + + std::ofstream os(full, std::ios::binary | std::ios::trunc); + if (!os) + { + return Unexpected(formatError( + "output.write: cannot open '{}' for writing", full)); + } + os.write( + contents.data(), + static_cast(contents.size())); + if (!os) + { + return Unexpected(formatError( + "output.write: failed writing '{}'", full)); + } + return {}; +} + +} // mrdocs::script diff --git a/src/lib/Gen/script/OutputSink.hpp b/src/lib/Gen/script/OutputSink.hpp new file mode 100644 index 0000000000..342d5edbd9 --- /dev/null +++ b/src/lib/Gen/script/OutputSink.hpp @@ -0,0 +1,63 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#ifndef MRDOCS_LIB_GEN_SCRIPT_OUTPUTSINK_HPP +#define MRDOCS_LIB_GEN_SCRIPT_OUTPUTSINK_HPP + +#include +#include +#include +#include + +namespace mrdocs::script { + +/** The file-writing API handed to a script-driven generator. + + A script-driven generator owns its output structure: it decides + which files to write and what to put in them. This class is the only + door it has to the filesystem, bound into the script as the `write` + method of the `output` object. Every path is resolved under a single + output directory and may not escape it, so "a generator writes files" + does not become "a script writes anywhere on disk". 
+*/ +class OutputSink +{ + // The output directory, normalized and absolute, without a trailing + // separator. + std::string root_; + + // Resolve `relPath` under the output directory. Reject an empty path, + // an absolute path, or a path that escapes the directory. + Expected + resolveUnderRoot(std::string_view relPath) const; + +public: + /** Construct a sink rooted at the given output directory. + */ + explicit + OutputSink(std::string_view outputDir); + + /** Write `contents` to `relPath`, resolved under the output directory. + + Create any missing parent directories. Reject an absolute path + or one that escapes the output directory. + + @param relPath The destination path, relative to the output + directory. + @param contents The bytes to write. + @return Success, or an error describing why the write failed. + */ + Expected + write(std::string_view relPath, std::string_view contents); +}; + +} // mrdocs::script + +#endif diff --git a/src/lib/Gen/script/ScriptGenerator.cpp b/src/lib/Gen/script/ScriptGenerator.cpp new file mode 100644 index 0000000000..9144174653 --- /dev/null +++ b/src/lib/Gen/script/ScriptGenerator.cpp @@ -0,0 +1,142 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include "ScriptGenerator.hpp" +#include "ScriptRunner.hpp" +#include "OutputSink.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs::script { + +namespace { + +// Build the read-only corpus DOM a `generate(corpus, output)` entry +// point receives. 
This mirrors what an extension script sees: a +// `symbols` array of lazy per-symbol objects, each tagged with its flat +// `_id` so a script can form stable per-symbol URLs. +dom::Value +buildScriptCorpus(Corpus const& corpus, DomCorpus const& domCorpus) +{ + dom::Array symbols; + for (Symbol const& sym : corpus) + { + dom::Value value = domCorpus.get(sym.id); + value.getObject().set("_id", toBase16Str(sym.id)); + symbols.emplace_back(std::move(value)); + } + dom::Object corpusObj; + corpusObj.set("symbols", std::move(symbols)); + return dom::Value(std::move(corpusObj)); +} + +} // (anon) + +ScriptGenerator:: +ScriptGenerator(std::string id, std::string scriptPath, dom::Object params) + : id_(std::move(id)) + , scriptPath_(std::move(scriptPath)) + , params_(std::move(params)) +{ +} + +std::string_view +ScriptGenerator:: +id() const noexcept +{ + return id_; +} + +std::string_view +ScriptGenerator:: +displayName() const noexcept +{ + return id_; +} + +std::string_view +ScriptGenerator:: +fileExtension() const noexcept +{ + // A script-driven generator names its own output files, so it has + // no single extension. Report the id for diagnostics. 
+ return id_; +} + +Expected +ScriptGenerator:: +build(std::string_view outputPath, Corpus const& corpus) const +{ + OutputSink sink(outputPath); + DomCorpus domCorpus(corpus); + dom::Value corpusValue = buildScriptCorpus(corpus, domCorpus); + dom::Value const config(corpus.config.object()); + dom::Value const params(params_); + if (scriptPath_.ends_with(".lua")) + { + return runLuaGenerator( + corpusValue, scriptPath_, sink, config, params); + } + if (scriptPath_.ends_with(".js")) + { + return runJsGenerator( + corpusValue, scriptPath_, sink, config, params); + } + return Unexpected(formatError( + "generator '{}': script '{}' must be a .lua or .js file", + id_, scriptPath_)); +} + +Expected +ScriptGenerator:: +buildOne(std::ostream&, Corpus const&) const +{ + return Unexpected(formatError( + "generator '{}' is script-driven and does not support " + "single-page output", id_)); +} + +Expected +discoverScriptGenerators(Config::Settings const& settings) +{ + MRDOCS_TRY( + std::vector found, + discoverGeneratorManifests(hbs::addon_paths::addonRoots(settings))); + for (DiscoveredManifest const& d : found) + { + // Only manifests that name a `script` are script-driven + // generators; the data-driven pass installs the rest. + if (!d.manifest.script) + { + continue; + } + std::string const name(files::getFileName(d.dir)); + std::string scriptPath = files::appendPath(d.dir, *d.manifest.script); + // First-writer-wins, exactly as the data-driven pass: a + // duplicate id is a silent skip, and we never pass a `null`. + (void)installGenerator( + std::make_unique( + name, std::move(scriptPath), d.manifest.params)); + } + return {}; +} + +} // mrdocs::script diff --git a/src/lib/Gen/script/ScriptGenerator.hpp b/src/lib/Gen/script/ScriptGenerator.hpp new file mode 100644 index 0000000000..ff7a2aa44b --- /dev/null +++ b/src/lib/Gen/script/ScriptGenerator.hpp @@ -0,0 +1,101 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#ifndef MRDOCS_LIB_GEN_SCRIPT_SCRIPTGENERATOR_HPP +#define MRDOCS_LIB_GEN_SCRIPT_SCRIPTGENERATOR_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs::script { + +/** A generator whose output is produced by a user script. + + A script-driven generator hands the whole emit to a Lua or + JavaScript entry point of the form + `generate(corpus, output, config, params)`: the script traverses the + corpus and writes whatever files it wants through the `output` + object, optionally reading the resolved `config` and its own + `params`. Because the script owns the output structure, it can + produce shapes the per-page generators cannot, such as a single + artifact aggregated across all symbols (a search index, for example). +*/ +class ScriptGenerator + : public Generator +{ + std::string id_; + // The absolute path to the Lua or JavaScript entry script. + std::string scriptPath_; + // The generator's own parameters, from the manifest's `params` + // mapping; passed to the entry script as its `params` argument. + dom::Object params_; + +public: + /** Construct a script-driven generator. + + @param id The generator id, used to select it on the command + line. + @param scriptPath The absolute path to the entry script. + @param params The generator's own parameters, from its manifest. + */ + ScriptGenerator( + std::string id, + std::string scriptPath, + dom::Object params); + + std::string_view + id() const noexcept override; + + std::string_view + displayName() const noexcept override; + + std::string_view + fileExtension() const noexcept override; + + /** Run the entry script, which owns the whole emit. 
+ */ + Expected + build( + std::string_view outputPath, + Corpus const& corpus) const override; + + /** Reject single-page output. + + A script-driven generator owns its output structure and writes + whatever files it wants, so there is no single-stream form. + */ + Expected + buildOne( + std::ostream& os, + Corpus const& corpus) const override; +}; + +/** Discover script-driven generators and install them. + + For each configured addon root, walk the immediate subdirectories of + /generator/. A subdirectory becomes a script-driven generator + when its `mrdocs-generator.yml` names an entry script. The generator + id, used to select it on the command line, is the subdirectory name. + + Should be called once after the configuration is resolved and before + a generator is looked up by id. +*/ +Expected +discoverScriptGenerators(Config::Settings const& settings); + +} // mrdocs::script + +#endif diff --git a/src/lib/Gen/script/ScriptGeneratorJs.cpp b/src/lib/Gen/script/ScriptGeneratorJs.cpp new file mode 100644 index 0000000000..674fcc791b --- /dev/null +++ b/src/lib/Gen/script/ScriptGeneratorJs.cpp @@ -0,0 +1,103 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include "ScriptRunner.hpp" +#include "OutputSink.hpp" + +#include +#include +#include + +#include +#include + +namespace mrdocs::script { + +namespace { + +// Build the `output` object passed as the second argument to `generate`. +// The JS wrapper exposes a `dom::Function` as a callable proxy, so unlike +// the Lua side this needs no escape hatch: `write` is a variadic +// invocable that routes to the sink. The sink outlives the call (it is a +// local in `runJsGenerator`), so capturing it by pointer is safe. 
+dom::Object
+buildJsOutputApi(OutputSink& sink)
+{
+    OutputSink* sinkPtr = &sink;
+    dom::Object api;
+    api.set("write", dom::Value(dom::makeVariadicInvocable(
+        [sinkPtr](dom::Array const& args) -> Expected
+        {
+            // Only the first two arguments are read; any extra ones
+            // are ignored.
+            if (args.size() < 2)
+            {
+                return Unexpected(Error(
+                    "output.write: expected (path, contents)"));
+            }
+            dom::Value const path = args.get(0);
+            dom::Value const body = args.get(1);
+            if (!path.isString() || !body.isString())
+            {
+                return Unexpected(Error(
+                    "output.write: path and contents must be strings"));
+            }
+            Expected result = sinkPtr->write(
+                path.getString().get(), body.getString().get());
+            if (!result)
+            {
+                // Forward the sink's error unchanged.
+                return Unexpected(result.error());
+            }
+            // A successful write has no result to report, so hand
+            // back a default-constructed value.
+            return dom::Value();
+        })));
+    return api;
+}
+
+} // (anon)
+
+Expected
+runJsGenerator(
+    dom::Value const& corpus,
+    std::string const& scriptPath,
+    OutputSink& sink,
+    dom::Value const& config,
+    dom::Value const& params)
+{
+    js::Context ctx;
+    js::Scope scope(ctx);
+
+    // Read and evaluate the script. An evaluation failure is reported
+    // with the script path as context.
+    MRDOCS_TRY(std::string script, files::getFileText(scriptPath));
+    if (Expected exp = scope.script(script); !exp)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, exp.error().message()));
+    }
+
+    // Unlike an extension, a generator must define `generate`: the user
+    // selected this generator expecting output.
+    Expected fn = scope.getGlobal("generate");
+    if (!fn || !fn->isFunction())
+    {
+        return Unexpected(formatError(
+            "generator '{}': script must define a 'generate' function",
+            scriptPath));
+    }
+
+    // The `output` object is built for this one call; its `write`
+    // routes to `sink`, which the caller keeps alive for the duration.
+    Expected result =
+        fn->call(corpus, buildJsOutputApi(sink), config, params);
+    if (!result)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, result.error().message()));
+    }
+    return {};
+}
+
+} // mrdocs::script
diff --git a/src/lib/Gen/script/ScriptGeneratorLua.cpp b/src/lib/Gen/script/ScriptGeneratorLua.cpp
new file mode 100644
index 0000000000..ef82c31a74
--- /dev/null
+++ b/src/lib/Gen/script/ScriptGeneratorLua.cpp
@@ -0,0 +1,149 @@
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com)
+//
+// Official repository: https://github.com/cppalliance/mrdocs
+//
+
+#include "ScriptRunner.hpp"
+#include "OutputSink.hpp"
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+extern "C" {
+#include
+#include
+}
+
+namespace mrdocs::script {
+
+namespace {
+
+// Lua adapter for `OutputSink::write`. On failure the script aborts via
+// `luaL_error`; the host turns that into an `Unexpected` when `lua_pcall`
+// returns non-OK. The sink pointer is carried as the closure's single
+// upvalue.
+int
+luaWrite(lua_State* L)
+{
+    OutputSink* sink = static_cast<OutputSink*>(
+        lua_touserdata(L, lua_upvalueindex(1)));
+    // Both arguments must be real strings; numbers are not coerced.
+    // No object with a non-trivial destructor is alive here, so raising
+    // directly is safe.
+    if (lua_type(L, 1) != LUA_TSTRING ||
+        lua_type(L, 2) != LUA_TSTRING)
+    {
+        return luaL_error(L,
+            "output.write: expected (string path, string contents)");
+    }
+    std::size_t pathLen = 0;
+    char const* pathData = lua_tolstring(L, 1, &pathLen);
+    std::size_t bodyLen = 0;
+    char const* bodyData = lua_tolstring(L, 2, &bodyLen);
+
+    // `luaL_error` does not return. With Lua built as C (its headers are
+    // included through `extern "C"`) it unwinds with `longjmp`, which
+    // skips C++ destructors. So the result and the message string live in
+    // an inner scope: the message is copied onto the Lua stack there, and
+    // the error is raised only after that scope has been destroyed.
+    bool failed = false;
+    {
+        Expected<void> const result = sink->write(
+            std::string_view(pathData, pathLen),
+            std::string_view(bodyData, bodyLen));
+        if (!result)
+        {
+            std::string const message = result.error().message();
+            lua_pushlstring(L, message.data(), message.size());
+            failed = true;
+        }
+    }
+    if (failed)
+    {
+        // The message is owned by the Lua stack at this point.
+        return luaL_error(L, "%s", lua_tostring(L, -1));
+    }
+    return 0;
+}
+
+// Build the `output` global table and bind its `write` method.
+//
+// We register the C closure directly on the raw `lua_State` (via the
+// `Context::nativeState()` escape hatch) because the wrapper cannot carry
+// a native callable through a DOM value: `domValue_push` has no function
+// case. The closure carries the sink pointer as its single upvalue.
+void
+registerLuaOutputApi(lua_State* L, OutputSink& sink)
+{
+    lua_newtable(L);
+
+    lua_pushlightuserdata(L, &sink);
+    lua_pushcclosure(L, &luaWrite, 1);
+    lua_setfield(L, -2, "write");
+
+    // Pops the table, leaving the stack as it was on entry.
+    lua_setglobal(L, "output");
+}
+
+} // (anon)
+
+Expected<void>
+runLuaGenerator(
+    dom::Value const& corpus,
+    std::string const& scriptPath,
+    OutputSink& sink,
+    dom::Value const& config,
+    dom::Value const& params)
+{
+    lua::Context ctx;
+
+    // Register the `output` global before loading the script so
+    // top-level code can reference it, and so we can pass it as the
+    // second argument below.
+    registerLuaOutputApi(
+        static_cast<lua_State*>(ctx.nativeState()), sink);
+
+    lua::Scope scope(ctx);
+    MRDOCS_TRY(std::string script, files::getFileText(scriptPath));
+    MRDOCS_TRY(lua::Function chunk, scope.loadChunk(script, scriptPath));
+
+    // Run the chunk's top-level code. A failure here gets the same
+    // "generator '<path>': ..." context as a failure inside `generate`,
+    // matching what the JavaScript runner reports.
+    Expected<lua::Value> chunkResult = chunk.call();
+    if (!chunkResult)
+    {
+        return Unexpected(formatError(
+            "generator '{}': {}",
+            scriptPath, chunkResult.error().message()));
+    }
+
+    // Fetch the `output` global so it can be passed as the second
+    // argument. It must outlive the `generate` call below, so hold it
+    // here rather than moving it out.
+    Expected<lua::Value> output = scope.getGlobal("output");
+    if (!output)
+    {
+        return Unexpected(output.error());
+    }
+
+    auto callGenerate =
+        [&](lua::Function&& fn) -> Expected<void>
+        {
+            Expected<lua::Value> result =
+                fn.call(corpus, *output, config, params);
+            if (!result)
+            {
+                return Unexpected(formatError(
+                    "generator '{}': {}",
+                    scriptPath, result.error().message()));
+            }
+            return {};
+        };
+
+    // Resolve `generate` the same way extension scripts resolve their
+    // hook: prefer the chunk's return value, fall back to a same-named
+    // global. Unlike an extension, a generator must define one: the user
+    // selected this generator expecting output.
+    if (chunkResult->isFunction())
+    {
+        return callGenerate(lua::Function(std::move(*chunkResult)));
+    }
+    Expected<lua::Value> global = scope.getGlobal("generate");
+    if (!global || !global->isFunction())
+    {
+        return Unexpected(formatError(
+            "generator '{}': script must define a 'generate' function",
+            scriptPath));
+    }
+    return callGenerate(lua::Function(std::move(*global)));
+}
+
+} // mrdocs::script
diff --git a/src/lib/Gen/script/ScriptRunner.hpp b/src/lib/Gen/script/ScriptRunner.hpp
new file mode 100644
index 0000000000..a1dae4ac4f
--- /dev/null
+++ b/src/lib/Gen/script/ScriptRunner.hpp
@@ -0,0 +1,68 @@
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com)
+//
+// Official repository: https://github.com/cppalliance/mrdocs
+//
+
+#ifndef MRDOCS_LIB_GEN_SCRIPT_SCRIPTRUNNER_HPP
+#define MRDOCS_LIB_GEN_SCRIPT_SCRIPTRUNNER_HPP
+
+#include
+#include
+#include
+#include
+
+namespace mrdocs::script {
+
+class OutputSink;
+
+/** Run a Lua entry script's `generate(corpus, output, config, params)`.
+
+    Build a Lua context, expose the output writer as the `output` global,
+    evaluate the script, and call its `generate` function with the
+    corpus, the writer, the resolved configuration, and the generator's
+    own parameters. The entry point is the function the chunk returns,
+    when it returns one; otherwise it is the global named `generate`.
+    A missing `generate` function is an error.
+
+    @param corpus The read-only corpus DOM passed as the first argument.
+    @param scriptPath The absolute path to the Lua entry script.
+    @param sink The file-writing API exposed to the script.
+    @param config The resolved configuration DOM, as templates see it.
+    @param params The generator's own parameters, from its manifest.
+
+    @return An error if the script cannot be read, loaded, or run, if
+    no `generate` function is found, or if the `generate` call fails.
+*/
+Expected
+runLuaGenerator(
+    dom::Value const& corpus,
+    std::string const& scriptPath,
+    OutputSink& sink,
+    dom::Value const& config,
+    dom::Value const& params);
+
+/** Run a JS entry script's `generate(corpus, output, config, params)`.
+
+    Build a JavaScript context, evaluate the script, and call its
+    `generate` function with the corpus, an `output` object whose
+    `write` method routes to the writer, the resolved configuration, and
+    the generator's own parameters. The entry point is looked up as the
+    global named `generate`. A missing `generate` function is an
+    error.
+
+    @param corpus The read-only corpus DOM passed as the first argument.
+    @param scriptPath The absolute path to the JavaScript entry script.
+    @param sink The file-writing API exposed to the script.
+    @param config The resolved configuration DOM, as templates see it.
+    @param params The generator's own parameters, from its manifest.
+
+    @return An error if the script cannot be read or evaluated, if no
+    global `generate` function is found, or if the `generate` call fails.
+*/ +Expected +runJsGenerator( + dom::Value const& corpus, + std::string const& scriptPath, + OutputSink& sink, + dom::Value const& config, + dom::Value const& params); + +} // mrdocs::script + +#endif diff --git a/src/test/TestRunner.cpp b/src/test/TestRunner.cpp index 132fc2ecb7..80c44f1492 100644 --- a/src/test/TestRunner.cpp +++ b/src/test/TestRunner.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -190,6 +191,13 @@ handleFile( { return report::error("{}: \"{}\"", discovered.error(), filePath); } + Expected scriptsDiscovered = + script::discoverScriptGenerators(loaded->settings); + if (!scriptsDiscovered) + { + return report::error( + "{}: \"{}\"", scriptsDiscovered.error(), filePath); + } Generator const* gen = findGenerator(genId_); if (!gen) { diff --git a/src/tool/GenerateAction.cpp b/src/tool/GenerateAction.cpp index d6b6fed4f1..5176f968a9 100644 --- a/src/tool/GenerateAction.cpp +++ b/src/tool/GenerateAction.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -49,14 +50,16 @@ DoGenerateAction( // -------------------------------------------------------------- // - // Discover data-driven generators + // Discover addon-defined generators // // -------------------------------------------------------------- - // Each /generator// directory that ships its own - // Handlebars layouts is registered as an additional generator - // (subject to id and layout-template checks) before the user- - // requested generator is looked up below. + // Each /generator// directory that ships an + // `mrdocs-generator.yml` is registered as an additional generator + // before the user-requested generator is looked up below. A manifest + // that declares `escape` rules is a data-driven Handlebars generator; + // a manifest that names a `script` is a script-driven generator. 
MRDOCS_TRY(hbs::discoverDataDrivenGenerators(config->settings())); + MRDOCS_TRY(script::discoverScriptGenerators(config->settings())); // -------------------------------------------------------------- // From ed64d23e01fc3336312ce5f6d3840af6b7d10171 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 11:59:34 +0200 Subject: [PATCH 3/4] test: add unit tests for script-driven generators This covers discovery (a script manifest installs a `ScriptGenerator`), the output writer (writes under the root, rejects absolute and escaping paths), and both runners against a synthetic corpus, asserting the file they emit. A regression test reads a symbol with no name field, exercising the `Undefined`-to-`nil` marshalling a real corpus needs. --- src/test/lib/Gen/script/ScriptGenerator.cpp | 482 ++++++++++++++++++++ 1 file changed, 482 insertions(+) create mode 100644 src/test/lib/Gen/script/ScriptGenerator.cpp diff --git a/src/test/lib/Gen/script/ScriptGenerator.cpp b/src/test/lib/Gen/script/ScriptGenerator.cpp new file mode 100644 index 0000000000..e43d6ecf12 --- /dev/null +++ b/src/test/lib/Gen/script/ScriptGenerator.cpp @@ -0,0 +1,482 @@ +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (c) 2026 Gennaro Prota (gennaro.prota@gmail.com) +// +// Official repository: https://github.com/cppalliance/mrdocs +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mrdocs::script { + +namespace { + +// Write `content` verbatim to `path`. Pre-existing files are truncated. 
+void +writeFile(std::string_view path, std::string_view content) +{ + std::ofstream os(std::string{path}, std::ios::binary | std::ios::trunc); + os.write(content.data(), + static_cast(content.size())); +} + +// The `config` and `params` arguments a generator receives. Tests that +// exercise only the `corpus` and `output` path pass empty objects. +dom::Value +emptyObject() +{ + return dom::Value(dom::Object()); +} + +// A minimal `Config` whose `object()` returns a canned DOM object. It +// lets a `build()`-level test assert what `generate` sees as `config` +// without standing up a real `ConfigImpl`. +struct StubConfig + : Config +{ + Config::Settings settings_; + dom::Object configObject; + mutable ThreadPool pool; + + ThreadPool& + threadPool() const noexcept override + { + return pool; + } + Config::Settings const& + settings() const noexcept override + { + return settings_; + } + dom::Object const& + object() const override + { + return configObject; + } +}; + +// An empty corpus: `build()` iterates no symbols, so it never reflects +// a real `Symbol`; it carries only the `Config` that `build()` reads +// `config` from. 
+struct StubCorpus + : Corpus +{ + explicit + StubCorpus(Config const& config) + : Corpus(config) + { + } + + static Symbol const* + noNext(Corpus const*, Symbol const*) + { + return nullptr; + } + iterator + begin() const noexcept override + { + return iterator(this, nullptr, &noNext); + } + iterator + end() const noexcept override + { + return iterator(this, nullptr, &noNext); + } + Expected + lookup(SymbolID const&, std::string_view) const override + { + return Unexpected(Error("stub corpus has no symbols")); + } + Symbol const* + find(SymbolID const&) const noexcept override + { + return nullptr; + } + void + qualifiedName(Symbol const&, std::string&) const override + { + } + void + qualifiedName(Symbol const&, SymbolID const&, std::string&) const override + { + } +}; + +// A two-symbol corpus shaped like what `generate(corpus, output)` sees: +// a `symbols` array whose entries carry a `name` and a flat `_id`. +dom::Value +makeCorpus() +{ + dom::Object foo; + foo.set("name", std::string("foo")); + foo.set("_id", std::string("0001")); + dom::Object bar; + bar.set("name", std::string("bar")); + bar.set("_id", std::string("0002")); + dom::Array symbols; + symbols.emplace_back(dom::Value(std::move(foo))); + symbols.emplace_back(dom::Value(std::move(bar))); + dom::Object corpus; + corpus.set("symbols", std::move(symbols)); + return dom::Value(std::move(corpus)); +} + +// A Lua generator that emits one aggregated artifact across all symbols, +// the canonical thing a per-page generator cannot produce. +constexpr std::string_view luaIndex = R"LUA( +return function(corpus, output) + local parts = {} + for _, sym in ipairs(corpus.symbols) do + parts[#parts + 1] = '{"name":"' .. sym.name .. '","id":"' .. sym._id .. '"}' + end + output.write("search-index.json", "[" .. table.concat(parts, ",") .. "]") +end +)LUA"; + +// The same generator in JavaScript, using the global-function shape. 
+constexpr std::string_view jsIndex = R"JS( +function generate(corpus, output) { + var parts = []; + for (var i = 0; i < corpus.symbols.length; i++) { + var s = corpus.symbols[i]; + parts.push('{"name":"' + s.name + '","id":"' + s._id + '"}'); + } + output.write("search-index.json", "[" + parts.join(",") + "]"); +} +)JS"; + +constexpr std::string_view expectedJson = + R"([{"name":"foo","id":"0001"},{"name":"bar","id":"0002"}])"; + +} // (anon) + +struct ScriptGeneratorTest +{ + // + // OutputSink + // + + void + testSinkWritesUnderRoot() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + OutputSink sink(td.path()); + // A nested relative path is created and written. + BOOST_TEST(sink.write("a/b/out.txt", "hello").has_value()); + Expected got = + files::getFileText(files::appendPath(td.path(), "a", "b", "out.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "hello"); + } + } + + void + testSinkRejectsAbsolutePath() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + OutputSink sink(td.path()); + // An absolute path is rejected even when it points inside root. + std::string const abs = files::appendPath(td.path(), "x.txt"); + BOOST_TEST(!sink.write(abs, "no").has_value()); + } + + void + testSinkRejectsEscape() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + OutputSink sink(td.path()); + // A path that climbs out of the output directory is rejected. 
+ BOOST_TEST(!sink.write("../escaped.txt", "no").has_value()); + } + + // + // runLuaGenerator / runJsGenerator + // + + void + testLuaGenerator() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, luaIndex); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + Expected result = runLuaGenerator( + makeCorpus(), script, sink, emptyObject(), emptyObject()); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "search-index.json")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == expectedJson); + } + } + + void + testJsGenerator() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.js"); + writeFile(script, jsIndex); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + Expected result = runJsGenerator( + makeCorpus(), script, sink, emptyObject(), emptyObject()); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "search-index.json")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == expectedJson); + } + } + + void + testLuaReadsMissingFieldAsNil() + { + // A symbol object without a `name` field: `get("name")` yields + // `Undefined`, which Lua must marshal as `nil` rather than abort. + // The global namespace has no name, so a real corpus hits this. 
+ dom::Object noName; + noName.set("_id", std::string("0009")); + dom::Array symbols; + symbols.emplace_back(dom::Value(std::move(noName))); + dom::Object corpusObj; + corpusObj.set("symbols", std::move(symbols)); + dom::Value const corpus(std::move(corpusObj)); + + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, R"LUA( +return function(corpus, output) + local s = corpus.symbols[1] + output.write("out.txt", "name=" .. (s.name or "NONE")) +end +)LUA"); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + Expected result = runLuaGenerator( + corpus, script, sink, emptyObject(), emptyObject()); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "out.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "name=NONE"); + } + } + + void + testMissingGenerateIsError() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "empty.lua"); + writeFile(script, "-- this script defines no generate function\n"); + OutputSink sink(files::appendPath(td.path(), "out")); + // A generator must define `generate`; its absence is an error. + BOOST_TEST(!runLuaGenerator( + makeCorpus(), script, sink, emptyObject(), emptyObject()) + .has_value()); + } + + void + testLuaReceivesConfigAndParams() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, R"LUA( +return function(corpus, output, config, params) + output.write("o.txt", tostring(config.multipage) .. "|" .. 
params.greeting) +end +)LUA"); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + dom::Object config; + config.set("multipage", true); + dom::Object params; + params.set("greeting", std::string("hi")); + Expected result = runLuaGenerator( + makeCorpus(), script, sink, + dom::Value(std::move(config)), dom::Value(std::move(params))); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "o.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "true|hi"); + } + } + + void + testJsReceivesConfigAndParams() + { + ScopedTempDirectory td("mrdocs-scriptgen"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.js"); + writeFile(script, R"JS( +function generate(corpus, output, config, params) { + output.write("o.txt", String(config.multipage) + "|" + params.greeting); +} +)JS"); + std::string const outDir = files::appendPath(td.path(), "out"); + OutputSink sink(outDir); + + dom::Object config; + config.set("multipage", true); + dom::Object params; + params.set("greeting", std::string("hi")); + Expected result = runJsGenerator( + makeCorpus(), script, sink, + dom::Value(std::move(config)), dom::Value(std::move(params))); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "o.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "true|hi"); + } + } + + // The full `build()` path: `config` comes from + // `corpus.config.object()` and `params` from the generator's + // manifest, both reaching the script. + void + testBuildPassesConfigAndParams() + { + ScopedTempDirectory td("mrdocs-scriptgen-build"); + BOOST_TEST(td); + std::string const script = files::appendPath(td.path(), "g.lua"); + writeFile(script, R"LUA( +function generate(corpus, output, config, params) + output.write("o.txt", tostring(config.multipage) .. "|" .. 
params.greeting) +end +)LUA"); + + StubConfig config; + config.configObject.set("multipage", true); + StubCorpus corpus(config); + + dom::Object params; + params.set("greeting", std::string("hi")); + ScriptGenerator gen("build-selftest", script, std::move(params)); + + std::string const outDir = files::appendPath(td.path(), "out"); + Expected result = gen.build(outDir, corpus); + BOOST_TEST(result.has_value()); + Expected got = + files::getFileText(files::appendPath(outDir, "o.txt")); + BOOST_TEST(got.has_value()); + if (got) + { + BOOST_TEST(*got == "true|hi"); + } + } + + // + // discoverScriptGenerators + // + + void + testDiscoveryRegistersScriptGenerator() + { + ScopedTempDirectory td("mrdocs-scriptgen-disc"); + BOOST_TEST(td); + // Lay out /generator// with a script manifest. The id + // is unusual so it does not collide with the process-global + // registry shared across the test binary. + std::string const id = "mrdocs-script-generator-selftest"; + std::string const genDir = + files::appendPath(td.path(), "generator", id); + BOOST_TEST(files::createDirectory(genDir).has_value()); + writeFile( + files::appendPath(genDir, "mrdocs-generator.yml"), + "script: g.lua\n"); + writeFile(files::appendPath(genDir, "g.lua"), luaIndex); + + Config::Settings settings; + settings.addons = std::string(td.path()); + BOOST_TEST(discoverScriptGenerators(settings).has_value()); + BOOST_TEST(findGenerator(id) != nullptr); + } + + void + testManifestParamsParsed() + { + ScopedTempDirectory td("mrdocs-scriptgen-manifest"); + BOOST_TEST(td); + std::string const yml = + files::appendPath(td.path(), "mrdocs-generator.yml"); + writeFile(yml, "script: g.lua\nparams:\n greeting: hi\n"); + Expected manifest = loadGeneratorManifest(yml); + BOOST_TEST(manifest.has_value()); + if (manifest) + { + dom::Value const greeting = manifest->params.get("greeting"); + BOOST_TEST(greeting.isString()); + if (greeting.isString()) + { + BOOST_TEST(greeting.getString().get() == "hi"); + } + } + } + + 
void + run() + { + testSinkWritesUnderRoot(); + testSinkRejectsAbsolutePath(); + testSinkRejectsEscape(); + testLuaGenerator(); + testJsGenerator(); + testLuaReadsMissingFieldAsNil(); + testMissingGenerateIsError(); + testLuaReceivesConfigAndParams(); + testJsReceivesConfigAndParams(); + testBuildPassesConfigAndParams(); + testDiscoveryRegistersScriptGenerator(); + testManifestParamsParsed(); + } +}; + +TEST_SUITE( + ScriptGeneratorTest, + "clang.mrdocs.script.ScriptGenerator"); + +} // namespace mrdocs::script From 658ce97a848d04a21aad73055218738486881e73 Mon Sep 17 00:00:00 2001 From: Gennaro Prota Date: Wed, 3 Jun 2026 13:01:07 +0200 Subject: [PATCH 4/4] docs: document script-driven generators --- docs/modules/ROOT/pages/generators.adoc | 2 + .../ROOT/pages/script-driven-generators.adoc | 84 +++++++++++++++++++ docs/mrdocs.schema.json | 2 +- src/lib/ConfigOptions.json | 2 +- 4 files changed, 88 insertions(+), 2 deletions(-) create mode 100644 docs/modules/ROOT/pages/script-driven-generators.adoc diff --git a/docs/modules/ROOT/pages/generators.adoc b/docs/modules/ROOT/pages/generators.adoc index 5b1a5860ad..59f7354e1c 100644 --- a/docs/modules/ROOT/pages/generators.adoc +++ b/docs/modules/ROOT/pages/generators.adoc @@ -200,6 +200,8 @@ Unknown top-level keys are silently ignored so future schema additions stay non- If the same id appears under more than one addon root, the first one wins: that root's manifest sets the format's escape rules. Later roots can still contribute layered partials and helpers under the same id through the existing template-loading path, so a project can supplement a shared format without redefining it. +To add a generator that builds its output structure imperatively, rather than rendering one page per symbol from templates, see xref:script-driven-generators.adoc[Script-driven generators]. + == Stylesheet Options The HTML and AsciiDoc generators ship a bundled stylesheet that is inlined by default. 
You can replace or layer styles with the following options (available in config files and on the CLI): diff --git a/docs/modules/ROOT/pages/script-driven-generators.adoc b/docs/modules/ROOT/pages/script-driven-generators.adoc new file mode 100644 index 0000000000..420974abe1 --- /dev/null +++ b/docs/modules/ROOT/pages/script-driven-generators.adoc @@ -0,0 +1,84 @@ += Script-driven generators + +A data-driven generator renders one page per symbol from templates. When you need a different output structure - one file per namespace, or a single artifact aggregated across every symbol, such as a search index - a template generator cannot express it, because the page-per-symbol shape is fixed by the host. A script-driven generator hands the whole emit to a Lua or JavaScript script, which traverses the corpus and writes whatever files it wants. No C++ and no templates are involved. + +A generator directory is script-driven when its `mrdocs-generator.yml` names an entry script: + +[source,yaml] +---- +script: generator.lua +---- + +The `script` key holds a path to a Lua (`.lua`) or JavaScript (`.js`) file, relative to the generator directory. Naming a script is what distinguishes the two flavors: a manifest with a `script` key is script-driven, otherwise the directory is a data-driven (template) generator. As with template generators, the directory name is the generator id you select with `--generator`. + +== The `generate` entry point + +The script defines a single entry point, a function named `generate`: + +[source,lua] +---- +function generate(corpus, output, config, params) + -- ... +end +---- + +`corpus.symbols` is the array of every symbol. Each symbol carries the same fields the template and helper layers see, plus a flat `_id` string suitable as a stable per-symbol URL fragment. 
+ +`output.write(relativePath, contents)` writes one file under the configured output directory - the `--output` path on the command line, or the `output` key in the config file, the same location the built-in generators write to. The path is resolved relative to that directory and may not escape it; an absolute path or one that climbs above the output directory is rejected. Parent directories are created as needed. + +Because the script owns the output, it also owns what a per-page generator would otherwise do for it: the URLs it emits, and any escaping of the content it writes. The host does not apply an escape map to a script-driven generator's output. + +`config` is the resolved configuration: the same object the templates receive, holding every value from the config file and the command line. See xref:config-file.adoc[the configuration reference] for the available keys. + +`params` is this generator's own parameters, read from the optional `params:` mapping in its `mrdocs-generator.yml`. A scalar value is a string (a script coerces numbers or booleans itself); nested mappings and sequences become objects and arrays. It is an empty object when the manifest declares no parameters. For example: + +[source,yaml] +---- +script: generator.lua +params: + title: API Reference +---- + +makes `params.title` available to the script. + +`config` and `params` are trailing arguments, so a generator that needs neither can omit them: `function generate(corpus, output)`. + +JavaScript looks up `generate` as a global function, so a JavaScript generator must define one (`function generate(corpus, output, config, params) { ... }`). Lua accepts that same named-global form, or - equivalently - the function value the chunk returns: + +[source,lua] +---- +-- Equivalent, in Lua, to a global function named generate. +return function(corpus, output, config, params) + -- ... 
+end +---- + +Unlike a corpus-transform extension, whose hook is optional, a generator must define a `generate` function: selecting the generator is a request for output, so a missing entry point is an error. + +== Example: a search index + +This generator emits a single search-index.json aggregating every symbol, an artifact no per-page generator can produce: + +[source,lua] +---- +-- Quote a string as a JSON value. +local function json_string(s) + s = s:gsub('\\', '\\\\'):gsub('"', '\\"') + return '"' .. s .. '"' +end + +function generate(corpus, output) + local entries = {} + for _, sym in ipairs(corpus.symbols) do + local name = sym.name or "" + if name ~= "" then + entries[#entries + 1] = + '{"name":' .. json_string(name) .. + ',"url":' .. json_string(sym._id .. ".html") .. "}" + end + end + output.write( + "search-index.json", + "[" .. table.concat(entries, ",") .. "]") +end +---- diff --git a/docs/mrdocs.schema.json b/docs/mrdocs.schema.json index e72060fe0b..2e0bf61aa1 100644 --- a/docs/mrdocs.schema.json +++ b/docs/mrdocs.schema.json @@ -252,7 +252,7 @@ }, "generator": { "default": "adoc", - "description": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, and `xml`; data-driven generators can be added by dropping a template folder under /generator//.", + "description": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. 
The built-in generators include `adoc`, `html`, and `xml`; data-driven generators can be added by dropping a template folder under /generator//; script-driven generators instead ship a Lua or JavaScript script that produces the output.", "title": "Generator used to create the documentation", "type": "string" }, diff --git a/src/lib/ConfigOptions.json b/src/lib/ConfigOptions.json index c441b29a5a..efd2ce3c9f 100644 --- a/src/lib/ConfigOptions.json +++ b/src/lib/ConfigOptions.json @@ -397,7 +397,7 @@ { "name": "generator", "brief": "Generator used to create the documentation", - "details": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, and `xml`; data-driven generators can be added by dropping a template folder under /generator//.", + "details": "The generator is responsible for creating the documentation from the extracted symbols. The generator uses the extracted symbols and the templates to create the documentation. The built-in generators include `adoc`, `html`, and `xml`; data-driven generators can be added by dropping a template folder under /generator//; script-driven generators instead ship a Lua or JavaScript script that produces the output.", "type": "string", "default": "adoc" },