diff --git a/CHANGELOG.md b/CHANGELOG.md index 45236b4..13d53cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,22 @@ ## Unreleased +### New Features + +- feat: Add `json-include` whitelist of dot-separated paths to keep in the JSON export. +- feat: Add `json-exclude` blacklist of dot-separated paths to drop from the JSON export. +- feat: Add `json-exclude-sensitive` option (default `true`) to redact host filesystem paths (`quarto.doc.input_file`, `quarto.doc.output_file`, `quarto.project.directory`, `quarto.project.output_directory`, `pandoc.PANDOC_SCRIPT_FILE`). +- feat: Add `json-warn-on-server` option (default `true`) to warn when JSON export is enabled in CI or server contexts. + +### Bug Fixes + +- fix: Coerce boolean metadata robustly, accepting raw Lua booleans, Pandoc `MetaBool`, and the strings `'true'`/`'false'` (case-insensitive). +- fix: Reset module-level configuration state at the start of each `Meta` pass to prevent cross-document leakage in batch renders. + +### Documentation + +- docs: Document the JSON export schema, filtering options, and example coverage for the new filter behaviour. + ## 1.4.1 (2026-04-15) ### Refactoring diff --git a/README.md b/README.md index e6a9a4d..7b7170f 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,70 @@ extensions: json: "custom-path.json" # Exports to "custom-path.json" ``` -The JSON file will contain all the LUA environment metadata collected during document rendering. +The boolean toggle accepts raw YAML booleans (`true`, `false`), their quoted string forms (`"true"`, `"false"`), and a path string for custom filenames. 
+ +### JSON export schema + +The exported JSON has two top-level keys, `pandoc` and `quarto`, that mirror the live Pandoc/Quarto Lua objects: + +```json +{ + "pandoc": { + "FORMAT": "html", + "PANDOC_API_VERSION": "1.23", + "PANDOC_VERSION": "3.6.3", + "PANDOC_READER_OPTIONS": { }, + "PANDOC_WRITER_OPTIONS": { }, + "PANDOC_STATE": { } + }, + "quarto": { + "version": [1, 7, 32], + "doc": { }, + "project": { }, + "log": { }, + "json": { } + } +} +``` + +Functions and userdata values are dropped. +Empty branches are pruned. + +### Filtering the export + +Four sibling options control what ends up in the JSON file: + +- `json-include`: array of dot-separated paths to keep (e.g. `pandoc.FORMAT`). + Everything outside the listed paths is omitted. + Omit to keep every non-excluded path. +- `json-exclude`: array of dot-separated paths to drop. + Applied after the include whitelist. +- `json-exclude-sensitive` (default `true`): redacts built-in sensitive paths that expose host filesystem layout. + The redacted paths are `quarto.doc.input_file`, `quarto.doc.output_file`, `quarto.project.directory`, `quarto.project.output_directory`, and `pandoc.PANDOC_SCRIPT_FILE`. + Set to `false` to include them. +- `json-warn-on-server` (default `true`): emits a warning when JSON export is enabled in a CI or server context (`CI`, `GITHUB_ACTIONS`, `GITLAB_CI`, `CIRCLECI`, `TRAVIS`, `JENKINS_URL`, `BUILDKITE`, `TF_BUILD`). + Set to `false` to silence the warning. 
+ +Example, keeping only the active format and Quarto version: + +```yaml +extensions: + lua-env: + json: true + json-include: + - pandoc.FORMAT + - quarto.version +``` + +Example, removing a noisy branch while keeping everything else: + +```yaml +extensions: + lua-env: + json: true + json-exclude: + - quarto._quarto +``` ## Example diff --git a/_extensions/lua-env/_extension.yml b/_extensions/lua-env/_extension.yml index 2997e59..1526049 100644 --- a/_extensions/lua-env/_extension.yml +++ b/_extensions/lua-env/_extension.yml @@ -5,5 +5,5 @@ quarto-required: ">=1.4.459" contributes: shortcodes: - lua-env-shortcode.lua - filters: + filters: - lua-env-filter.lua diff --git a/_extensions/lua-env/_schema.yml b/_extensions/lua-env/_schema.yml index 02bcd5f..2f504c7 100644 --- a/_extensions/lua-env/_schema.yml +++ b/_extensions/lua-env/_schema.yml @@ -1,4 +1,4 @@ -# _schema.yml for "lua-env" filter and shortcode extension +# Schema for the lua-env extension # Describes options and shortcode arguments for IDE tooling and runtime validation. # Extension-level metadata options. @@ -6,22 +6,42 @@ # extensions: # lua-env: # json: true +# json-include: ["pandoc.FORMAT", "quarto.version"] +# json-exclude: ["quarto._quarto"] +# json-exclude-sensitive: true +# json-warn-on-server: true -$schema: https://m.canouil.dev/quarto-wizard/assets/schema/v1/extension-schema.json +$schema: https://m.canouil.dev/quarto-wizard/assets/schema/v2/extension-schema.json options: json: type: [boolean, string] default: false - description: "Export the Lua environment metadata to a JSON file. Set to true for 'lua-env.json', false to disable, or a custom file path." + description: "Export the Lua environment metadata to a JSON file, using true for 'lua-env.json', false to disable, or a custom file path." + json-include: + type: [string, array] + default: null + description: "Whitelist of dot-separated paths to include in the JSON export, omitting anything outside the listed paths." 
+ json-exclude: + type: [string, array] + default: null + description: "Blacklist of dot-separated paths to omit from the JSON export, applied after the include whitelist." + json-exclude-sensitive: + type: boolean + default: true + description: "Redact built-in sensitive paths that expose host filesystem layout, set to false to include them." + json-warn-on-server: + type: boolean + default: true + description: "Emit a warning when JSON export is enabled in a CI or server context, set to false to silence the warning." # Per-shortcode schemas. # Describes positional arguments for {{< lua-env >}}. shortcodes: lua-env: - description: "Outputs the value of a Lua environment variable from the collected metadata." + description: "Output the value of a Lua environment variable from the collected metadata." arguments: - name: variable type: string required: true - description: "Dot-separated path to the variable (e.g., 'quarto.version', 'pandoc.PANDOC_VERSION')." + description: "Dot-separated path to the variable, such as 'quarto.version' or 'pandoc.PANDOC_VERSION'." 
diff --git a/_extensions/lua-env/lua-env-filter.lua b/_extensions/lua-env/lua-env-filter.lua index 5b57181..b637ae3 100644 --- a/_extensions/lua-env/lua-env-filter.lua +++ b/_extensions/lua-env/lua-env-filter.lua @@ -9,17 +9,180 @@ local EXTENSION_NAME = 'lua-env' --- Load modules local str = require(quarto.utils.resolve_path('_modules/string.lua'):gsub('%.lua$', '')) local log = require(quarto.utils.resolve_path('_modules/logging.lua'):gsub('%.lua$', '')) -local meta_mod = require(quarto.utils.resolve_path('_modules/metadata.lua'):gsub('%.lua$', '')) local pdoc = require(quarto.utils.resolve_path('_modules/pandoc-helpers.lua'):gsub('%.lua$', '')) --- @type string|nil The JSON file path to export metadata to local json_file = nil +--- @type table|nil Set of dot-separated paths to include (whitelist) +local include_paths = nil + +--- @type table|nil Set of dot-separated paths to exclude (blacklist) +local exclude_paths = nil + +--- @type boolean Whether to redact built-in sensitive paths (absolute filesystem paths) +local exclude_sensitive = true + +--- @type boolean Whether to warn when exporting in detected server/CI contexts +local warn_on_server = true + +--- @type table Set of dot-separated paths considered sensitive by default +--- These leak the host filesystem layout, so are redacted unless the user opts out. +local SENSITIVE_PATHS = { + ['quarto.doc.input_file'] = true, + ['quarto.doc.output_file'] = true, + ['quarto.project.directory'] = true, + ['quarto.project.output_directory'] = true, + ['pandoc.PANDOC_SCRIPT_FILE'] = true, +} + +--- Coerce a metadata value to a Lua boolean if it represents one, otherwise return nil. +--- Handles raw Lua booleans, Pandoc MetaBool (via `pandoc.utils.type`), and the strings +--- `'true'`/`'false'` (case-insensitive). Any other value yields nil so callers can +--- treat it as a non-boolean (for example a file path string). 
+--- @param value any The metadata value to coerce
+--- @return boolean|nil The coerced boolean, or nil if value is not boolean-like
+local function coerce_boolean(value)
+  if value == nil then return nil end
+  if type(value) == 'boolean' then return value end
+  if pandoc.utils.type(value) == 'boolean' then return value end
+  local s = pandoc.utils.stringify(value)
+  if s == nil or s == '' then return nil end
+  local lower = s:lower()
+  if lower == 'true' then return true end
+  if lower == 'false' then return false end
+  return nil
+end
+
+--- Read a raw metadata value at extensions.{extension_name}.{key} without stringifying.
+--- @param meta table The document metadata table
+--- @param extension_name string The extension namespace key
+--- @param key string The configuration key
+--- @return any The raw metadata value, or nil if missing
+local function get_raw_meta(meta, extension_name, key)
+  if not meta['extensions'] then return nil end
+  if not meta['extensions'][extension_name] then return nil end
+  return meta['extensions'][extension_name][key]
+end
+
+--- Parse a list-style metadata value into a set of strings.
+--- Accepts a MetaList (or plain Lua array) of strings, or a single scalar string.
+--- A scalar is always treated as ONE path: its stringified, trimmed text becomes
+--- the only entry of the set. Entries that are empty after trimming are skipped.
+--- @param value any The raw metadata value
+--- @return table|nil A set keyed by path string, or nil if absent or empty
+local function parse_path_list(value)
+  if value == nil then return nil end
+  local set = {}
+  -- Rely on pandoc's own type name rather than Lua's `type`: `Inlines` and `Blocks`
+  -- are Lua tables as well, so a plain `type(value) == 'table'` test would iterate
+  -- the individual inlines of a scalar string and split it into several bogus paths.
+  local kind = pandoc.utils.type(value)
+  if (kind == 'List' or kind == 'table') and value[1] ~= nil then
+    for _, entry in ipairs(value) do
+      local s = str.trim(pandoc.utils.stringify(entry))
+      if s ~= '' then set[s] = true end
+    end
+    -- A list holding only blank entries is reported as absent.
+    return next(set) and set or nil
+  end
+  -- Scalar (or non-array table): the whole stringified value is a single path.
+  local s = str.trim(pandoc.utils.stringify(value))
+  if s == '' then return nil end
+  set[s] = true
+  return set
+end
+
+--- Detect a server or CI execution context via well-known environment variables.
+--- @return string|nil The matching environment variable name, or nil if not detected
+local function detect_server_context()
+  local server_vars = {
+    'CI',
+    'GITHUB_ACTIONS',
+    'GITLAB_CI',
+    'CIRCLECI',
+    'TRAVIS',
+    'JENKINS_URL',
+    'BUILDKITE',
+    'TF_BUILD',
+  }
+  -- Values that explicitly mean "not a CI run", such as `CI=false` or `CI=0`.
+  local disabled = { [''] = true, ['false'] = true, ['0'] = true }
+  for _, name in ipairs(server_vars) do
+    local value = os.getenv(name)
+    -- Compare case-insensitively so `CI=False` / `CI=FALSE` do not raise a false positive.
+    if value ~= nil and not disabled[value:lower()] then
+      return name
+    end
+  end
+  return nil
+end
+
+--- Test if a dot-separated path is matched by a path set, including any ancestor entry.
+--- @param path string The dot-separated path to test (e.g. 'quarto.doc.input_file')
+--- @param set table|nil The path set to check against
+--- @return boolean True if the path or any of its ancestors is in the set
+local function path_matches(path, set)
+  if not set then return false end
+  if set[path] then return true end
+  local prefix = path
+  while true do
+    -- Locate the last dot so the final segment can be stripped off.
+    local dot = prefix:find('%.[^%.]*$')
+    if not dot then break end
+    prefix = prefix:sub(1, dot - 1)
+    if set[prefix] then return true end
+  end
+  return false
+end
+
+--- Decide whether a path should be retained in the exported metadata.
+--- Sensitive defaults and the exclude blacklist are checked first and always win.
+--- When an include whitelist is set, a path is kept if it is listed, if it is a
+--- descendant of a listed path, or if it is an ancestor of a listed path (so the
+--- containers above a kept leaf are preserved). Without a whitelist, every path
+--- that was not excluded is kept.
+--- @param path string The dot-separated path to test
+--- @return boolean True if the path should be kept
+local function should_keep(path)
+  if exclude_sensitive and SENSITIVE_PATHS[path] then return false end
+  if path_matches(path, exclude_paths) then return false end
+  if include_paths then
+    if path_matches(path, include_paths) then return true end
+    -- Keep ancestors of an included entry: the entry starts with `path .. '.'`.
+    for entry in pairs(include_paths) do
+      if entry:sub(1, #path + 1) == path .. '.' then return true end
+    end
+    return false
+  end
+  return true
+end
+
+--- Recursively filter a metadata tree against include/exclude/sensitive rules.
+--- Empty branches are pruned so the resulting JSON stays compact. A node whose +--- own path is dropped (e.g. a sensitive container) is removed wholesale, so all +--- descendants disappear regardless of their individual paths. +--- @param value any The current subtree value +--- @param path string The dot-separated path of the current node +--- @return any The filtered value, or nil if the entire branch was filtered out +local function filter_tree(value, path) + if path ~= '' and not should_keep(path) then return nil end + if type(value) ~= 'table' then + return value + end + local is_array = value[1] ~= nil + if is_array then + return value + end + local result = {} + local kept = false + for k, v in pairs(value) do + local child_path = path == '' and tostring(k) or (path .. '.' .. tostring(k)) + local filtered = filter_tree(v, child_path) + if filtered ~= nil then + result[k] = filtered + kept = true + end + end + if kept then return result end + if next(value) == nil then return value end + return nil +end + --- Export metadata to JSON file --- @param metadata table The metadata to export --- @param filepath string The file path to write to local function export_to_json(metadata, filepath) - local json_content = quarto.json.encode(metadata) + local payload = filter_tree(metadata, '') or {} + local json_content = quarto.json.encode(payload) local file, err = io.open(filepath, 'w') if file then file:write(json_content) @@ -34,19 +197,37 @@ end --- @param meta table The document metadata table --- @return table The metadata table local function get_configuration(meta) - local meta_json = meta_mod.get_metadata_value(meta, 'lua-env', 'json') - - -- Set JSON file path - if not str.is_empty(meta_json) then - if meta_json == 'true' then - json_file = 'lua-env.json' - elseif meta_json == 'false' then - json_file = nil - else - json_file = meta_json --[[@as string]] + -- Reset module-level state per document to avoid cross-document leakage in batch renders. 
+ json_file = nil + include_paths = nil + exclude_paths = nil + exclude_sensitive = true + warn_on_server = true + + local raw_json = get_raw_meta(meta, 'lua-env', 'json') + local json_bool = coerce_boolean(raw_json) + if json_bool == true then + json_file = 'lua-env.json' + elseif json_bool == false then + json_file = nil + elseif raw_json ~= nil then + local s = pandoc.utils.stringify(raw_json) + if s ~= '' then + json_file = s end end + include_paths = parse_path_list(get_raw_meta(meta, 'lua-env', 'json-include')) + exclude_paths = parse_path_list(get_raw_meta(meta, 'lua-env', 'json-exclude')) + + local raw_sensitive = get_raw_meta(meta, 'lua-env', 'json-exclude-sensitive') + local sensitive_bool = coerce_boolean(raw_sensitive) + if sensitive_bool ~= nil then exclude_sensitive = sensitive_bool end + + local raw_warn = get_raw_meta(meta, 'lua-env', 'json-warn-on-server') + local warn_bool = coerce_boolean(raw_warn) + if warn_bool ~= nil then warn_on_server = warn_bool end + return meta end @@ -95,6 +276,17 @@ local function populate_lua_env(meta) -- Export to JSON if configured if json_file then + if warn_on_server then + local ctx = detect_server_context() + if ctx then + log.log_warning( + EXTENSION_NAME, + 'JSON export is enabled in a server/CI context (' .. ctx .. ' is set). ' .. + 'The exported file may leak host filesystem paths. ' .. + 'Set extensions.lua-env.json-warn-on-server: false to silence this warning.' 
+ ) + end + end export_to_json(meta['lua-env'], json_file) end diff --git a/example.qmd b/example.qmd index 3abfdc1..7bdd937 100644 --- a/example.qmd +++ b/example.qmd @@ -13,7 +13,11 @@ filters: - lua-env extensions: lua-env: - json: true # Export metadata to lua-env.json + json: true + json-exclude: + - quarto._quarto + json-exclude-sensitive: true + json-warn-on-server: true --- ## Introduction @@ -149,21 +153,81 @@ See [Pandoc LUA API - Global Variables](https://pandoc.org/lua-filters.html#glob ## JSON Export -This document is configured to export the `lua-env` metadata to a JSON file. +This document is configured to export the `lua-env` metadata to a JSON file, with the `quarto._quarto` branch removed and sensitive filesystem paths redacted. -By default, no JSON file is written (`json: false`). To enable JSON export, set `json: true` or provide a custom file path. +```yaml +extensions: + lua-env: + json: true + json-exclude: + - quarto._quarto + json-exclude-sensitive: true + json-warn-on-server: true +``` + +By default, no JSON file is written (`json: false`). +To enable JSON export, set `json: true` (writes to `lua-env.json`) or provide a custom file path. + +### JSON export schema + +The exported JSON is an object with two top-level keys, `pandoc` and `quarto`. + +```json +{ + "pandoc": { + "FORMAT": "html", + "PANDOC_API_VERSION": "1.23", + "PANDOC_VERSION": "3.6.3", + "PANDOC_READER_OPTIONS": { }, + "PANDOC_WRITER_OPTIONS": { }, + "PANDOC_STATE": { } + }, + "quarto": { + "version": [1, 7, 32], + "doc": { }, + "project": { }, + "log": { }, + "json": { } + } +} +``` + +Container shapes mirror the live Pandoc/Quarto objects. +Functions and userdata are dropped. +Empty branches are omitted. -The YAML header includes: +### Filtering options + +- `json-include`: array of dot-separated paths to keep (e.g. `pandoc.FORMAT`). + Everything outside the listed paths is omitted. 
+- `json-exclude`: array of dot-separated paths to drop, applied after the include whitelist. +- `json-exclude-sensitive` (default `true`): redacts built-in sensitive paths that expose host filesystem layout, namely `quarto.doc.input_file`, `quarto.doc.output_file`, `quarto.project.directory`, `quarto.project.output_directory`, and `pandoc.PANDOC_SCRIPT_FILE`. + Set to `false` to include them. +- `json-warn-on-server` (default `true`): emits a warning when JSON export is enabled in a CI or server context (`CI`, `GITHUB_ACTIONS`, `GITLAB_CI`, `CIRCLECI`, `TRAVIS`, `JENKINS_URL`, `BUILDKITE`, `TF_BUILD`). + Set to `false` to silence. + +For example, to export only the active Pandoc format and Quarto version: ```yaml extensions: lua-env: - json: true # Export metadata to lua-env.json + json: true + json-include: + - pandoc.FORMAT + - quarto.version ``` -This will create a file named `lua-env.json` in the same directory as this document, containing all the LUA environment metadata collected during rendering. +To remove a noisy branch while keeping everything else: + +```yaml +extensions: + lua-env: + json: true + json-exclude: + - quarto._quarto +``` -You can also specify a custom file path: +To use a custom output filename: ```yaml extensions: