From a5e55d24db9c8aa776022047522ee74a718432cb Mon Sep 17 00:00:00 2001 From: voidborne-d Date: Wed, 13 May 2026 00:19:33 +0800 Subject: [PATCH] fix(config): preserve bare $ in config values via safe_substitute (closes #2349) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit string.Template.substitute raises ValueError for any invalid placeholder, which crashes config load whenever a value contains a bare $ — for example file_pattern: ".*\.md$" (a valid regex anchor). The error surface is a hard crash with an unhelpful "Invalid placeholder in string" message. Switch to safe_substitute, which leaves invalid placeholders (bare $, $$, etc.) as literal text. To preserve the existing missing-env-var detection, walk Template.pattern.finditer up-front and raise ConfigParsingError for any ${VAR} or $VAR placeholder whose name is not in os.environ. KeyError is no longer raised by safe_substitute, so the previous try/except branch becomes unreachable. Tests: - test_load_config_preserves_literal_dollar_signs: $ in name / nested_str / trailing positions round-trips verbatim. - test_load_config_mixed_env_var_and_literal_dollar: ${LOAD_CONFIG_NAME} substitutes correctly while a sibling regex value keeps its trailing $. - Existing test_load_config "missing env var" path still raises ConfigParsingError. Co-authored-by: Claude Opus 4.7 (1M context) --- .../patch-20260512161909030693.json | 4 ++ .../graphrag_common/config/load_config.py | 21 +++++++--- .../fixtures/config_with_dollar.yaml | 10 +++++ .../fixtures/config_with_dollar_and_env.yaml | 10 +++++ tests/unit/load_config/test_load_config.py | 40 +++++++++++++++++++ 5 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 .semversioner/next-release/patch-20260512161909030693.json create mode 100644 tests/unit/load_config/fixtures/config_with_dollar.yaml create mode 100644 tests/unit/load_config/fixtures/config_with_dollar_and_env.yaml diff --git a/.semversioner/next-release/patch-20260512161909030693.json b/.semversioner/next-release/patch-20260512161909030693.json new file mode 100644 index 0000000000..3cc57c21de --- /dev/null +++ b/.semversioner/next-release/patch-20260512161909030693.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Preserve bare `$` characters in config values (e.g. `file_pattern: \".*\\\\.md$\"`) by using `Template.safe_substitute`; missing env vars still raise `ConfigParsingError`." +} diff --git a/packages/graphrag-common/graphrag_common/config/load_config.py b/packages/graphrag-common/graphrag_common/config/load_config.py index c8929149f1..216bb50242 100644 --- a/packages/graphrag-common/graphrag_common/config/load_config.py +++ b/packages/graphrag-common/graphrag_common/config/load_config.py @@ -83,12 +83,21 @@ def _get_parser_for_file(file_path: str | Path) -> Callable[[str], dict[str, Any def _parse_env_variables(text: str) -> str: - """Parse environment variables in the configuration text.""" - try: - return Template(text).substitute(os.environ) - except KeyError as error: - msg = f"Environment variable not found: {error}" - raise ConfigParsingError(msg) from error + r"""Parse environment variables in the configuration text. + + Uses ``Template.safe_substitute`` so that bare ``$`` characters appearing in + legitimate config values (for example, regex anchors like ``.*\.md$`` in a + ``file_pattern``) are passed through unchanged instead of raising a + ``ValueError`` for invalid placeholders. Missing environment variables are + still detected up-front and surfaced as ``ConfigParsingError``. + """ + template = Template(text) + for match in template.pattern.finditer(text): + name = match.group("named") or match.group("braced") + if name is not None and name not in os.environ: + msg = f"Environment variable not found: '{name}'" + raise ConfigParsingError(msg) + return template.safe_substitute(os.environ) def _recursive_merge_dicts(dest: dict[str, Any], src: dict[str, Any]) -> None: diff --git a/tests/unit/load_config/fixtures/config_with_dollar.yaml b/tests/unit/load_config/fixtures/config_with_dollar.yaml new file mode 100644 index 0000000000..bd1a818899 --- /dev/null +++ b/tests/unit/load_config/fixtures/config_with_dollar.yaml @@ -0,0 +1,10 @@ +name: "match_dollar_$" +value: 100 +nested: + nested_str: ".*\\.md$" + nested_int: 42 +nested_list: + - nested_str: "list_value_1" + nested_int: 7 + - nested_str: "trailing$" + nested_int: 8 diff --git a/tests/unit/load_config/fixtures/config_with_dollar_and_env.yaml b/tests/unit/load_config/fixtures/config_with_dollar_and_env.yaml new file mode 100644 index 0000000000..f2da9202ed --- /dev/null +++ b/tests/unit/load_config/fixtures/config_with_dollar_and_env.yaml @@ -0,0 +1,10 @@ +name: ${LOAD_CONFIG_NAME} +value: 100 +nested: + nested_str: ".*\\.md$" + nested_int: 42 +nested_list: + - nested_str: "list_value_1" + nested_int: 7 + - nested_str: "list_value_2" + nested_int: 8 diff --git a/tests/unit/load_config/test_load_config.py b/tests/unit/load_config/test_load_config.py index 0945cf214f..e8e2ce40f2 100644 --- a/tests/unit/load_config/test_load_config.py +++ b/tests/unit/load_config/test_load_config.py @@ -155,3 +155,43 @@ def test_load_config(): assert config_with_env_vars.nested_list[0].nested_int == 7 assert config_with_env_vars.nested_list[1].nested_str == "list_value_2" assert config_with_env_vars.nested_list[1].nested_int == 8 + + +def test_load_config_preserves_literal_dollar_signs(): + """Bare ``$`` characters in config values must be preserved verbatim. + + Regex anchors like ``.*\\.md$`` in fields such as ``file_pattern`` are valid + config values; ``string.Template.substitute`` previously raised + ``ValueError`` for these, crashing config load. The loader now uses + ``safe_substitute`` so invalid placeholders are passed through unchanged. + """ + config_directory = Path(__file__).parent / "fixtures" + config_path = config_directory / "config_with_dollar.yaml" + + config = load_config( + config_initializer=TestConfigModel, + config_path=config_path, + set_cwd=False, + ) + + assert config.name == "match_dollar_$" + assert config.nested.nested_str == ".*\\.md$" + assert config.nested_list[1].nested_str == "trailing$" + + +def test_load_config_mixed_env_var_and_literal_dollar(): + """``${VAR}`` substitution and bare ``$`` literals must coexist.""" + config_directory = Path(__file__).parent / "fixtures" + config_path = config_directory / "config_with_dollar_and_env.yaml" + env_path = config_directory / "test.env" + + cwd = Path.cwd() + config = load_config( + config_initializer=TestConfigModel, + config_path=config_path, + dot_env_path=env_path, + ) + os.chdir(cwd) + + assert config.name == "env_name" + assert config.nested.nested_str == ".*\\.md$"