From 18f40d0ddb8475c2abc49c6c722ba9d64cbf3a44 Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Fri, 5 Dec 2025 19:47:11 +1000 Subject: [PATCH 01/20] feat: add support to use inferred build tools and to extract tool-specific build dependency information (#1256) Adds support to use inferred build tools and to extract tool-specific build dependency information for buildspec generation. Signed-off-by: Abhinav Pradeep --- .../common_spec/pypi_spec.py | 44 +++++++- .../dockerfile/pypi_dockerfile_output.py | 15 ++- .../test_pypi_dockerfile_output.ambr | 2 +- .../compare_dockerfile_buildspec.py | 106 ++++++++++++++++++ .../expected_default.buildspec | 4 +- .../expected_dockerfile.buildspec | 50 +++++++++ .../cases/pypi_cachetools/test.yaml | 14 +++ .../expected_dockerfile.buildspec | 50 +++++++++ .../cases/pypi_markdown-it-py/test.yaml | 14 +++ .../pypi_toga/expected_default.buildspec | 4 +- .../pypi_toga/expected_dockerfile.buildspec | 50 +++++++++ tests/integration/cases/pypi_toga/test.yaml | 14 +++ tests/integration/run.py | 1 + 13 files changed, 361 insertions(+), 7 deletions(-) create mode 100644 tests/build_spec_generator/dockerfile/compare_dockerfile_buildspec.py create mode 100644 tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec create mode 100644 tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec create mode 100644 tests/integration/cases/pypi_toga/expected_dockerfile.buildspec diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index 0f7d78824..bb90ba6a1 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -6,6 +6,7 @@ import logging import os import re +from typing import Any import tomli from packageurl import PackageURL @@ -67,15 +68,17 @@ def get_default_build_commands( match build_tool_name: case "pip": - default_build_commands.append("python -m build".split()) + default_build_commands.append("python -m build --wheel -n".split()) case "poetry": default_build_commands.append("poetry build".split()) case "flit": + # We might also want to deal with existence flit.ini, we can do so via + # "python -m flit.tomlify" default_build_commands.append("flit build".split()) case "hatch": default_build_commands.append("hatch build".split()) case "conda": - default_build_commands.append("conda build".split()) + default_build_commands.append('echo("Not supported")'.split()) case _: pass @@ -156,6 +159,7 @@ def resolve_fields(self, purl: PackageURL) -> None: try: with pypi_package_json.sourcecode(): try: + # Get the build time requirements from ["build-system", "requires"] pyproject_content = pypi_package_json.get_sourcecode_file_contents("pyproject.toml") content = tomli.loads(pyproject_content.decode("utf-8")) requires = json_extract(content, ["build-system", "requires"], list) @@ -164,10 +168,10 @@ def resolve_fields(self, purl: PackageURL) -> None: backend = json_extract(content, ["build-system", "build-backend"], str) if backend: build_backends_set.add(backend.replace(" ", "")) - python_version_constraint = json_extract(content, ["project", "requires-python"], str) if python_version_constraint: python_version_set.add(python_version_constraint.replace(" ", "")) + self.apply_tool_specific_inferences(build_requires_set, python_version_set, content) logger.debug( "After analyzing pyproject.toml from the sdist: build-requires: %s, build_backend: %s", build_requires_set, @@ -239,6 +243,40 @@ 
def resolve_fields(self, purl: PackageURL) -> None: self.data["build_commands"] = patched_build_commands + def apply_tool_specific_inferences( + self, build_requires_set: set[str], python_version_set: set[str], pyproject_contents: dict[str, Any] + ) -> None: + """ + Based on build tools inferred, look into the pyproject.toml for related additional dependencies. + + Parameters + ---------- + build_requires_set: set[str] + Set of build requirements to populate. + python_version_set: set[str] + Set of compatible interpreter versions to populate. + pyproject_contents: dict[str, Any] + Parsed contents of the pyproject.toml file. + """ + # If we have hatch as a build_tool, we will examine [tool.hatch.build.hooks.*] to + # look for any additional build dependencies declared there. + if "hatch" in self.data["build_tools"]: + # Look for [tool.hatch.build.hooks.*] + hatch_build_hooks = json_extract(pyproject_contents, ["tool", "hatch", "build", "hooks"], dict) + if hatch_build_hooks: + for _, section in hatch_build_hooks.items(): + dependencies = section.get("dependencies") + if dependencies: + build_requires_set.update(elem.replace(" ", "") for elem in dependencies) + # If we have flit as a build_tool, we will check if the legacy header [tool.flit.metadata] exists, + # and if so, check to see if we can use its "requires-python". + if "flit" in self.data["build_tools"]: + flit_python_version_constraint = json_extract( + pyproject_contents, ["tool", "flit", "metadata", "requires-python"], str + ) + if flit_python_version_constraint: + python_version_set.add(flit_python_version_constraint.replace(" ", "")) + def read_directory(self, wheel_path: str, purl: PackageURL) -> tuple[str, str]: """ Read in the WHEEL and METADATA file from the .dist_info directory. diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index fd41f063c..ef2360a5c 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -38,6 +38,17 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: logger.debug("Could not derive a specific interpreter version.") raise GenerateBuildSpecError("Could not derive specific interpreter version.") backend_install_commands: str = " && ".join(build_backend_commands(buildspec)) + build_tool_install: str = "" + if ( + buildspec["build_tools"][0] != "pip" + and buildspec["build_tools"][0] != "conda" + and buildspec["build_tools"][0] != "flit" + ): + build_tool_install = f"pip install {buildspec['build_tools'][0]} && " + elif buildspec["build_tools"][0] == "flit": + build_tool_install = ( + f"pip install {buildspec['build_tools'][0]} && if test -f \"flit.ini\"; then python -m flit.tomlify; fi && " + ) dockerfile_content = f""" #syntax=docker/dockerfile:1.10 FROM oraclelinux:9 @@ -87,7 +98,7 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: EOF # Run the build - RUN /deps/bin/python -m build --wheel -n + RUN {"source /deps/bin/activate && " + build_tool_install + " ".join(x for x in buildspec["build_commands"][0])} """ return dedent(dockerfile_content) @@ -148,4 +159,6 @@ def build_backend_commands(buildspec: BaseBuildSpecDict) -> list[str]: commands: list[str] = [] for backend, version_constraint in buildspec["build_requires"].items(): commands.append(f'/deps/bin/pip install "{backend}{version_constraint}"') + # For a stable order on the install commands + commands.sort() return commands 
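A minimal sketch of the pyproject.toml data that the new apply_tool_specific_inferences method consumes, assuming a hypothetical hatch project; the hook name ("custom") and the dependency below are illustrative, not taken from the patch:

    # Hypothetical parsed pyproject.toml contents for a hatch-based project.
    pyproject_contents = {
        "build-system": {"requires": ["hatchling"], "build-backend": "hatchling.build"},
        "tool": {"hatch": {"build": {"hooks": {"custom": {"dependencies": ["setuptools-scm >= 8"]}}}}},
    }
    build_requires_set: set[str] = set()
    # With "hatch" among the inferred build tools, each hook's declared dependencies are
    # added with whitespace stripped, so build_requires_set becomes {"setuptools-scm>=8"}.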
diff --git a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr
index a39631a05..696ee6f8d 100644
--- a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr
+++ b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr
@@ -50,7 +50,7 @@
   EOF
 
   # Run the build
-  RUN /deps/bin/python -m build --wheel -n
+  RUN source /deps/bin/activate && python -m build
 
   '''
 # ---
diff --git a/tests/build_spec_generator/dockerfile/compare_dockerfile_buildspec.py b/tests/build_spec_generator/dockerfile/compare_dockerfile_buildspec.py
new file mode 100644
index 000000000..8c181d8d5
--- /dev/null
+++ b/tests/build_spec_generator/dockerfile/compare_dockerfile_buildspec.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""Script to compare a generated dockerfile buildspec."""
+
+import argparse
+import logging
+from collections.abc import Callable
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+logging.basicConfig(format="[%(filename)s:%(lineno)s %(tag)s] %(message)s")
+
+
+def log_with_tag(tag: str) -> Callable[[str], None]:
+    """Generate a log function that prints the name of the file and a tag at the beginning of each line."""
+
+    def log_fn(msg: str) -> None:
+        logger.info(msg, extra={"tag": tag})
+
+    return log_fn
+
+
+log_info = log_with_tag("INFO")
+log_err = log_with_tag("ERROR")
+log_passed = log_with_tag("PASSED")
+log_failed = log_with_tag("FAILED")
+
+
+def log_diff(result: str, expected: str) -> None:
+    """Pretty-print the diff of two strings."""
+    output = [
+        *("---- Result ----", result),
+        *("---- Expected ----", expected),
+        "-----------------",
+    ]
+    log_info("\n".join(output))
+
+
+def main() -> int:
+    """Compare a Macaron-generated dockerfile buildspec.
+
+    Returns
+    -------
+    int
+        0 if the generated dockerfile matches the expected output, or non-zero otherwise.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("result_dockerfile", help="the result dockerfile buildspec")
+    parser.add_argument("expected_dockerfile_buildspec", help="the expected dockerfile buildspec")
+    args = parser.parse_args()
+
+    # Load both files
+    with open(args.result_dockerfile, encoding="utf-8") as file:
+        buildspec = normalize(file.read())
+
+    with open(args.expected_dockerfile_buildspec, encoding="utf-8") as file:
+        expected_buildspec = normalize(file.read())
+
+    log_info(
+        f"Comparing the dockerfile buildspec {args.result_dockerfile} with the expected "
+        + f"output dockerfile {args.expected_dockerfile_buildspec}"
+    )
+
+    # Compare the files
+    return compare(buildspec, expected_buildspec)
+
+
+def normalize(contents: str) -> list[str]:
+    """Convert a string of file contents to a list of its non-empty lines."""
+    return [line.strip() for line in contents.splitlines() if line.strip()]
+
+
+def compare(buildspec: list[str], expected_buildspec: list[str]) -> int:
+    """Compare the lines in the two files directly.
+
+    Return early when a difference is found. If the lengths mismatch
+    but the first safe_index_max lines are the same, print
+    the missing/extra lines.
+
+    Returns
+    -------
+    int
+        0 if the generated dockerfile matches the expected output, or non-zero otherwise.
+ """ + safe_index_max = min(len(buildspec), len(expected_buildspec)) + for index in range(safe_index_max): + if buildspec[index] != expected_buildspec[index]: + # Log error + log_err("Mismatch found:") + # Log diff + log_diff(buildspec[index], expected_buildspec[index]) + return 1 + if safe_index_max < len(expected_buildspec): + log_err("Mismatch found: result is missing trailing lines") + log_diff("", "\n".join(expected_buildspec[safe_index_max:])) + return 1 + if safe_index_max < len(buildspec): + log_err("Mismatch found: result has extra trailing lines") + log_diff("\n".join(buildspec[safe_index_max:]), "") + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/integration/cases/pypi_cachetools/expected_default.buildspec b/tests/integration/cases/pypi_cachetools/expected_default.buildspec index 5af209e96..0b5d8acfa 100644 --- a/tests/integration/cases/pypi_cachetools/expected_default.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_default.buildspec @@ -19,7 +19,9 @@ [ "python", "-m", - "build" + "build", + "--wheel", + "-n" ] ], "build_requires": { diff --git a/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec new file mode 100644 index 000000000..749757f91 --- /dev/null +++ b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec @@ -0,0 +1,50 @@ + +#syntax=docker/dockerfile:1.10 +FROM oraclelinux:9 + +# Install core tools +RUN dnf -y install which wget tar git + +# Install compiler and make +RUN dnf -y install gcc make + +# Download and unzip interpreter +RUN <=3.4" + /deps/bin/pip install build +EOF + +# Run the build +RUN source /deps/bin/activate && pip install flit && if test -f "flit.ini"; then python -m flit.tomlify; fi && flit build diff --git a/tests/integration/cases/pypi_markdown-it-py/test.yaml b/tests/integration/cases/pypi_markdown-it-py/test.yaml index a57b7d2cf..d3b0b365a 100644 --- a/tests/integration/cases/pypi_markdown-it-py/test.yaml +++ b/tests/integration/cases/pypi_markdown-it-py/test.yaml @@ -27,3 +27,17 @@ steps: kind: default_build_spec result: output/buildspec/pypi/markdown-it-py/macaron.buildspec expected: expected_default.buildspec +- name: Generate the buildspec + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:pypi/markdown-it-py@4.0.0 + - --output-format + - dockerfile +- name: Compare Dockerfile + kind: compare + options: + kind: dockerfile_build_spec + result: output/buildspec/pypi/markdown-it-py/dockerfile.buildspec + expected: expected_dockerfile.buildspec diff --git a/tests/integration/cases/pypi_toga/expected_default.buildspec b/tests/integration/cases/pypi_toga/expected_default.buildspec index ffb146e81..819113207 100644 --- a/tests/integration/cases/pypi_toga/expected_default.buildspec +++ b/tests/integration/cases/pypi_toga/expected_default.buildspec @@ -19,7 +19,9 @@ [ "python", "-m", - "build" + "build", + "--wheel", + "-n" ] ], "build_requires": { diff --git a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec new file mode 100644 index 000000000..47e1e012a --- /dev/null +++ b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec @@ -0,0 +1,50 @@ + +#syntax=docker/dockerfile:1.10 +FROM oraclelinux:9 + +# Install core tools +RUN dnf -y install which wget tar git + +# Install compiler and make +RUN dnf -y install gcc make + +# Download and unzip interpreter +RUN < None: 
"find_source": ["tests", "find_source", "compare_source_reports.py"], "rc_build_spec": ["tests", "build_spec_generator", "reproducible_central", "compare_rc_build_spec.py"], "default_build_spec": ["tests", "build_spec_generator", "common_spec", "compare_default_buildspec.py"], + "dockerfile_build_spec": ["tests", "build_spec_generator", "dockerfile", "compare_dockerfile_buildspec.py"], } VALIDATE_SCHEMA_SCRIPTS: dict[str, Sequence[str]] = { From d6627dfad917bcfffec92e38eda0e42903e739bc Mon Sep 17 00:00:00 2001 From: Nicholas Allen Date: Thu, 11 Dec 2025 13:51:44 +1000 Subject: [PATCH 02/20] feat: add new dataflow analysis, replacing existing analysis for GitHub Actions (#1229) Signed-off-by: Nicholas Allen --- docs/source/conf.py | 6 +- ...acaron.build_spec_generator.dockerfile.rst | 26 + .../apidoc/macaron.build_spec_generator.rst | 1 + ...acaron.code_analyzer.dataflow_analysis.rst | 98 + .../apidoc/macaron.code_analyzer.rst | 13 +- .../apidoc/macaron.parsers.rst | 8 + .../apidoc/macaron.repo_finder.rst | 8 + .../macaron.slsa_analyzer.build_tool.rst | 32 + ...lsa_analyzer.ci_service.github_actions.rst | 8 - golang/cmd/bashexprparser/bashexprparser.go | 59 + golang/cmd/bashparser/bashparser.go | 7 +- golang/internal/bashparser/bashparser.go | 63 +- pyproject.toml | 2 + src/macaron/code_analyzer/call_graph.py | 104 - .../dataflow_analysis/__init__.py | 2 + .../dataflow_analysis/analysis.py | 469 ++++ .../code_analyzer/dataflow_analysis/bash.py | 1891 +++++++++++++++++ .../dataflow_analysis/cmd_parser.py | 88 + .../code_analyzer/dataflow_analysis/core.py | 695 ++++++ .../dataflow_analysis/evaluation.py | 772 +++++++ .../code_analyzer/dataflow_analysis/facts.py | 702 ++++++ .../code_analyzer/dataflow_analysis/github.py | 1314 ++++++++++++ .../dataflow_analysis/github_expr.py | 141 ++ .../code_analyzer/dataflow_analysis/models.py | 679 ++++++ .../dataflow_analysis/printing.py | 681 ++++++ .../run_analysis_standalone.py | 46 + src/macaron/parsers/bashparser.py | 243 +-- src/macaron/parsers/bashparser_model.py | 848 ++++++++ .../build_tool/base_build_tool.py | 10 +- .../checks/build_as_code_check.py | 202 +- .../checks/build_script_check.py | 5 +- .../checks/build_service_check.py | 5 +- .../github_actions_vulnerability_check.py | 87 +- .../checks/trusted_builder_l3_check.py | 73 +- .../ci_service/base_ci_service.py | 65 +- .../slsa_analyzer/ci_service/circleci.py | 11 +- .../ci_service/github_actions/analyzer.py | 801 ------- .../github_actions/github_actions_ci.py | 120 +- .../slsa_analyzer/ci_service/gitlab_ci.py | 12 +- .../slsa_analyzer/ci_service/jenkins.py | 148 +- .../slsa_analyzer/ci_service/travis.py | 12 +- src/macaron/slsa_analyzer/specs/ci_spec.py | 6 +- tests/conftest.py | 43 +- tests/parsers/bashparser/test_bashparser.py | 40 +- tests/provenance/test_provenance_finder.py | 6 +- tests/slsa_analyzer/build_tool/test_conda.py | 5 +- tests/slsa_analyzer/build_tool/test_docker.py | 7 +- tests/slsa_analyzer/build_tool/test_flit.py | 5 +- tests/slsa_analyzer/build_tool/test_go.py | 5 +- tests/slsa_analyzer/build_tool/test_gradle.py | 7 +- tests/slsa_analyzer/build_tool/test_hatch.py | 5 +- tests/slsa_analyzer/build_tool/test_maven.py | 7 +- tests/slsa_analyzer/build_tool/test_npm.py | 7 +- tests/slsa_analyzer/build_tool/test_pip.py | 5 +- tests/slsa_analyzer/build_tool/test_poetry.py | 7 +- tests/slsa_analyzer/build_tool/test_yarn.py | 7 +- .../checks/test_build_as_code_check.py | 29 +- .../checks/test_build_service_check.py | 4 +- ...test_github_actions_vulnerability_check.py | 11 +- 
.../test_provenance_l3_content_check.py | 4 +- .../checks/test_trusted_builder_l3_check.py | 25 +- .../ci_service/test_github_actions.py | 63 +- tests/slsa_analyzer/test_analyze_context.py | 4 +- 63 files changed, 9044 insertions(+), 1815 deletions(-) create mode 100644 docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.dockerfile.rst create mode 100644 docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.dataflow_analysis.rst create mode 100644 golang/cmd/bashexprparser/bashexprparser.go delete mode 100644 src/macaron/code_analyzer/call_graph.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/__init__.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/analysis.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/bash.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/cmd_parser.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/core.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/evaluation.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/facts.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/github.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/github_expr.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/models.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/printing.py create mode 100644 src/macaron/code_analyzer/dataflow_analysis/run_analysis_standalone.py create mode 100644 src/macaron/parsers/bashparser_model.py delete mode 100644 src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py diff --git a/docs/source/conf.py b/docs/source/conf.py index 6bad46788..31a241743 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # Configuration file for the Sphinx documentation builder. @@ -76,6 +76,10 @@ "", ] +suppress_warnings = [ + "sphinx_autodoc_typehints.forward_reference", # Sphinx has issues with resolving forward references. +] + # We add the docstrings for class constructors in the `__init__` methods. def skip(app, what, name, obj, would_skip, options): diff --git a/docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.dockerfile.rst b/docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.dockerfile.rst new file mode 100644 index 000000000..fd655837c --- /dev/null +++ b/docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.dockerfile.rst @@ -0,0 +1,26 @@ +macaron.build\_spec\_generator.dockerfile package +================================================= + +.. automodule:: macaron.build_spec_generator.dockerfile + :members: + :show-inheritance: + :undoc-members: + +Submodules +---------- + +macaron.build\_spec\_generator.dockerfile.dockerfile\_output module +------------------------------------------------------------------- + +.. automodule:: macaron.build_spec_generator.dockerfile.dockerfile_output + :members: + :show-inheritance: + :undoc-members: + +macaron.build\_spec\_generator.dockerfile.pypi\_dockerfile\_output module +------------------------------------------------------------------------- + +.. 
automodule:: macaron.build_spec_generator.dockerfile.pypi_dockerfile_output + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.rst b/docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.rst index 5bc830015..679e381d8 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.build_spec_generator.rst @@ -14,6 +14,7 @@ Subpackages macaron.build_spec_generator.cli_command_parser macaron.build_spec_generator.common_spec + macaron.build_spec_generator.dockerfile macaron.build_spec_generator.reproducible_central Submodules diff --git a/docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.dataflow_analysis.rst b/docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.dataflow_analysis.rst new file mode 100644 index 000000000..343287f28 --- /dev/null +++ b/docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.dataflow_analysis.rst @@ -0,0 +1,98 @@ +macaron.code\_analyzer.dataflow\_analysis package +================================================= + +.. automodule:: macaron.code_analyzer.dataflow_analysis + :members: + :show-inheritance: + :undoc-members: + +Submodules +---------- + +macaron.code\_analyzer.dataflow\_analysis.analysis module +--------------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.analysis + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.bash module +----------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.bash + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.cmd\_parser module +------------------------------------------------------------ + +.. automodule:: macaron.code_analyzer.dataflow_analysis.cmd_parser + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.core module +----------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.core + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.evaluation module +----------------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.evaluation + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.facts module +------------------------------------------------------ + +.. automodule:: macaron.code_analyzer.dataflow_analysis.facts + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.github module +------------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.github + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.github\_expr module +------------------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.github_expr + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.models module +------------------------------------------------------- + +.. 
automodule:: macaron.code_analyzer.dataflow_analysis.models + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.printing module +--------------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.printing + :members: + :show-inheritance: + :undoc-members: + +macaron.code\_analyzer.dataflow\_analysis.run\_analysis\_standalone module +-------------------------------------------------------------------------- + +.. automodule:: macaron.code_analyzer.dataflow_analysis.run_analysis_standalone + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.rst b/docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.rst index 6216f77e6..b46c0eac7 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.code_analyzer.rst @@ -6,13 +6,10 @@ macaron.code\_analyzer package :show-inheritance: :undoc-members: -Submodules ----------- +Subpackages +----------- -macaron.code\_analyzer.call\_graph module ------------------------------------------ +.. toctree:: + :maxdepth: 1 -.. automodule:: macaron.code_analyzer.call_graph - :members: - :show-inheritance: - :undoc-members: + macaron.code_analyzer.dataflow_analysis diff --git a/docs/source/pages/developers_guide/apidoc/macaron.parsers.rst b/docs/source/pages/developers_guide/apidoc/macaron.parsers.rst index 63ad1a5e9..3dad1ee97 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.parsers.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.parsers.rst @@ -33,6 +33,14 @@ macaron.parsers.bashparser module :show-inheritance: :undoc-members: +macaron.parsers.bashparser\_model module +---------------------------------------- + +.. automodule:: macaron.parsers.bashparser_model + :members: + :show-inheritance: + :undoc-members: + macaron.parsers.github\_workflow\_model module ---------------------------------------------- diff --git a/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst b/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst index 04a654b94..dcf5b333b 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.repo_finder.rst @@ -57,6 +57,14 @@ macaron.repo\_finder.repo\_finder\_java module :show-inheritance: :undoc-members: +macaron.repo\_finder.repo\_finder\_npm module +--------------------------------------------- + +.. automodule:: macaron.repo_finder.repo_finder_npm + :members: + :show-inheritance: + :undoc-members: + macaron.repo\_finder.repo\_finder\_pypi module ---------------------------------------------- diff --git a/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.build_tool.rst b/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.build_tool.rst index a44611b57..e7c3e6552 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.build_tool.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.build_tool.rst @@ -17,6 +17,14 @@ macaron.slsa\_analyzer.build\_tool.base\_build\_tool module :show-inheritance: :undoc-members: +macaron.slsa\_analyzer.build\_tool.conda module +----------------------------------------------- + +.. 
automodule:: macaron.slsa_analyzer.build_tool.conda + :members: + :show-inheritance: + :undoc-members: + macaron.slsa\_analyzer.build\_tool.docker module ------------------------------------------------ @@ -25,6 +33,14 @@ macaron.slsa\_analyzer.build\_tool.docker module :show-inheritance: :undoc-members: +macaron.slsa\_analyzer.build\_tool.flit module +---------------------------------------------- + +.. automodule:: macaron.slsa_analyzer.build_tool.flit + :members: + :show-inheritance: + :undoc-members: + macaron.slsa\_analyzer.build\_tool.go module -------------------------------------------- @@ -41,6 +57,14 @@ macaron.slsa\_analyzer.build\_tool.gradle module :show-inheritance: :undoc-members: +macaron.slsa\_analyzer.build\_tool.hatch module +----------------------------------------------- + +.. automodule:: macaron.slsa_analyzer.build_tool.hatch + :members: + :show-inheritance: + :undoc-members: + macaron.slsa\_analyzer.build\_tool.language module -------------------------------------------------- @@ -81,6 +105,14 @@ macaron.slsa\_analyzer.build\_tool.poetry module :show-inheritance: :undoc-members: +macaron.slsa\_analyzer.build\_tool.pyproject module +--------------------------------------------------- + +.. automodule:: macaron.slsa_analyzer.build_tool.pyproject + :members: + :show-inheritance: + :undoc-members: + macaron.slsa\_analyzer.build\_tool.yarn module ---------------------------------------------- diff --git a/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.ci_service.github_actions.rst b/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.ci_service.github_actions.rst index d745c347f..67b6da97f 100644 --- a/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.ci_service.github_actions.rst +++ b/docs/source/pages/developers_guide/apidoc/macaron.slsa_analyzer.ci_service.github_actions.rst @@ -9,14 +9,6 @@ macaron.slsa\_analyzer.ci\_service.github\_actions package Submodules ---------- -macaron.slsa\_analyzer.ci\_service.github\_actions.analyzer module ------------------------------------------------------------------- - -.. automodule:: macaron.slsa_analyzer.ci_service.github_actions.analyzer - :members: - :show-inheritance: - :undoc-members: - macaron.slsa\_analyzer.ci\_service.github\_actions.github\_actions\_ci module ----------------------------------------------------------------------------- diff --git a/golang/cmd/bashexprparser/bashexprparser.go b/golang/cmd/bashexprparser/bashexprparser.go new file mode 100644 index 000000000..3a55db7d2 --- /dev/null +++ b/golang/cmd/bashexprparser/bashexprparser.go @@ -0,0 +1,59 @@ +/* Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +package main + +import ( + "flag" + "fmt" + "os" + + "github.com/oracle/macaron/golang/internal/bashparser" + "github.com/oracle/macaron/golang/internal/filewriter" +) + +// Parse the bash expression and provide parsed objects in JSON format to stdout or a file. +// Params: +// +// -input : the bash expr content in string +// -output : the output file path to store the JSON content +// +// Return code: +// +// 0 - Parse successfully, return the JSON as string to stdout. If -output is set, store the json content to the file. +// If there is any errors storing to file, the result is still printed to stdout, but the errors are put to stderr instead. +// 1 - Error: Missing bash script or output file paths. 
+// 2 - Error: Could not parse the bash script file. Parse errors will be printed to stderr. +func main() { + input := flag.String("input", "", "The bash expr content to be parsed.") + out_path := flag.String("output", "", "The output file path to store the JSON content.") + flag.Parse() + + var json_content string + var parse_err error + if len(*input) <= 0 { + fmt.Fprintln(os.Stderr, "Missing bash expr input.") + flag.PrintDefaults() + os.Exit(1) + } else { + // Read the bash script from command line argument. + json_content, parse_err = bashparser.ParseExpr(*input) + } + + if parse_err != nil { + fmt.Fprintln(os.Stderr, parse_err.Error()) + os.Exit(2) + } + + fmt.Println(json_content) + + if len(*out_path) > 0 { + err := filewriter.StoreBytesToFile([]byte(json_content), *out_path) + if err != nil { + fmt.Fprintln(os.Stderr, err.Error()) + os.Exit(1) + } + } + + os.Exit(0) +} diff --git a/golang/cmd/bashparser/bashparser.go b/golang/cmd/bashparser/bashparser.go index ed598ea28..50cc6fec2 100644 --- a/golang/cmd/bashparser/bashparser.go +++ b/golang/cmd/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ package main @@ -29,13 +29,14 @@ func main() { file_path := flag.String("file", "", "The path of the bash script file.") input := flag.String("input", "", "The bash script content to be parsed. Input is prioritized over file option.") out_path := flag.String("output", "", "The output file path to store the JSON content.") + raw := flag.Bool("raw", false, "Return raw parse-tree") flag.Parse() var json_content string var parse_err error if len(*input) > 0 { // Read the bash script from command line argument. - json_content, parse_err = bashparser.ParseCommands(*input) + json_content, parse_err = bashparser.Parse(*input, *raw) } else if len(*file_path) <= 0 { fmt.Fprintln(os.Stderr, "Missing bash script input or file path.") flag.PrintDefaults() @@ -47,7 +48,7 @@ func main() { fmt.Fprintln(os.Stderr, read_err.Error()) os.Exit(1) } - json_content, parse_err = bashparser.ParseCommands(string(data)) + json_content, parse_err = bashparser.Parse(string(data), *raw) } if parse_err != nil { diff --git a/golang/internal/bashparser/bashparser.go b/golang/internal/bashparser/bashparser.go index a033e6f73..b88e43a6e 100644 --- a/golang/internal/bashparser/bashparser.go +++ b/golang/internal/bashparser/bashparser.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ // Package bashparser parses the bash scripts and provides parsed objects in JSON. @@ -11,6 +11,7 @@ import ( "strings" "mvdan.cc/sh/v3/syntax" + "mvdan.cc/sh/v3/syntax/typedjson" ) // CMDResult is used to export the bash command results in JSON. @@ -68,3 +69,63 @@ func ParseCommands(data string) (string, error) { return string(result_bytes), nil } + +func ParseRaw(data string) (string, error) { + // Replace GitHub Actions's expressions with ``$MACARON_UNKNOWN``` variable because the bash parser + // doesn't recognize such expressions. For example: ``${{ foo }}`` will be replaced by ``$MACARON_UNKNOWN``. 
+ // Note that we don't use greedy matching, so if we have `${{ ${{ foo }} }}`, it will not be replaced by + // `$MACARON_UNKNOWN`. + // See: https://docs.github.com/en/actions/learn-github-actions/expressions. + var re, reg_error = regexp.Compile(`\$\{\{.*?\}\}`) + if reg_error != nil { + return "", reg_error + } + + // We replace the GH Actions variables with "$MACARON_UNKNOWN". + data = string(re.ReplaceAll([]byte(data), []byte("$$MACARON_UNKNOWN"))) + data_str := strings.NewReader(data) + data_parsed, parse_err := syntax.NewParser().Parse(data_str, "") + if parse_err != nil { + return "", parse_err + } + + b := new(strings.Builder) + encode_err := typedjson.Encode(b, data_parsed) + if encode_err != nil { + return "", encode_err + } + + return b.String(), nil +} + +func Parse(data string, raw bool) (string, error) { + if raw { + return ParseRaw(data) + } else { + return ParseCommands(data) + } +} + +func ParseExpr(data string) (string, error) { + data_str := strings.NewReader(data) + result_str := "[" + first := true + for word_parsed, parse_err := range syntax.NewParser().WordsSeq(data_str) { + if parse_err != nil { + return "", parse_err + } + b := new(strings.Builder) + encode_err := typedjson.Encode(b, word_parsed) + if encode_err != nil { + return "", encode_err + } + if first { + result_str = result_str + b.String() + first = false + } else { + result_str = result_str + ", " + b.String() + } + } + result_str = result_str + "]" + return result_str, nil +} diff --git a/pyproject.toml b/pyproject.toml index 336e611af..65fd534dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,8 @@ dependencies = [ "semgrep == 1.113.0", "email-validator >=2.2.0,<3.0.0", "rich >=13.5.3,<15.0.0", + "lark >= 1.3.0,<2.0.0", + "frozendict >= 2.4.6, <3.0.0", ] keywords = [] # https://pypi.org/classifiers/ diff --git a/src/macaron/code_analyzer/call_graph.py b/src/macaron/code_analyzer/call_graph.py deleted file mode 100644 index 1f3be3fac..000000000 --- a/src/macaron/code_analyzer/call_graph.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module contains classes to generate build call graphs for the target repository.""" - -from collections import deque -from collections.abc import Iterable -from typing import Any, Generic, TypeVar - -Node = TypeVar("Node", bound="BaseNode") -# The documentation below for `TypeVar` is commented out due to a breaking -# change in Sphinx version (^=6.1.0). -# Reported at: https://github.com/oracle/macaron/issues/58. -# """This binds type ``Node`` to ``BaseNode`` and any of its subclasses. - -# Therefore, any node of type ``Node`` that is stored in the call graph -# container will be a subtype of ``BaseNode``. -# """ - - -class BaseNode(Generic[Node]): - """This is the generic class for call graph nodes.""" - - def __init__(self, caller: Node | None = None, node_id: str | None = None) -> None: - """Initialize instance. - - Parameters - ---------- - caller: Node | None - The caller node. - node_id: str | None - The unique identifier of a node in the callgraph. - """ - self.callee: list[Node] = [] - self.caller: Node | None = caller - # Each node can have a model that summarizes certain properties for static analysis. - # By default this model is set to None. 
- self.model: Any = None - self.node_id = node_id - - def add_callee(self, node: Node) -> None: - """Add a callee to the current node. - - Parameters - ---------- - node : Node - The callee node. - """ - self.callee.append(node) - - def has_callee(self) -> bool: - """Check if the current node has callees. - - Returns - ------- - bool - Return False if there are no callees, otherwise True. - """ - return bool(self.callee) - - -class CallGraph(Generic[Node]): - """This is the generic class for creating a call graph.""" - - def __init__(self, root: Node, repo_path: str) -> None: - """Initialize instance. - - Parameters - ---------- - root : Node - The root call graph node. - repo_path : str - The path to the repo. - """ - self.root = root - self.repo_path = repo_path - - def get_root(self) -> Node: - """Get the root node in the call graph. - - Returns - ------- - Node - The root node. - """ - return self.root - - def bfs(self) -> Iterable[Node]: - """Traverse the call graph in breadth first search order. - - Yields - ------ - Node - The traversed nodes. - """ - queue: deque[Node] = deque() - queue.extend(self.root.callee) - visited = [] - while queue: - node = queue.popleft() - if node not in visited: - queue.extend(node.callee) - visited.append(node) - yield node diff --git a/src/macaron/code_analyzer/dataflow_analysis/__init__.py b/src/macaron/code_analyzer/dataflow_analysis/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/src/macaron/code_analyzer/dataflow_analysis/analysis.py b/src/macaron/code_analyzer/dataflow_analysis/analysis.py new file mode 100644 index 000000000..6f7c3f35f --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/analysis.py @@ -0,0 +1,469 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Entry points to perform and use the dataflow analysis.""" + +from __future__ import annotations + +from collections.abc import Iterable + +from macaron.code_analyzer.dataflow_analysis import bash, core, evaluation, facts, github, printing +from macaron.errors import CallGraphError +from macaron.parsers import actionparser, github_workflow_model +from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, BuildToolCommand + + +def analyse_github_workflow_file(workflow_path: str, repo_path: str | None, dump_debug: bool = False) -> core.Node: + """Perform dataflow analysis for GitHub Actions Workflow file. + + Parameters + ---------- + workflow_path: str + The path to workflow file. + repo_path: str | None + The path to the repo. + dump_debug: bool + Whether to output debug dot file (in the current working directory). + + Returns + ------- + core.Node + Graph representation of workflow and analysis results. + """ + workflow = actionparser.parse(workflow_path) + + analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) + + core.reset_debug_sequence_number() + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_path) + core.increment_debug_sequence_number() + + raw_workflow_node.analyse() + + if dump_debug: + with open("analysis." 
+ workflow_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f:
+            printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True)
+
+    return raw_workflow_node
+
+
+def analyse_github_workflow(
+    workflow: github_workflow_model.Workflow, workflow_source_path: str, repo_path: str | None, dump_debug: bool = False
+) -> core.Node:
+    """Perform dataflow analysis for GitHub Actions Workflow.
+
+    Parameters
+    ----------
+    workflow: github_workflow_model.Workflow
+        The workflow.
+    workflow_source_path: str
+        The source path for the workflow.
+    repo_path: str | None
+        The path to the repo.
+    dump_debug: bool
+        Whether to output debug dot file (in the current working directory).
+
+    Returns
+    -------
+    core.Node
+        Graph representation of workflow and analysis results.
+    """
+    analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path))
+
+    core.reset_debug_sequence_number()
+    raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path)
+    core.increment_debug_sequence_number()
+
+    raw_workflow_node.analyse()
+
+    if dump_debug:
+        with open("analysis." + workflow_source_path.replace("/", "_") + ".dot", "w", encoding="utf-8") as f:
+            printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True)
+
+    return raw_workflow_node
+
+
+def analyse_bash_script(
+    bash_content: str, source_path: str, repo_path: str | None, dump_debug: bool = False
+) -> core.Node:
+    """Perform dataflow analysis for Bash script.
+
+    Parameters
+    ----------
+    bash_content: str
+        The Bash script content.
+    source_path: str
+        The source path for the Bash script.
+    repo_path: str | None
+        The path to the repo.
+    dump_debug: bool
+        Whether to output debug dot file (in the current working directory).
+
+    Returns
+    -------
+    core.Node
+        Graph representation of Bash script and analysis results.
+    """
+    analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path))
+    bash_context = core.OwningContextRef(bash.BashScriptContext.create_in_isolation(analysis_context, source_path))
+    core.reset_debug_sequence_number()
+    bash_node = bash.RawBashScriptNode(facts.StringLiteral(bash_content), bash_context)
+    core.increment_debug_sequence_number()
+
+    bash_node.analyse()
+
+    if dump_debug:
+        with open(
+            "analysis." + source_path.replace("/", "_") + "." + str(hash(bash_content)) + ".dot", "w", encoding="utf-8"
+        ) as f:
+            printing.print_as_dot_graph(bash_node, f, include_properties=True, include_states=True)
+
+    return bash_node
+
+
+# TODO generalise visitors
+class FindSecretsVisitor:
+    """Visitor to find references to GitHub secrets in analysis expressions."""
+
+    #: Scope in which secrets may be found.
+    workflow_var_scope: facts.Scope
+    #: Found secret variable names, populated by running the visitor.
+    secrets: set[str]
+
+    def __init__(self, workflow_var_scope: facts.Scope) -> None:
+        """Construct a visitor to find secrets.
+
+        Parameters
+        ----------
+        workflow_var_scope: facts.Scope
+            Scope in which secrets may be found.
+        """
+        self.workflow_var_scope = workflow_var_scope
+        self.secrets = set()
+
+    def visit_value(self, value: facts.Value) -> None:
+        """Search value expression for secrets."""
+        match value:
+            case facts.StringLiteral(_):
+                return
+            case facts.Read(loc):
+                self.visit_location(loc)
+                if evaluation.scope_matches(loc.scope, self.workflow_var_scope):
+                    match loc.loc:
+                        case facts.Variable(facts.StringLiteral(name)):
+                            if name.startswith("secrets."):
+                                self.secrets.add(name[len("secrets.") :])
+                return
+            case facts.ArbitraryNewData(_):
+                return
+            case facts.UnaryStringOp(_, operand):
+                self.visit_value(operand)
+                return
+            case facts.BinaryStringOp(_, operand1, operand2):
+                self.visit_value(operand1)
+                self.visit_value(operand2)
+                return
+            case facts.ParameterPlaceholderValue(name):
+                return
+            case facts.InstalledPackage(name, version, distribution, url):
+                self.visit_value(name)
+                self.visit_value(version)
+                self.visit_value(distribution)
+                self.visit_value(url)
+                return
+            case facts.Symbolic(sym_val):
+                self.visit_value(sym_val)
+                return
+        raise CallGraphError("unknown facts.Value type: " + value.__class__.__name__)
+
+    def visit_location(self, location: facts.Location) -> None:
+        """Search location expression for secrets."""
+        self.visit_location_specifier(location.loc)
+
+    def visit_location_specifier(self, location: facts.LocationSpecifier) -> None:
+        """Search location expression for secrets."""
+        match location:
+            case facts.Filesystem(path):
+                self.visit_value(path)
+                return
+            case facts.Variable(name):
+                self.visit_value(name)
+                return
+            case facts.Artifact(name, file):
+                self.visit_value(name)
+                self.visit_value(file)
+                return
+            case facts.FilesystemAnyUnderDir(path):
+                self.visit_value(path)
+                return
+            case facts.ArtifactAnyFilename(name):
+                self.visit_value(name)
+                return
+            case facts.ParameterPlaceholderLocation(name):
+                return
+            case facts.Console():
+                return
+            case facts.Installed(name):
+                self.visit_value(name)
+                return
+        raise CallGraphError("unknown location type: " + location.__class__.__name__)
+
+
+def get_reachable_secrets(bash_cmd_node: bash.BashSingleCommandNode) -> set[str]:
+    """Get GitHub secrets that are reachable at a bash command.
+
+    Parameters
+    ----------
+    bash_cmd_node: bash.BashSingleCommandNode
+        The target Bash command node.
+
+    Returns
+    -------
+    set[str]
+        The set of reachable secret variable names.
+    """
+    result: set[str] = set()
+    github_context = bash_cmd_node.context.ref.get_containing_github_context()
+    if github_context is None:
+        return result
+    env_scope = bash_cmd_node.context.ref.env.ref
+    workflow_var_scope = github_context.job_context.ref.workflow_context.ref.workflow_variables.ref
+
+    for loc, vals in bash_cmd_node.before_state.state.items():
+        if evaluation.scope_matches(env_scope, loc.scope):
+            for val in vals:
+                visitor = FindSecretsVisitor(workflow_var_scope)
+                visitor.visit_value(val)
+                result.update(visitor.secrets)
+
+    return result
+
+
+def get_containing_github_job(
+    node: core.Node, parents: dict[core.Node, core.Node]
+) -> github.GitHubActionsNormalJobNode | None:
+    """Return the GitHub job node containing the given node, if any.
+
+    Parameters
+    ----------
+    node: core.Node
+        The target node.
+    parents: dict[core.Node, core.Node]
+        The mapping of nodes to their parent nodes.
+
+    Returns
+    -------
+    github.GitHubActionsNormalJobNode | None
+        The containing job node, or None if there is no containing job.
+    """
+    caller_node: core.Node | None = parents.get(node)
+    while caller_node is not None:
+        match caller_node:
+            case github.GitHubActionsWorkflowNode():
+                break
+            case github.GitHubActionsNormalJobNode():
+                return caller_node
+
+        caller_node = parents.get(caller_node)
+
+    return None
+
+
+def get_containing_github_step(
+    node: core.Node, parents: dict[core.Node, core.Node]
+) -> github.GitHubActionsRunStepNode | None:
+    """Return the GitHub step node containing the given node, if any.
+
+    Parameters
+    ----------
+    node: core.Node
+        The target node.
+    parents: dict[core.Node, core.Node]
+        The mapping of nodes to their parent nodes.
+
+    Returns
+    -------
+    github.GitHubActionsRunStepNode | None
+        The containing step node, or None if there is no containing step.
+    """
+    caller_node: core.Node | None = parents.get(node)
+    while caller_node is not None:
+        match caller_node:
+            case github.GitHubActionsWorkflowNode():
+                break
+            case github.GitHubActionsNormalJobNode():
+                break
+            case github.GitHubActionsRunStepNode():
+                return caller_node
+
+        caller_node = parents.get(caller_node)
+
+    return None
+
+
+def get_containing_github_workflow(
+    node: core.Node, parents: dict[core.Node, core.Node]
+) -> github.GitHubActionsWorkflowNode | None:
+    """Return the GitHub workflow node containing the given node, if any.
+
+    Parameters
+    ----------
+    node: core.Node
+        The target node.
+    parents: dict[core.Node, core.Node]
+        The mapping of nodes to their parent nodes.
+
+    Returns
+    -------
+    github.GitHubActionsWorkflowNode | None
+        The containing workflow node, or None if there is no containing workflow.
+    """
+    caller_node: core.Node | None = parents.get(node)
+    while caller_node is not None:
+        match caller_node:
+            case github.GitHubActionsWorkflowNode():
+                return caller_node
+
+        caller_node = parents.get(caller_node)
+
+    return None
+
+
+def _get_build_tool_commands(nodes: core.NodeForest, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]:
+    """Traverse the callgraph and find all the reachable build tool commands."""
+    for root in nodes.root_nodes:
+        for node in core.traverse_bfs(root):
+            # We are just interested in nodes that have bash commands.
+            if isinstance(node, bash.BashSingleCommandNode):
+                # We collect useful contextual information for the called BashNode.
+                # The GitHub Actions workflow that triggers the path in the callgraph.
+                workflow_node = None
+                # The step in GitHub Actions job that triggers the path in the callgraph.
+                step_node = None
+
+                # Walk up the callgraph to find the relevant caller nodes.
+                # In GitHub Actions, a `GitHubWorkflowNode` may call several `GitHubJobNode`s
+                # and a `GitHubJobNode` may call several steps, which can be external `GitHubWorkflowNode`
+                # or inlined run nodes.
+                # TODO: revisit this implementation if analysis of external workflows is supported in
+                # the future, and decide if setting the caller workflow and job nodes to the nodes in the
+                # main triggering workflow is still expected.
+                workflow_node = get_containing_github_workflow(node, nodes.parents)
+                step_node = get_containing_github_step(node, nodes.parents)
+
+                # Find the bash commands that call the build tool.
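+                # Each part of the command may evaluate to several candidate strings; any part
+                # the analysis cannot resolve falls back to the $MACARON_UNKNOWN placeholder.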
+                resolved_cmds = evaluation.evaluate(node, node.cmd)
+                resolved_args = [evaluation.evaluate(node, arg) if arg is not None else None for arg in node.args]
+
+                # TODO combinations
+
+                cmd = [evaluation.get_single_resolved_str_with_default(resolved_cmds, "$MACARON_UNKNOWN")] + [
+                    (
+                        evaluation.get_single_resolved_str_with_default(resolved_arg, "$MACARON_UNKNOWN")
+                        if resolved_arg is not None
+                        else "$MACARON_UNKNOWN"
+                    )
+                    for resolved_arg in resolved_args
+                ]
+
+                if build_tool.is_build_command(cmd):
+                    evaluated_installed_languages = evaluation.evaluate(
+                        node,
+                        facts.Read(
+                            facts.Location(
+                                node.context.ref.filesystem.ref,
+                                facts.Installed(facts.StringLiteral(build_tool.language)),
+                            )
+                        ),
+                    )
+                    evaluated_installed_languages = evaluation.filter_symbolic_values(evaluated_installed_languages)
+
+                    lang_versions = []
+                    lang_distributions = []
+                    lang_urls = []
+
+                    for evaluated_installed_language in evaluated_installed_languages:
+                        if isinstance(evaluated_installed_language[0], facts.InstalledPackage):
+                            if isinstance(evaluated_installed_language[0].version, facts.StringLiteral):
+                                lang_version_str = evaluated_installed_language[0].version.literal
+                                if lang_version_str not in lang_versions:
+                                    lang_versions.append(lang_version_str)
+                            if isinstance(evaluated_installed_language[0].distribution, facts.StringLiteral):
+                                lang_distribution_str = evaluated_installed_language[0].distribution.literal
+                                if lang_distribution_str not in lang_distributions:
+                                    lang_distributions.append(lang_distribution_str)
+                            if isinstance(evaluated_installed_language[0].url, facts.StringLiteral):
+                                lang_url_str = evaluated_installed_language[0].url.literal
+                                if lang_url_str not in lang_urls:
+                                    lang_urls.append(lang_url_str)
+
+                    lang_url = lang_urls[0] if len(lang_urls) > 0 else ""
+
+                    lang_versions = sorted(lang_versions)
+                    lang_distributions = sorted(lang_distributions)
+                    lang_urls = sorted(lang_urls)
+
+                    yield BuildToolCommand(
+                        ci_path=(
+                            workflow_node.context.ref.source_filepath
+                            if workflow_node is not None
+                            else node.context.ref.source_filepath
+                        ),
+                        command=cmd,
+                        step_node=step_node,
+                        language=build_tool.language,
+                        language_versions=lang_versions,
+                        language_distributions=lang_distributions,
+                        language_url=lang_url,
+                        reachable_secrets=list(get_reachable_secrets(node)),
+                        events=get_ci_events_from_workflow(workflow_node.definition) if workflow_node else [],
+                    )
+
+
+def get_build_tool_commands(nodes: core.NodeForest, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]:
+    """Traverse the callgraph and find all the reachable build tool commands.
+
+    The build tool command objects are sorted by their string representation to
+    ensure deterministic behavior.
+
+    Parameters
+    ----------
+    nodes: core.NodeForest
+        The callgraph reachable from the CI workflows.
+    build_tool: BaseBuildTool
+        The corresponding build tool for which shell commands need to be detected.
+
+    Returns
+    -------
+    Iterable[BuildToolCommand]
+        The objects that contain the build command as well as useful contextual information.
+    """
+    return sorted(_get_build_tool_commands(nodes, build_tool), key=str)
+
+
+def get_ci_events_from_workflow(workflow: github_workflow_model.Workflow) -> list[str]:
+    """Get the CI events that trigger the GitHub Action workflow.
+
+    Parameters
+    ----------
+    workflow: github_workflow_model.Workflow
+        The target GitHub Action workflow.
+
+    Returns
+    -------
+    list[str]
+        The list of event names.
+ """ + result: list[str] = [] + on = workflow["on"] + if isinstance(on, str): + result.append(on) + elif isinstance(on, list): + for hook in on: + result.append(hook) + else: + for key in on: + result.append(key) + + return result diff --git a/src/macaron/code_analyzer/dataflow_analysis/bash.py b/src/macaron/code_analyzer/dataflow_analysis/bash.py new file mode 100644 index 000000000..f350448a5 --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/bash.py @@ -0,0 +1,1891 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Dataflow analysis implementation for analysing Bash shell scripts.""" + +from __future__ import annotations + +import json +import os.path +from collections import defaultdict +from collections.abc import Callable, Iterator +from dataclasses import dataclass +from itertools import product +from typing import cast + +from macaron import MACARON_PATH +from macaron.code_analyzer.dataflow_analysis import core, evaluation, facts, github, models, printing +from macaron.errors import CallGraphError, ParseError +from macaron.parsers import bashparser, bashparser_model + + +class BashExit(core.ExitType): + """Exit type for Bash exit statement.""" + + def __hash__(self) -> int: + return 37199 + + def __eq__(self, other: object) -> bool: + return isinstance(other, BashExit) + + +# Convenience instance of BashExit. +BASH_EXIT = BashExit() + + +class BashReturn(core.ExitType): + """Exit type for returning from a Bash function.""" + + def __hash__(self) -> int: + return 91193 + + def __eq__(self, other: object) -> bool: + return isinstance(other, BashReturn) + + +# Convenience instance of BashReturn. +BASH_RETURN = BashReturn() + + +@dataclass(frozen=True) +class BashScriptContext(core.Context): + """Context for a Bash script.""" + + #: Outer context, which may be a GitHub run step, another Bash script + #: that ran this script, or just the outermost analysis context if analysing + #: the script in isolation. + outer_context: ( + core.ContextRef[github.GitHubActionsStepContext] + | core.ContextRef[BashScriptContext] + | core.ContextRef[core.AnalysisContext] + ) + #: Scope for filesystem used by the script. + filesystem: core.ContextRef[facts.Scope] + #: Scope for env variables within the script. + env: core.ContextRef[facts.Scope] + #: Scope for defined functions within the script. + func_decls: core.ContextRef[facts.Scope] + #: Scope for the stdin attached to the Bash process. + stdin_scope: core.ContextRef[facts.Scope] + #: Location for the stdin attached to the Bash process. + stdin_loc: facts.LocationSpecifier + #: Scope for the stdout attached to the Bash process. + stdout_scope: core.ContextRef[facts.Scope] + #: Location for the stdout attached to the Bash process. + stdout_loc: facts.LocationSpecifier + #: Filepath for Bash script file. + source_filepath: str + + @staticmethod + def create_from_run_step( + context: core.ContextRef[github.GitHubActionsStepContext], source_filepath: str + ) -> BashScriptContext: + """Create a new Bash script context (for being called from a GitHub step) and its associated scopes. + + Reuses the filesystem and stdout scopes from the outer context, env scope inherits from the outer scope. + + Parameters + ---------- + context: core.ContextRef[github.GitHubActionsStepContext] + Outer step context. + source_filepath: str + Filepath of Bash script file. 
+
+ Returns
+ -------
+ BashScriptContext
+ The new Bash script context.
+ """
+ return BashScriptContext(
+ context.get_non_owned(),
+ context.ref.job_context.ref.filesystem.get_non_owned(),
+ core.OwningContextRef(facts.Scope("env", context.ref.env.ref)),
+ core.OwningContextRef(facts.Scope("func_decls")),
+ stdin_scope=core.OwningContextRef(facts.Scope("stdin")),
+ stdin_loc=facts.Console(),
+ stdout_scope=context.ref.job_context.ref.workflow_context.ref.console.get_non_owned(),
+ stdout_loc=facts.Console(),
+ source_filepath=source_filepath,
+ )
+
+ @staticmethod
+ def create_from_bash_script(context: core.ContextRef[BashScriptContext], source_filepath: str) -> BashScriptContext:
+ """Create a new Bash script context (for being called from another Bash script) and its associated scopes.
+
+ Reuses the filesystem, stdin, and stdout scopes from the outer context; the env scope inherits from the outer env scope.
+
+ Parameters
+ ----------
+ context: core.ContextRef[BashScriptContext]
+ Outer Bash script context.
+ source_filepath: str
+ Filepath of Bash script file.
+
+ Returns
+ -------
+ BashScriptContext
+ The new Bash script context.
+ """
+ return BashScriptContext(
+ context.get_non_owned(),
+ context.ref.filesystem.get_non_owned(),
+ core.OwningContextRef(facts.Scope("env", context.ref.env.ref)),
+ core.OwningContextRef(facts.Scope("func_decls")),
+ stdin_scope=context.ref.stdin_scope.get_non_owned(),
+ stdin_loc=facts.Console(),
+ stdout_scope=context.ref.stdout_scope.get_non_owned(),
+ stdout_loc=facts.Console(),
+ source_filepath=source_filepath,
+ )
+
+ @staticmethod
+ def create_in_isolation(context: core.ContextRef[core.AnalysisContext], source_filepath: str) -> BashScriptContext:
+ """Create a new Bash script context (for being analysed in isolation) and its associated scopes.
+
+ Parameters
+ ----------
+ context: core.ContextRef[core.AnalysisContext]
+ Outer analysis context.
+ source_filepath: str
+ Filepath of Bash script file.
+
+ Returns
+ -------
+ BashScriptContext
+ The new Bash script context.
+ """ + return BashScriptContext( + context.get_non_owned(), + core.OwningContextRef(facts.Scope("filesystem")), + core.OwningContextRef(facts.Scope("env")), + core.OwningContextRef(facts.Scope("func_decls")), + stdin_scope=core.OwningContextRef(facts.Scope("stdin")), + stdin_loc=facts.Console(), + stdout_scope=core.OwningContextRef(facts.Scope("stdout")), + stdout_loc=facts.Console(), + source_filepath=source_filepath, + ) + + def with_stdin( + self, stdin_scope: core.ContextRef[facts.Scope], stdin_loc: facts.LocationSpecifier + ) -> BashScriptContext: + """Return a modified bash script context with the given stdin.""" + return BashScriptContext( + self.outer_context, + self.filesystem, + self.env, + self.func_decls, + stdin_scope, + stdin_loc, + self.stdout_scope, + self.stdout_loc, + self.source_filepath, + ) + + def with_stdout( + self, stdout_scope: core.ContextRef[facts.Scope], stdout_loc: facts.LocationSpecifier + ) -> BashScriptContext: + """Return a modified bash script context with the given stdout.""" + return BashScriptContext( + self.outer_context, + self.filesystem, + self.env, + self.func_decls, + self.stdin_scope, + self.stdin_loc, + stdout_scope, + stdout_loc, + self.source_filepath, + ) + + def get_containing_github_context(self) -> github.GitHubActionsStepContext | None: + """Return the (possibly transitive) containing GitHub step context, if there is one.""" + outer_context = self.outer_context.ref + while isinstance(outer_context, BashScriptContext): + outer_context = outer_context.outer_context.ref + + if isinstance(outer_context, github.GitHubActionsStepContext): + return outer_context + return None + + def get_containing_analysis_context(self) -> core.AnalysisContext: + """Return the (possibly transitive) containing analysis context.""" + outer_context = self.outer_context.ref + while isinstance(outer_context, BashScriptContext): + outer_context = outer_context.outer_context.ref + + if isinstance(outer_context, github.GitHubActionsStepContext): + return outer_context.job_context.ref.workflow_context.ref.analysis_context.ref + + return outer_context + + def direct_refs(self) -> Iterator[core.ContextRef[core.Context] | core.ContextRef[facts.Scope]]: + """Yield the direct references of the context, either to scopes or to other contexts.""" + yield self.outer_context + yield self.filesystem + yield self.env + yield self.func_decls + yield self.stdin_scope + yield self.stdout_scope + + +class RawBashScriptNode(core.InterpretationNode): + """Interpretation node representing a Bash script (with the script as an unparsed string value). + + Defines how to resolve and parse the Bash script content and generate the analysis representation. + """ + + #: Value for Bash script content (as a string). + script: facts.Value + #: Bash script context. + context: core.ContextRef[BashScriptContext] + + def __init__(self, script: facts.Value, context: core.ContextRef[BashScriptContext]) -> None: + """Initialize Bash script node. + + Parameters + ---------- + script: facts.Value + Value for Bash script content (as a string). + context: core.ContextRef[BashScriptContext] + Bash script context. 
+ """ + super().__init__() + self.script = script + self.context = context + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Interpret the Bash script to resolve and parse the Bash script content and generate the analysis representation.""" + if isinstance(self.script, facts.StringLiteral): + script_str = self.script.literal + + def build_bash_script() -> core.Node: + try: + parsed_bash = bashparser.parse_raw(script_str, MACARON_PATH) + return BashScriptNode.create(parsed_bash, self.context.get_non_owned()) + except ParseError: + return core.NoOpStatementNode() + + return {"default": build_bash_script} + + def build_noop() -> core.Node: + return core.NoOpStatementNode() + + return {"default": build_noop} + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + +class BashScriptNode(core.ControlFlowGraphNode): + """Control-flow-graph node representing a Bash script. + + Control flow structure consists of a sequence of Bash statements. + Note that this can model complex control flow with branching, loops, etc. + because those control flow constructs will be statement nodes with their + own control flow nested within. + + Control flow that the cuts across multiple levels, such as an exit statement + within a if statement branch that would cause the entire script to exit + early, are modelled using the alternate exits mechanism (i.e. exit statement + creates a BashExit exit state, in the enclosing control-flow constructs the + successor of the BashExit exit of a child node will be an early BashExit exit + of that construct, and so on up until this node, where there will be a early + normal exit, and so the caller of this script would then proceed as normal after + the script exits). + """ + + #: Parsed Bash script AST. + definition: bashparser_model.File + #: Statement nodes in execution order. + stmts: list[BashStatementNode] + #: Bash script context. + context: core.ContextRef[BashScriptContext] + #: Control flow graph. + _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: bashparser_model.File, + stmts: list[BashStatementNode], + context: core.ContextRef[BashScriptContext], + ) -> None: + """Initialize Bash script node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: bashparser_model.File + Parsed Bash script AST. + stmts: list[BashStatementNode] + Statement nodes in execution order. + context: core.ContextRef[BashScriptContext] + Bash script context. 
+ """ + super().__init__() + self.definition = definition + self.stmts = stmts + self.context = context + + self._cfg = core.ControlFlowGraph.create_from_sequence(self.stmts) + + def children(self) -> Iterator[core.Node]: + """Yield the nodes in the sequence.""" + yield from self.stmts + + def get_entry(self) -> core.Node: + """Return the entry node, the first statement in the sequence.""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successor for a given node. + + Returns the next in the sequence or the exit in the case of the last node, or an + early exit in the case of a BashExit or BashReturn exit type. + """ + if isinstance(exit_type, (BashExit, BashReturn)): + return {core.DEFAULT_EXIT} + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + @staticmethod + def create(script: bashparser_model.File, context: core.NonOwningContextRef[BashScriptContext]) -> BashScriptNode: + """Create Bash script node from Bash script AST. + + Parameters + ---------- + script: bashparser_model.File + Parsed Bash script AST. + context: core.NonOwningContextRef[BashScriptContext] + Bash script context. + """ + stmts = [BashStatementNode(stmt, context) for stmt in script["Stmts"]] + return BashScriptNode(script, stmts, context) + + +class BashBlockNode(core.ControlFlowGraphNode): + """Control-flow-graph node representing a Bash block. + + Control flow structure consists of a sequence of Bash statements. + """ + + #: Parsed block AST or list of statement ASTs. + definition: bashparser_model.Block | list[bashparser_model.Stmt] + #: Statement nodes in execution order. + stmts: list[BashStatementNode] + #: Bash script context. + context: core.ContextRef[BashScriptContext] + #: Control flow graph. + _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: bashparser_model.Block | list[bashparser_model.Stmt], + stmts: list[BashStatementNode], + context: core.ContextRef[BashScriptContext], + ) -> None: + """Initialize Bash block node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: bashparser_model.Block | list[bashparser_model.Stmt] + Parsed block AST or list of statement ASTs. + stmts: list[BashStatementNode] + Statement nodes in execution order. + context: core.ContextRef[BashScriptContext] + Bash script context. 
+ """ + super().__init__() + self.definition = definition + self.stmts = stmts + self.context = context + + self._cfg = core.ControlFlowGraph.create_from_sequence(self.stmts) + + def children(self) -> Iterator[core.Node]: + """Yield the nodes in the sequence.""" + yield from self.stmts + + def get_entry(self) -> core.Node: + """Return the entry node, the first statement in the sequence.""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successor for a given node. + + Returns the next in the sequence or the exit in the case of the last node, or a + propagated early exit of the same type in the case of a BashExit or BashReturn exit type. + """ + if isinstance(exit_type, (BashExit, BashReturn)): + return {exit_type} + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the line number and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + if isinstance(self.definition, list): + if len(self.definition) > 0: + result["line num (in script)"] = {(None, str(self.definition[0]["Pos"]["Line"]))} + else: + result["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))} + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + @staticmethod + def create( + script: bashparser_model.Block | list[bashparser_model.Stmt], + context: core.NonOwningContextRef[BashScriptContext], + ) -> BashBlockNode: + """Create Bash block node from block AST or list of statement ASTs. + + Parameters + ---------- + script: bashparser_model.Block | list[bashparser_model.Stmt] + Parsed block AST or list of statement ASTs. + context: core.NonOwningContextRef[BashScriptContext] + Bash script context. + """ + if isinstance(script, list): + stmts = [BashStatementNode(stmt, context) for stmt in script] + else: + stmts = [BashStatementNode(stmt, context) for stmt in script["Stmts"]] + return BashBlockNode(script, stmts, context) + + +class BashFuncCallNode(core.ControlFlowGraphNode): + """Control-flow-graph node representing a call to a Bash function. + + Control flow structure consists of a single block containing the function body. + """ + + #: The parsed AST of the callsite statement. + call_definition: bashparser_model.Stmt + #: The parsed AST of the function declaration. + func_definition: bashparser_model.FuncDecl + #: Node representing the function body. + block: BashBlockNode + #: Bash script context. + context: core.ContextRef[BashScriptContext] + + def __init__( + self, + call_definition: bashparser_model.Stmt, + func_definition: bashparser_model.FuncDecl, + block: BashBlockNode, + context: core.ContextRef[BashScriptContext], + ) -> None: + """Initialize Bash function call node. + + Parameters + ---------- + call_definition: bashparser_model.Stmt + The parsed AST of the callsite statement. + func_definition: bashparser_model.FuncDecl + The parsed AST of the function declaration. + block: BashBlockNode + Node representing the function body. + context: core.ContextRef[BashScriptContext] + Bash script context. 
+ """ + super().__init__() + self.call_definition = call_definition + self.func_definition = func_definition + self.block = block + self.context = context + + self._cfg = core.ControlFlowGraph.create_from_sequence([self.block]) + + def children(self) -> Iterator[core.Node]: + """Yield the function body block node.""" + yield self.block + + def get_entry(self) -> core.Node: + """Return the function body block node.""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successor for a given node. + + Returns the next node in the sequence or the exit in the case of the last node, or an + early exit in the case of a BashReturn exit type, or a propagated early BashExit exit + in the case of a BashExit exit type. + """ + if isinstance(exit_type, BashReturn): + return {core.DEFAULT_EXIT} + if isinstance(exit_type, BashExit): + return {exit_type} + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table. + + Contains the line number of the callsite, the line number of the function declaration, and the scopes. + """ + result: dict[str, set[tuple[str | None, str]]] = {} + result["line num (in script)"] = {(None, str(self.call_definition["Pos"]["Line"]))} + result["callee decl line num (in script)"] = {(None, str(self.func_definition["Pos"]["Line"]))} + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + +def get_stdout_redirects(stmt: bashparser_model.Stmt, context: BashScriptContext) -> set[facts.Location]: + """Extract the stdout redirects specified on the statement as a set of location expressions.""" + redirs: set[facts.Location] = set() + for redir in stmt.get("Redirs", []): + if redir["Op"] in { + bashparser_model.RedirOperators.RdrOut.value, + bashparser_model.RedirOperators.RdrAll.value, + bashparser_model.RedirOperators.AppAll.value, + bashparser_model.RedirOperators.AppOut.value, + }: + if "Word" in redir: + redir_word = redir["Word"] + redir_val = convert_shell_word_to_value(redir_word, context) + if redir_val is not None: + redirs.add(facts.Location(context.filesystem.ref, facts.Filesystem(redir_val[0]))) + return redirs + + +class BashStatementNode(core.InterpretationNode): + """Interpretation node representing any kind of Bash statement. + + Defines how to interpret the different kinds of statements and generate the appropriate + analysis representation. + """ + + #: The parsed statement AST. + definition: bashparser_model.Stmt + #: Bash script context. 
+ context: core.ContextRef[BashScriptContext] + + def __init__(self, definition: bashparser_model.Stmt, context: core.ContextRef[BashScriptContext]) -> None: + """Initialize statement node.""" + super().__init__() + self.definition = definition + self.context = context + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Interpret the different kinds of statements and generate the appropriate analysis representation.""" + cmd = self.definition["Cmd"] + if ( + bashparser_model.is_call_expr(cmd) + and len(cmd.get("Args", [])) == 0 + and "Assigns" in cmd + and len(cmd["Assigns"]) == 1 + ): + # Single variable assignment statement. + assign = cmd["Assigns"][0] + + def build_assign() -> core.Node: + rhs_content = ( + parse_content(assign["Value"]["Parts"], True) + if "Value" in assign + else [LiteralOrEnvVar(is_env_var=False, literal="")] + ) + if rhs_content is not None: + rhs_val = convert_shell_value_sequence_to_fact_value(rhs_content, self.context.ref) + return models.VarAssignNode( + kind=models.VarAssignKind.BASH_ENV_VAR, + var_scope=self.context.ref.env.ref, + var_name=facts.StringLiteral(assign["Name"]["Value"]), + value=rhs_val, + ) + return core.NoOpStatementNode() + + return {"default": build_assign} + if bashparser_model.is_call_expr(cmd) and "Args" in cmd and len(cmd["Args"]) > 0: + # Statement executing a command, generate node with command name expression and + # expressions for each argument value. + # In the case where a word may tokenize as multiple words depending on the value, + # attempt to resolve them and where they do resolve to something that tokenizes as + # multiple args, generate alternative interpretations with those expanded number of + # args, alongside interpretations where those words are a dynamic expression that is + # constrained to be a single word. 
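+ # For example (illustrative), given FLAGS="-a -b", the statement `tar $FLAGS archive`
+ # gets one interpretation where $FLAGS expands into the two arguments "-a" and "-b",
+ # and another where it remains a single dynamic argument constrained to one token.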
+ arg_vals = [convert_shell_word_to_value(arg, self.context.ref) for arg in cmd["Args"]] + multitoken_resolved_arg_vals: dict[ + int, list[tuple[list[bashparser_model.Word], evaluation.ReadBindings]] + ] = defaultdict(list) + + for index, arg_val_elem in enumerate(arg_vals): + if arg_val_elem is None: + continue + arg_val_elem_val, arg_quoted = arg_val_elem + if not arg_quoted: + resolved_arg_vals = evaluation.evaluate(self, arg_val_elem_val) + for resolved_arg_val, resolved_arg_val_bindings in resolved_arg_vals: + match resolved_arg_val: + case facts.StringLiteral(literal): + parsed_bash_expr = parse_bash_expr(literal) + if parsed_bash_expr is not None and len(parsed_bash_expr) > 1: + multitoken_resolved_arg_vals[index].append( + (parsed_bash_expr, resolved_arg_val_bindings) + ) + arg_indices_in_order: list[int] = [] + values_indices_in_order: list[list[int]] = [] + for index, vals in multitoken_resolved_arg_vals.items(): + arg_indices_in_order.append(index) + values_indices_in_order.append([index for index, _ in enumerate(vals)] + [-1]) + + # Cross product could become very expensive + values_product = list(product(*values_indices_in_order)) + + if len(values_product) == 0: + values_product = [()] + + result: dict[core.InterpretationKey, Callable[[], core.Node]] = {} + + for values_product_elem in values_product: + new_arg_vals: dict[int, list[facts.Value | None]] = {} + read_bindings_list: list[evaluation.ReadBindings] = [] + for arg_index, value_index in zip(arg_indices_in_order, values_product_elem): + if value_index != -1: + expanded_vals, bindings = multitoken_resolved_arg_vals[arg_index][value_index] + read_bindings_list.append(bindings) + converted = [ + convert_shell_word_to_value(expanded_val, self.context.ref) + for expanded_val in expanded_vals + ] + new_arg_vals[arg_index] = [x[0] if x is not None else None for x in converted] + else: + old_arg_val = arg_vals[arg_index] + new_arg_vals[arg_index] = [ + facts.SingleBashTokenConstraint(old_arg_val[0]) if old_arg_val is not None else None + ] + + combined_bindings = evaluation.ReadBindings.combine_bindings(read_bindings_list) + if combined_bindings is None: + continue + full_arg_list: list[facts.Value | None] = [] + + for index, arg_val in enumerate(arg_vals): + if index in new_arg_vals: + full_arg_list.extend(new_arg_vals[index]) + else: + full_arg_list.append(arg_val[0] if arg_val is not None else None) + + cmd_arg = full_arg_list[0] + # TODO subshells + if cmd_arg is not None: + cmd_arg_val = cmd_arg + + def build_single_cmd( # pylint: disable=dangerous-default-value + cmd_arg: facts.Value = cmd_arg_val, cmd_arg_list: list[facts.Value | None] = full_arg_list[1:] + ) -> core.Node: + stdout_redirs = get_stdout_redirects(self.definition, self.context.ref) + return BashSingleCommandNode( + self.definition, self.context.get_non_owned(), cmd_arg, cmd_arg_list, stdout_redirs + ) + + result[("cmd", values_product_elem, combined_bindings)] = build_single_cmd + return result + if bashparser_model.is_if_clause(cmd): + # If statement. + + def build_if() -> core.Node: + return BashIfClauseNode.create(cmd, self.context.get_non_owned()) + + return {"default": build_if} + + if bashparser_model.is_for_clause(cmd): + # For statement. 
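+ # Handles both `for x in ...; do ...; done` and C-style `for ((init; cond; post))`
+ # loops; see BashForClauseNode.create for how the latter is decomposed.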
+
+ def build_for() -> core.Node:
+ return BashForClauseNode.create(cmd, self.context.get_non_owned())
+
+ return {"default": build_for}
+ if bashparser_model.is_binary_cmd(cmd):
+ match cmd["Op"]:
+ case bashparser_model.BinCmdOperators.Pipe.value:
+
+ def build_pipe() -> core.Node:
+ return BashPipeNode.create(cmd, self.context.get_non_owned())
+
+ return {"default": build_pipe}
+ case bashparser_model.BinCmdOperators.PipeAll.value:
+ pass
+ case bashparser_model.BinCmdOperators.AndStmt.value:
+
+ def build_and() -> core.Node:
+ return BashAndNode.create(cmd, self.context.get_non_owned())
+
+ return {"default": build_and}
+ case bashparser_model.BinCmdOperators.OrStmt.value:
+
+ def build_or() -> core.Node:
+ return BashOrNode.create(cmd, self.context.get_non_owned())
+
+ return {"default": build_or}
+ raise CallGraphError("unknown binary operator: " + str(cmd["Op"]))
+ if bashparser_model.is_func_decl(cmd):
+ # Represent a Bash function decl as a store of the serialized function definition
+ # into a variable in the function decl scope.
+ func_decl_str = json.dumps(cmd)
+
+ def build_func_decl() -> core.Node:
+ return models.VarAssignNode(
+ kind=models.VarAssignKind.BASH_FUNC_DECL,
+ var_scope=self.context.ref.func_decls.ref,
+ var_name=facts.StringLiteral(cmd["Name"]["Value"]),
+ value=facts.StringLiteral(func_decl_str),
+ )
+
+ return {"default": build_func_decl}
+ if bashparser_model.is_block(cmd):
+
+ def build_block() -> core.Node:
+ return BashBlockNode.create(cmd, self.context.get_non_owned())
+
+ return {"default": build_block}
+
+ def build_noop() -> core.Node:
+ return core.NoOpStatementNode()
+
+ return {"default": build_noop}
+
+ def get_exit_state_transfer_filter(self) -> core.StateTransferFilter:
+ """Return state transfer filter to clear scopes owned by this node after this node exits."""
+ return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context))
+
+ def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]:
+ """Return a properties table containing the line number and scopes."""
+ result: dict[str, set[tuple[str | None, str]]] = {}
+ result["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))}
+ printing.add_context_owned_scopes_to_properties_table(result, self.context)
+ return result
+
+
+class BashIfClauseNode(core.ControlFlowGraphNode):
+ """Control-flow-graph node representing a Bash if statement.
+
+ Control flow structure consists of executing the statements of the condition,
+ followed by a branch to execute either the then node or the else node (or if
+ there is no else node, exit immediately). The analysis is not path sensitive,
+ so both branches are always considered possible regardless of the condition.
+ """
+
+ #: Parsed if statement AST.
+ definition: bashparser_model.IfClause
+ #: Block node to execute the condition.
+ cond_stmts: BashBlockNode
+ #: Block node for the case where the condition is true.
+ then_stmts: BashBlockNode
+ #: Node for the case where the condition is false, if any
+ #: (will be another if node in the case of an elif).
+ else_stmts: BashBlockNode | BashIfClauseNode | None
+ #: Bash script context.
+ context: core.ContextRef[BashScriptContext]
+ #: Control flow graph.
+ _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: bashparser_model.IfClause, + cond_stmts: BashBlockNode, + then_stmts: BashBlockNode, + else_stmts: BashBlockNode | BashIfClauseNode | None, + context: core.ContextRef[BashScriptContext], + ) -> None: + """Initialize Bash if statement node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: bashparser_model.IfClause + Parsed if statement AST. + cond_stmts: BashBlockNode + Block node to execute the condition. + then_stmts: BashBlockNode + Block node for the case where the condition is true. + else_stmts: BashBlockNode | BashIfClauseNode | None + Node for the case where the condition is false, if any + (will be another if node in the case of an elif). + context: core.ContextRef[BashScriptContext] + Bash script context. + """ + super().__init__() + self.definition = definition + self.cond_stmts = cond_stmts + self.then_stmts = then_stmts + self.else_stmts = else_stmts + self.context = context + + self._cfg = core.ControlFlowGraph(self.cond_stmts) + self._cfg.add_successor(self.cond_stmts, core.DEFAULT_EXIT, self.then_stmts) + self._cfg.add_successor(self.then_stmts, core.DEFAULT_EXIT, core.DEFAULT_EXIT) + if else_stmts is not None: + self._cfg.add_successor(self.cond_stmts, core.DEFAULT_EXIT, else_stmts) + self._cfg.add_successor(else_stmts, core.DEFAULT_EXIT, core.DEFAULT_EXIT) + else: + self._cfg.add_successor(self.cond_stmts, core.DEFAULT_EXIT, core.DEFAULT_EXIT) + + def children(self) -> Iterator[core.Node]: + """Yield the condition node, then node and (if present) else node.""" + yield self.cond_stmts + yield self.then_stmts + if self.else_stmts is not None: + yield self.else_stmts + + def get_entry(self) -> core.Node: + """Return the entry node (the condition node).""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successor for a given node. + + Returns a propagated early exit of the same type in the case of a BashExit or BashReturn exit type. + """ + if isinstance(exit_type, (BashExit, BashReturn)): + return {exit_type} + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the line number and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + result["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))} + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + @staticmethod + def create( + if_stmt: bashparser_model.IfClause, context: core.NonOwningContextRef[BashScriptContext] + ) -> BashIfClauseNode: + """Create a Bash if statement node from if statement AST. + + Parameters + ---------- + if_stmt: bashparser_model.IfClause + Parsed if statement AST. + context: core.NonOwningContextRef[BashScriptContext] + Bash script context. 
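+
+ Note that an elif chain is represented by nesting: a non-terminal else clause
+ in the AST is itself an if clause and becomes a nested BashIfClauseNode.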
+ """ + cond_stmts = BashBlockNode.create(if_stmt["Cond"], context) + then_stmts = BashBlockNode.create(if_stmt["Then"], context) + else_clause = if_stmt.get("Else") + else_part: BashBlockNode | BashIfClauseNode | None = None + if else_clause is None: + else_part = None + elif bashparser_model.is_else_clause(else_clause): + else_part = BashBlockNode.create(else_clause["Then"], context) + else: + else_part = BashIfClauseNode.create(cast(bashparser_model.IfClause, else_clause), context) + return BashIfClauseNode( + definition=if_stmt, cond_stmts=cond_stmts, then_stmts=then_stmts, else_stmts=else_part, context=context + ) + + +class BashForClauseNode(core.ControlFlowGraphNode): + """Control-flow-graph node representing a Bash for statement. + + Control flow structure consists of executing the statements of the condition, + followed by a branch to execute or skip the loop body node . The analysis is + not path sensitive, so both branches are always considered possible regardless + of the condition. + + TODO: Currently doesn't actually model the loop back edge (need more testing to + be confident of analysis termination in the presence of loops). + """ + + #: Parsed for statement AST. + definition: bashparser_model.ForClause + #: Block node to execute the initializer. + init_stmts: BashBlockNode | None + #: Block node to execute the condition. + cond_stmts: BashBlockNode | None + #: Block node for the loop body. + body_stmts: BashBlockNode + #: Block node to execute the post. + post_stmts: BashBlockNode | None + #: Bash script context. + context: core.ContextRef[BashScriptContext] + #: Control flow graph. + _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: bashparser_model.ForClause, + init_stmts: BashBlockNode | None, + cond_stmts: BashBlockNode | None, + body_stmts: BashBlockNode, + post_stmts: BashBlockNode | None, + context: core.ContextRef[BashScriptContext], + ) -> None: + """Initialize Bash for statement node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: bashparser_model.ForClause + Parsed if statement AST. + init_stmts: BashBlockNode | None + Block node to execute the initializer. + cond_stmts: BashBlockNode | None + Block node to execute the condition. + body_stmts: BashBlockNode + Block node for the body. + post_stmts: BashBlockNode | None + Block node to execute the post. + context: core.ContextRef[BashScriptContext] + Bash script context. + """ + super().__init__() + self.definition = definition + self.init_stmts = init_stmts + self.cond_stmts = cond_stmts + self.body_stmts = body_stmts + self.post_stmts = post_stmts + self.context = context + + self._cfg = core.ControlFlowGraph.create_from_sequence( + list(filter(core.node_is_not_none, [self.init_stmts, self.cond_stmts, self.body_stmts, self.post_stmts])) + ) + + def children(self) -> Iterator[core.Node]: + """Yield the initializer, condition, body and post nodes.""" + if self.init_stmts is not None: + yield self.init_stmts + if self.cond_stmts is not None: + yield self.cond_stmts + yield self.body_stmts + if self.post_stmts is not None: + yield self.post_stmts + + def get_entry(self) -> core.Node: + """Return the entry node.""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successor for a given node. 
+ + Returns a propagated early exit of the same type in the case of a BashExit or BashReturn exit type. + """ + if isinstance(exit_type, (BashExit, BashReturn)): + return {exit_type} + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the line number and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + result["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))} + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + @staticmethod + def create( + for_stmt: bashparser_model.ForClause, context: core.NonOwningContextRef[BashScriptContext] + ) -> BashForClauseNode: + """Create a Bash for statement node from for statement AST. + + Parameters + ---------- + for_stmt: bashparser_model.ForClause + Parsed for statement AST. + context: core.NonOwningContextRef[BashScriptContext] + Bash script context. + """ + body_stmts = BashBlockNode.create(for_stmt["Do"], context) + + loop = for_stmt["Loop"] + if not bashparser_model.is_cstyle_loop(loop): + return BashForClauseNode( + definition=for_stmt, + init_stmts=None, + cond_stmts=None, + body_stmts=body_stmts, + post_stmts=None, + context=context, + ) + + init_stmts: BashBlockNode | None = None + if "Init" in loop: + init_arithm_cmd = bashparser_model.ArithmCmd( + Type="ArithmCmd", + Pos=bashparser_model.Pos(Offset=0, Line=0, Col=0), + End=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Left=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Right=bashparser_model.Pos(Offset=0, Line=0, Col=0), + X=loop["Init"], + ) + init_stmt = bashparser_model.Stmt( + Cmd=init_arithm_cmd, + Pos=bashparser_model.Pos(Offset=0, Line=0, Col=0), + End=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Position=bashparser_model.Pos(Offset=0, Line=0, Col=0), + ) + init_stmts = BashBlockNode.create([init_stmt], context) + + cond_stmts: BashBlockNode | None = None + if "Cond" in loop: + cond_arithm_cmd = bashparser_model.ArithmCmd( + Type="ArithmCmd", + Pos=bashparser_model.Pos(Offset=0, Line=0, Col=0), + End=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Left=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Right=bashparser_model.Pos(Offset=0, Line=0, Col=0), + X=loop["Cond"], + ) + cond_stmt = bashparser_model.Stmt( + Cmd=cond_arithm_cmd, + Pos=bashparser_model.Pos(Offset=0, Line=0, Col=0), + End=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Position=bashparser_model.Pos(Offset=0, Line=0, Col=0), + ) + cond_stmts = BashBlockNode.create([cond_stmt], context) + + post_stmts: BashBlockNode | None = None + if "Post" in loop: + post_arithm_cmd = bashparser_model.ArithmCmd( + Type="ArithmCmd", + Pos=bashparser_model.Pos(Offset=0, Line=0, Col=0), + End=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Left=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Right=bashparser_model.Pos(Offset=0, Line=0, Col=0), + X=loop["Post"], + ) + post_stmt = bashparser_model.Stmt( + Cmd=post_arithm_cmd, + Pos=bashparser_model.Pos(Offset=0, Line=0, Col=0), + End=bashparser_model.Pos(Offset=0, Line=0, Col=0), + Position=bashparser_model.Pos(Offset=0, Line=0, Col=0), + ) + post_stmts = BashBlockNode.create([post_stmt], context) + + return 
BashForClauseNode( + definition=for_stmt, + init_stmts=init_stmts, + cond_stmts=cond_stmts, + body_stmts=body_stmts, + post_stmts=post_stmts, + context=context, + ) + + +@dataclass(frozen=True) +class BashPipeContext(core.Context): + """Context for a Bash pipe operation. + + Introduces a scope and location to represent the pipe itself connecting the piped commands, + where output from the piped-from command is written prior to being read as input by the piped-to + command. + """ + + #: Outer Bash script context + bash_script_context: core.ContextRef[BashScriptContext] + #: Scope for pipe. + pipe_scope: core.ContextRef[facts.Scope] + #: Location for pipe. + pipe_loc: facts.LocationSpecifier + + @staticmethod + def create(context: core.ContextRef[BashScriptContext]) -> BashPipeContext: + """Create a new pipe context and its associated scope.""" + return BashPipeContext(context.get_non_owned(), core.OwningContextRef(facts.Scope("pipe")), facts.Console()) + + def direct_refs(self) -> Iterator[core.ContextRef[core.Context] | core.ContextRef[facts.Scope]]: + """Yield the direct references of the context, either to scopes or to other contexts.""" + yield self.bash_script_context + yield self.pipe_scope + + +class BashPipeNode(core.ControlFlowGraphNode): + """Control flow node representing a Bash pipe ("|") binary command. + + Control flow structure consists of executing the left-hand side, + followed by the right-hand side. + A pipe scope and location is introduced to model the piping of the + output from the first command to the input of the second command. + """ + + #: Parsed pipe binary command AST. + definition: bashparser_model.BinaryCmd + #: Left-hand side (first) command. + lhs: BashStatementNode + #: Right-hand side (second) command. + rhs: BashStatementNode + #: Pipe context. + context: core.ContextRef[BashPipeContext] + #: Control flow graph. + _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: bashparser_model.BinaryCmd, + lhs: BashStatementNode, + rhs: BashStatementNode, + context: core.ContextRef[BashPipeContext], + ) -> None: + """Initialize Bash pipe node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: bashparser_model.BinaryCmd + Parsed pipe binary command AST. + lhs: BashStatementNode + Left-hand side (first) command. + rhs: BashStatementNode + Right-hand side (second) command. + context: core.ContextRef[BashPipeContext] + Pipe context. + """ + super().__init__() + self.definition = definition + self.lhs = lhs + self.rhs = rhs + self.context = context + + self._cfg = core.ControlFlowGraph(self.lhs) + self._cfg.add_successor(self.lhs, core.DEFAULT_EXIT, self.rhs) + self._cfg.add_successor(self.rhs, core.DEFAULT_EXIT, core.DEFAULT_EXIT) + + def children(self) -> Iterator[core.Node]: + """Yield the subcommands.""" + yield self.lhs + yield self.rhs + + def get_entry(self) -> core.Node: + """Return the entry node (the lhs node).""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successor for a given node. + + Returns a propagated early exit of the same type in the case of a BashExit or BashReturn exit type. 
+ """ + if isinstance(exit_type, (BashExit, BashReturn)): + return {exit_type} + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the line number and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + result["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))} + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + @staticmethod + def create( + pipe_cmd: bashparser_model.BinaryCmd, context: core.NonOwningContextRef[BashScriptContext] + ) -> BashPipeNode: + """Create Bash pipe node from pipe binary command AST. + + Parameters + ---------- + pipe_cmd: bashparser_model.BinaryCmd + Parsed pipe binary command AST. + context: core.NonOwningContextRef[BashScriptContext] + Bash script context. + """ + pipe_context = core.OwningContextRef(BashPipeContext.create(context)) + piped_from_context = core.NonOwningContextRef( + context.ref.with_stdout(pipe_context.ref.pipe_scope.get_non_owned(), pipe_context.ref.pipe_loc) + ) + piped_to_context = core.NonOwningContextRef( + context.ref.with_stdin(pipe_context.ref.pipe_scope.get_non_owned(), pipe_context.ref.pipe_loc) + ) + lhs = BashStatementNode(pipe_cmd["X"], piped_from_context) + rhs = BashStatementNode(pipe_cmd["Y"], piped_to_context) + return BashPipeNode(definition=pipe_cmd, lhs=lhs, rhs=rhs, context=pipe_context) + + +class BashAndNode(core.ControlFlowGraphNode): + """Control flow node representing a Bash AND ("&&") binary command. + + Control flow structure consists of executing the left-hand side, + followed by the right-hand side. + + (TODO model short circuit?) + """ + + #: Parsed AND binary command AST. + definition: bashparser_model.BinaryCmd + #: Left-hand side (first) command. + lhs: BashStatementNode + #: Right-hand side (second) command. + rhs: BashStatementNode + #: Bash script context. + context: core.ContextRef[BashScriptContext] + #: Control flow graph. + _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: bashparser_model.BinaryCmd, + lhs: BashStatementNode, + rhs: BashStatementNode, + context: core.ContextRef[BashScriptContext], + ) -> None: + """Initialize Bash and node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: bashparser_model.BinaryCmd + Parsed AND binary command AST. + lhs: BashStatementNode + Left-hand side (first) command. + rhs: BashStatementNode + Right-hand side (second) command. + context: core.ContextRef[BashScriptContext] + Bash script context. + """ + super().__init__() + self.definition = definition + self.lhs = lhs + self.rhs = rhs + self.context = context + + self._cfg = core.ControlFlowGraph.create_from_sequence([lhs, rhs]) + + def children(self) -> Iterator[core.Node]: + """Yield the subcommands.""" + yield self.lhs + yield self.rhs + + def get_entry(self) -> core.Node: + """Return the entry node (the lhs node).""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successor for a given node. 
+
+ Returns a propagated early exit of the same type in the case of a BashExit or BashReturn exit type.
+ """
+ if isinstance(exit_type, (BashExit, BashReturn)):
+ return {exit_type}
+ return self._cfg.get_successors(node, core.DEFAULT_EXIT)
+
+ def get_exit_state_transfer_filter(self) -> core.StateTransferFilter:
+ """Return state transfer filter to clear scopes owned by this node after this node exits."""
+ return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context))
+
+ def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]:
+ """Return a properties table containing the line number and scopes."""
+ result: dict[str, set[tuple[str | None, str]]] = {}
+ result["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))}
+ printing.add_context_owned_scopes_to_properties_table(result, self.context)
+ return result
+
+ @staticmethod
+ def create(
+ and_cmd: bashparser_model.BinaryCmd, context: core.NonOwningContextRef[BashScriptContext]
+ ) -> BashAndNode:
+ """Create Bash AND node from AND binary command AST.
+
+ Parameters
+ ----------
+ and_cmd: bashparser_model.BinaryCmd
+ Parsed AND binary command AST.
+ context: core.NonOwningContextRef[BashScriptContext]
+ Bash script context.
+ """
+ lhs = BashStatementNode(and_cmd["X"], context)
+ rhs = BashStatementNode(and_cmd["Y"], context)
+ return BashAndNode(definition=and_cmd, lhs=lhs, rhs=rhs, context=context)
+
+
+class BashOrNode(core.ControlFlowGraphNode):
+ """Control flow node representing a Bash OR ("||") binary command.
+
+ Control flow structure consists of executing the left-hand side,
+ followed by the right-hand side.
+
+ (TODO model short circuit?)
+ """
+
+ #: Parsed OR binary command AST.
+ definition: bashparser_model.BinaryCmd
+ #: Left-hand side (first) command.
+ lhs: BashStatementNode
+ #: Right-hand side (second) command.
+ rhs: BashStatementNode
+ #: Bash script context.
+ context: core.ContextRef[BashScriptContext]
+ #: Control flow graph.
+ _cfg: core.ControlFlowGraph
+
+ def __init__(
+ self,
+ definition: bashparser_model.BinaryCmd,
+ lhs: BashStatementNode,
+ rhs: BashStatementNode,
+ context: core.ContextRef[BashScriptContext],
+ ) -> None:
+ """Initialize Bash OR node.
+
+ Typically, construction should be done via the create function rather than using this constructor directly.
+
+ Parameters
+ ----------
+ definition: bashparser_model.BinaryCmd
+ Parsed OR binary command AST.
+ lhs: BashStatementNode
+ Left-hand side (first) command.
+ rhs: BashStatementNode
+ Right-hand side (second) command.
+ context: core.ContextRef[BashScriptContext]
+ Bash script context.
+ """
+ super().__init__()
+ self.definition = definition
+ self.lhs = lhs
+ self.rhs = rhs
+ self.context = context
+
+ self._cfg = core.ControlFlowGraph.create_from_sequence([lhs, rhs])
+
+ def children(self) -> Iterator[core.Node]:
+ """Yield the subcommands."""
+ yield self.lhs
+ yield self.rhs
+
+ def get_entry(self) -> core.Node:
+ """Return the entry node (the lhs node)."""
+ return self._cfg.get_entry()
+
+ def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]:
+ """Return the successor for a given node.
+
+ Returns a propagated early exit of the same type in the case of a BashExit or BashReturn exit type.
+ """ + if isinstance(exit_type, (BashExit, BashReturn)): + return {exit_type} + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the line number and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + result["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))} + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + @staticmethod + def create(or_cmd: bashparser_model.BinaryCmd, context: core.NonOwningContextRef[BashScriptContext]) -> BashOrNode: + """Create Bash OR node from OR binary command AST. + + Parameters + ---------- + and_cmd: bashparser_model.BinaryCmd + Parsed AND binary command AST. + context: core.NonOwningContextRef[BashScriptContext] + Bash script context. + """ + lhs = BashStatementNode(or_cmd["X"], context) + rhs = BashStatementNode(or_cmd["Y"], context) + return BashOrNode(definition=or_cmd, lhs=lhs, rhs=rhs, context=context) + + +class BashSingleCommandNode(core.InterpretationNode): + """Interpretation node representing a single Bash command. + + Defines how to interpret the semantics of the different supported commands that + may be invoked. + """ + + #: Parsed statement AST. + definition: bashparser_model.Stmt + #: Bash script context. + context: core.ContextRef[BashScriptContext] + #: Expression for command name. + cmd: facts.Value + #: Expressions for argument values (None if unrepresentable). + args: list[facts.Value | None] + #: Location expressions for where stdout is redirected to. + stdout_redirects: set[facts.Location] + + def __init__( + self, + definition: bashparser_model.Stmt, + context: core.ContextRef[BashScriptContext], + cmd: facts.Value, + args: list[facts.Value | None], + stdout_redirects: set[facts.Location], + ) -> None: + """Initialize Bash single command node. + + Parameters + ---------- + definition: bashparser_model.Stmt + Parsed statement AST. + context: core.ContextRef[BashScriptContext] + Bash script context. + cmd: facts.Value + Expression for command name. + args: list[facts.Value | None] + Expressions for argument values (None if unrepresentable). + stdout_redirects: set[facts.Location] + Location expressions for where stdout is redirected to. 
+ """ + super().__init__() + self.definition = definition + self.context = context + self.cmd = cmd + self.args = args + self.stdout_redirects = stdout_redirects + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Interpret the semantics of the different supported commands that may be invoked.""" + eval_transformer = evaluation.EvaluationTransformer(state) + evaluated_writes = eval_transformer.transform_value(self.cmd) + result: dict[core.InterpretationKey, Callable[[], core.Node]] = {} + + for resolved_cmd, bindings in evaluated_writes: + match resolved_cmd: + case facts.StringLiteral("echo"): + # Echo command, may have two different interpretations: + # - The concrete semantics of writing to the location its stdout is directed to + # - If writing to the special GitHub output var file, the higher-level semantics + # of writing to the variable as specified in the echoed value. + if len(self.stdout_redirects) in {0, 1} and len(self.args) == 1: + first_arg = self.args[0] + stdout_redir = ( + next(iter(self.stdout_redirects)) + if len(self.stdout_redirects) == 1 + else facts.Location(self.context.ref.stdout_scope.ref, self.context.ref.stdout_loc) + ) + if first_arg is not None: + first_arg_val = first_arg + + def build_echo( + stdout_redir: facts.Location = stdout_redir, first_arg_val: facts.Value = first_arg_val + ) -> core.Node: + return models.BashEchoNode(stdout_redir, first_arg_val) + + github_context = self.context.ref.get_containing_github_context() + + if ( + self._is_github_output_loc(stdout_redir) + and github_context is not None + and github_context.output_var_prefix is not None + ): + output_var_prefix = github_context.output_var_prefix + job_variables_scope = github_context.job_context.ref.job_variables.ref + split = evaluation.parse_str_expr_split(first_arg, "=", maxsplit=1) + if len(split) == 2: + + def build_github_var_write( + job_variables_scope: facts.Scope = job_variables_scope, + output_var_prefix: str = output_var_prefix, + split: list[facts.Value] = split, + ) -> core.Node: + return models.VarAssignNode( + kind=models.VarAssignKind.GITHUB_JOB_VAR, + var_scope=job_variables_scope, + var_name=facts.BinaryStringOp.get_string_concat( + facts.StringLiteral(output_var_prefix), split[0] + ), + value=split[1], + ) + + result[("echo_github_var", bindings)] = build_github_var_write + + result[("echo", bindings)] = build_echo + case facts.StringLiteral("mvn"): + # Maven build command. + for arg in self.args: + match arg: + case facts.StringLiteral(arg_lit): + if arg_lit in {"package", "install", "deploy", "verify"}: + + def build_mvn_build() -> core.Node: + return models.MavenBuildModelNode( + filesystem_scope=self.context.ref.filesystem.ref + ) + + result[("mvn", bindings)] = build_mvn_build + case facts.StringLiteral("exit"): + # Exit command exits the script. + def build_exit_stmt() -> core.Node: + return BashExitNode() + + result[("exit", bindings)] = build_exit_stmt + case facts.StringLiteral("base64"): + # base64 command may encode or decode Base64 strings. 
+ + # TODO model other possibilities + if len(self.stdout_redirects) in {0, 1}: + stdout_redir = ( + next(iter(self.stdout_redirects)) + if len(self.stdout_redirects) == 1 + else facts.Location(self.context.ref.stdout_scope.ref, self.context.ref.stdout_loc) + ) + if len(self.args) == 0: + + def build_base64_encode(stdout_redir: facts.Location = stdout_redir) -> core.Node: + return models.Base64EncodeNode( + facts.Location(self.context.ref.stdin_scope.ref, self.context.ref.stdin_loc), + stdout_redir, + ) + + result[("base64_encode", bindings)] = build_base64_encode + elif len(self.args) == 1 and ( + self.args[0] == facts.StringLiteral("-d") or self.args[0] == facts.StringLiteral("--decode") + ): + + def build_base64_decode(stdout_redir: facts.Location = stdout_redir) -> core.Node: + return models.Base64DecodeNode( + facts.Location(self.context.ref.stdin_scope.ref, self.context.ref.stdin_loc), + stdout_redir, + ) + + result[("base64_decode", bindings)] = build_base64_decode + case facts.StringLiteral(cmd_name) if cmd_name.endswith(".sh"): + # Invoking another shell script. + + # TODO pass arguments + + repo_path = self.context.ref.get_containing_analysis_context().repo_path + if repo_path is not None: + # Check for path traversal patterns before analyzing a bash file. + # TODO working dir + bash_file_path = os.path.realpath(os.path.join(repo_path, "", cmd_name)) + if os.path.exists(bash_file_path) and bash_file_path.startswith(repo_path): + + def build_run_bash_script_file(bash_file_path: str = bash_file_path) -> core.Node: + bash_text = "" + with open(bash_file_path, encoding="utf-8") as bash_file: + bash_text = bash_file.read() + return RawBashScriptNode( + facts.StringLiteral(bash_text), + core.OwningContextRef( + BashScriptContext.create_from_bash_script(self.context, bash_file_path) + ), + ) + + result[("run_file_bash_script", bindings)] = build_run_bash_script_file + case facts.StringLiteral(cmd_name): + # If the command name is a defined shell function (as resolved from a read of the variable of that + # name in the function decl scope), then create a function call to the function definition stored + # in that variable. 
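+ # For example (illustrative), once `deploy() { ...; }` has been analysed, its
+ # serialized declaration is stored under "deploy" in the func_decls scope, so a
+ # later bare `deploy` statement resolves here to a call of that definition.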
+ + evaluated_func_decls = evaluation.evaluate( + self, + facts.Read( + facts.Location( + scope=self.context.ref.func_decls.ref, loc=facts.Variable(facts.StringLiteral(cmd_name)) + ) + ), + ) + for resolved_func, resolved_func_bindings in evaluated_func_decls: + if isinstance(resolved_func, facts.StringLiteral): + combined_func_bindings = evaluation.ReadBindings.combine_bindings( + [bindings, resolved_func_bindings] + ) + if combined_func_bindings is not None: + resolved_func_json = resolved_func.literal + + def build_func_call(func_json: str = resolved_func_json) -> core.Node: + func_decl = cast(bashparser_model.FuncDecl, json.loads(func_json)) + return BashFuncCallNode( + self.definition, + func_decl, + BashBlockNode.create([func_decl["Body"]], self.context.get_non_owned()), + self.context, + ) + + result[("function_call", combined_func_bindings)] = build_func_call + + def build_noop() -> core.Node: + return core.NoOpStatementNode() + + if not isinstance(self.cmd, facts.StringLiteral) or len(result) == 0: + result["default"] = build_noop + + return result + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table. + + Contains the line number, command expression, argument expressions, stdout redirect location expressions, and scopes. + """ + properties: dict[str, set[tuple[str | None, str]]] = {} + properties["line num (in script)"] = {(None, str(self.definition["Pos"]["Line"]))} + properties["cmd"] = {(None, self.cmd.to_datalog_fact_string())} + for index, arg in enumerate(self.args): + properties["arg" + str(index)] = { + (None, arg.to_datalog_fact_string()) if arg is not None else (None, "UNKNOWN") + } + properties["stdout_redirects"] = {(None, x.to_datalog_fact_string()) for x in self.stdout_redirects} + printing.add_context_owned_scopes_to_properties_table(properties, self.context) + return properties + + @staticmethod + def _is_github_output_loc(loc: facts.Location) -> bool: + """Return whether the location is the special GitHub output variable file.""" + match loc: + case facts.Location( + _, facts.Filesystem(facts.Read(facts.Location(_, facts.Variable(facts.StringLiteral("GITHUB_OUTPUT"))))) + ): + return True + return False + + +class BashExitNode(core.StatementNode): + """Statement node representing a Bash exit command. + + Always exits with the BashExit exit type (which causes the whole script to exit). + """ + + def apply_effects(self, before_state: core.State) -> dict[core.ExitType, core.State]: + """Apply the effects of the Bash exit. + + Returns a BashExit exit state that is otherwise the same as the before state. + """ + state = core.State() + core.transfer_state(before_state, state) + return {BASH_EXIT: state} + + +@dataclass(frozen=True) +class LiteralOrEnvVar: + """Represents either a literal or a read of an environment variable.""" + + #: Whether this represents an environment variable (or else a string literal). + is_env_var: bool + #: The environment variable name or string literal value. + literal: str + + +def is_simple_var_read(param_exp: bashparser_model.ParamExp) -> bool: + """Return whether expression is a simple env var read e.g. 
$ENV_VAR.""" + if param_exp.get("Excl", False) or param_exp.get("Length", False) or param_exp.get("Width", False): + return False + if ( + "Index" in param_exp + or "Slice" in param_exp + or "Repl" in param_exp + or "Names" in param_exp + or "Exp" in param_exp + ): + return False + return True + + +def parse_env_var_read_word_part(part: bashparser_model.WordPart, allow_dbl_quoted: bool) -> str | None: + """Parse word part as a read of an environment variable. + + If the given word part is a read of an env var (possibly enclosed in double quotes, if allowed), + return the name of the variable, otherwise None. + """ + if bashparser_model.is_dbl_quoted(part): + if not allow_dbl_quoted: + return None + if "Parts" not in part or len(part["Parts"]) == 0: + return "" + if len(part["Parts"]) == 1: + part = part["Parts"][0] + else: + return None + + if bashparser_model.is_param_exp(part): + if not is_simple_var_read(part): + return None + return part["Param"]["Value"] + + return None + + +def parse_env_var_read_word(word: bashparser_model.Word, allow_dbl_quoted: bool) -> str | None: + """Parse word as a read of an environment variable. + + If the given word is a read of an env var (possibly enclosed in double quotes, if allowed), + return the name of the variable, otherwise None. + """ + if len(word["Parts"]) == 1: + part = word["Parts"][0] + return parse_env_var_read_word_part(part, allow_dbl_quoted) + return None + + +def parse_content(parts: list[bashparser_model.WordPart], allow_dbl_quoted: bool) -> list[LiteralOrEnvVar] | None: + """Parse the given sequence of word parts. + + Return a representation as a sequence of string literal and env var reads, or else return None if not representable in this way. + + If allow_dbl_quoted is True, permit word parts to be double quoted expressions, the content of which will + be included in the sequence (if False, return None if the sequence contains double quoted expressions). 
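+
+    For example, the parts of the word "$PREFIX"/bin (with allow_dbl_quoted=True) would be represented
+    as [LiteralOrEnvVar(is_env_var=True, literal="PREFIX"), LiteralOrEnvVar(is_env_var=False, literal="/bin")].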
+    """
+    content: list[LiteralOrEnvVar] = []
+    for part in parts:
+        env_var = parse_env_var_read_word_part(part, allow_dbl_quoted)
+        if env_var is not None:
+            content.append(LiteralOrEnvVar(is_env_var=True, literal=env_var))
+        elif bashparser_model.is_lit(part):
+            content.append(LiteralOrEnvVar(is_env_var=False, literal=part["Value"]))
+        elif bashparser_model.is_dbl_quoted(part) and "Parts" in part:
+            subcontent = parse_content(part["Parts"], False)
+            if subcontent is None:
+                return None
+            content.extend(subcontent)
+        else:
+            return None
+    return content
+
+
+def convert_shell_value_sequence_to_fact_value(
+    content: list[LiteralOrEnvVar], context: BashScriptContext
+) -> facts.Value:
+    """Convert sequence of Bash values into a single concatenated expression."""
+    if len(content) == 0:
+        raise CallGraphError("sequence cannot be empty")
+
+    first_val = convert_shell_value_to_fact_value(content[0], context)
+    if len(content) == 1:
+        return first_val
+
+    rest_val = convert_shell_value_sequence_to_fact_value(content[1:], context)
+
+    return facts.BinaryStringOp(op=facts.BinaryStringOperator.STRING_CONCAT, operand1=first_val, operand2=rest_val)
+
+
+def convert_shell_value_to_fact_value(val: LiteralOrEnvVar, context: BashScriptContext) -> facts.Value:
+    """Convert a Bash literal or env var read into a value expression."""
+    if val.is_env_var:
+        return facts.Read(
+            loc=facts.Location(scope=context.env.ref, loc=facts.Variable(name=facts.StringLiteral(literal=val.literal)))
+        )
+    return facts.StringLiteral(literal=val.literal)
+
+
+def convert_shell_word_to_value(
+    word: bashparser_model.Word, context: BashScriptContext
+) -> tuple[facts.Value, bool] | None:
+    """Convert a Bash word into a value expression.
+
+    Return value expression alongside a bool indicating whether the value is
+    "quoted" (or else may require further expansion post-resolution if "unquoted").
+    """
+    dbl_quoted_parts = parse_dbl_quoted_string(word)
+    if dbl_quoted_parts is not None:
+        return convert_shell_value_sequence_to_fact_value(dbl_quoted_parts, context), True
+
+    sgl_quoted_str = parse_sgl_quoted_string(word)
+    if sgl_quoted_str is not None:
+        return facts.StringLiteral(sgl_quoted_str), True
+
+    singular_literal = parse_singular_literal(word)
+    if singular_literal is not None:
+        return facts.StringLiteral(literal=singular_literal), True
+
+    single_var = parse_env_var_read_word(word, False)
+    if single_var is not None:
+        return convert_shell_value_to_fact_value(LiteralOrEnvVar(True, single_var), context), False
+
+    return None
+
+
+def parse_dbl_quoted_string(word: bashparser_model.Word) -> list[LiteralOrEnvVar] | None:
+    """Parse double quoted string.
+
+    If the given word is a double quoted expression, return
+    a representation as a sequence of string literal and env var reads, or
+    else return None if it is not a double quoted expression or if it is
+    not representable in this way.
+    """
+    if len(word["Parts"]) == 1:
+        part = word["Parts"][0]
+        if bashparser_model.is_dbl_quoted(part) and "Parts" in part:
+            return parse_content(part["Parts"], False)
+
+    return None
+
+
+def parse_sgl_quoted_string(word: bashparser_model.Word) -> str | None:
+    """Parse single quoted string.
+
+    If the given word is a single quoted string, return the string
+    literal content, otherwise return None.
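+
+    For example, the word 'foo bar' yields the literal string "foo bar".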
+    """
+    if len(word["Parts"]) == 1:
+        part = word["Parts"][0]
+        if bashparser_model.is_sgl_quoted(part):
+            return part["Value"]
+
+    return None
+
+
+def parse_singular_literal(word: bashparser_model.Word) -> str | None:
+    """Parse singular literal word.
+
+    If the given word is a single literal, return the string
+    literal content, otherwise return None.
+    """
+    if len(word["Parts"]) == 1:
+        part = word["Parts"][0]
+        if bashparser_model.is_lit(part):
+            return part["Value"]
+
+    return None
+
+
+# Cache for Bash expression parsing.
+# note: not thread safe
+_bashparser_cache: dict[str, list[bashparser_model.Word] | None] = {}
+
+
+def parse_bash_expr(expr: str) -> list[bashparser_model.Word] | None:
+    """Parse bash expression.
+
+    Results are cached to avoid unnecessary invocations of the Bash parser
+    (since it requires spawning a separate process).
+    """
+    if expr in _bashparser_cache:
+        return _bashparser_cache[expr]
+    try:
+        parse_result = bashparser.parse_expr(expr, MACARON_PATH)
+        _bashparser_cache[expr] = parse_result
+        return parse_result
+    except ParseError:
+        # Cache parse failures as well, so repeated bad expressions do not re-spawn the parser.
+        _bashparser_cache[expr] = None
+        return None
diff --git a/src/macaron/code_analyzer/dataflow_analysis/cmd_parser.py b/src/macaron/code_analyzer/dataflow_analysis/cmd_parser.py
new file mode 100644
index 000000000..f6a074a90
--- /dev/null
+++ b/src/macaron/code_analyzer/dataflow_analysis/cmd_parser.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module contains parsers for command line interfaces for commands relevant to analysis."""
+
+from __future__ import annotations
+
+import argparse
+
+
+def parse_python_command_line(args: list[str]) -> argparse.Namespace:
+    """Parse python command line.
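+
+    Handles a subset of the CPython interpreter interface: a -m module, a -c command, or a script
+    file, with all remaining arguments collected into a synthesized subprocess_args attribute.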
+
+    Parameters
+    ----------
+    args: list[str]
+        The argument list passed to the python command.
+
+    Returns
+    -------
+    argparse.Namespace
+        The parsed python command arguments.
+    """
+    parser = argparse.ArgumentParser(add_help=False)
+    parser.add_argument("-B", action="store_true")
+    parser.add_argument("-b", action="count")
+    parser.add_argument("--check-hash-based-pycs")
+    parser.add_argument("-d", action="store_true")
+    parser.add_argument("-E", action="store_true")
+    parser.add_argument("-h", action="store_true")
+    parser.add_argument("-?", action="store_true", dest="h")
+    parser.add_argument("--help", action="store_true", dest="h")
+    parser.add_argument("--help-env", action="store_true")
+    parser.add_argument("--help-xoptions", action="store_true")
+    parser.add_argument("--help-all", action="store_true")
+    parser.add_argument("-i", action="store_true")
+    parser.add_argument("-I", action="store_true")
+    parser.add_argument("-O", action="count")
+    parser.add_argument("-P", action="store_true")
+    parser.add_argument("-q", action="store_true")
+    parser.add_argument("-s", action="store_true")
+    parser.add_argument("-S", action="store_true")
+    parser.add_argument("-u", action="store_true")
+    parser.add_argument("-v", action="count")
+    parser.add_argument("-V", action="count")
+    parser.add_argument("--version", action="count", dest="V")
+    parser.add_argument("-W", action="store")
+    parser.add_argument("-X", action="store")
+    parser.add_argument("-m", nargs=argparse.REMAINDER)
+    parser.add_argument("-c", nargs=argparse.REMAINDER)
+    parser.add_argument("file", nargs=argparse.REMAINDER)
+
+    parsed_args = parser.parse_args(args)
+
+    if parsed_args.m is not None:
+        parsed_args.subprocess_args = parsed_args.m[1:]
+        parsed_args.m = parsed_args.m[0]
+        parsed_args.file = None
+    elif parsed_args.c is not None:
+        parsed_args.subprocess_args = parsed_args.c[1:]
+        parsed_args.c = parsed_args.c[0]
+        parsed_args.file = None
+    else:
+        if len(parsed_args.file) > 0 and parsed_args.file[0] == "--":
+            parsed_args.file = parsed_args.file[1:]
+        if len(parsed_args.file) == 0:
+            parsed_args.subprocess_args = []
+            parsed_args.file = None
+        else:
+            parsed_args.subprocess_args = parsed_args.file[1:]
+            parsed_args.file = parsed_args.file[0]
+
+    return parsed_args
+
+
+def main() -> None:
+    """Test python command line parser."""
+    print(str(parse_python_command_line(["-B", "-m", "pip", "install", "-U", "cibuildwheel"])))  # noqa: T201
+    print(str(parse_python_command_line(["-B", "pip.py", "install", "-U", "cibuildwheel"])))  # noqa: T201
+    print(str(parse_python_command_line(["-B", "--", "--pip.py", "install", "-U", "cibuildwheel"])))  # noqa: T201
+    print(  # noqa: T201
+        str(parse_python_command_line(["-B", "-c", "import sys; print(sys.argv[1:])", "install", "-U", "cibuildwheel"]))
+    )
+    print(str(parse_python_command_line(["-B"])))  # noqa: T201
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/macaron/code_analyzer/dataflow_analysis/core.py b/src/macaron/code_analyzer/dataflow_analysis/core.py
new file mode 100644
index 000000000..5a33ef56a
--- /dev/null
+++ b/src/macaron/code_analyzer/dataflow_analysis/core.py
@@ -0,0 +1,695 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
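An editorial aside: the first self-test in `main()` above exercises the `-m` branch. Below is a hedged sketch of the expected result; the attribute values are inferred from the parser definition, not from recorded output:

```python
from macaron.code_analyzer.dataflow_analysis.cmd_parser import parse_python_command_line

ns = parse_python_command_line(["-B", "-m", "pip", "install", "-U", "cibuildwheel"])
# The REMAINDER capture for -m is split into the module name and its arguments.
assert ns.B is True
assert ns.m == "pip"
assert ns.subprocess_args == ["install", "-U", "cibuildwheel"]
assert ns.file is None
```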
+
+"""Core dataflow analysis framework definitions and algorithm."""
+
+from __future__ import annotations
+
+import functools
+from abc import ABC, abstractmethod
+from collections import defaultdict
+from collections.abc import Callable, Iterator, Sequence
+from dataclasses import dataclass
+from queue import Queue
+from typing import Generic, Protocol, TypeGuard, TypeVar
+
+from macaron.code_analyzer.dataflow_analysis import facts
+from macaron.errors import CallGraphError
+
+# Debug sequence number used to provide ordering information in debug graph.
+# note: not thread safe
+DEBUG_SEQUENCE_NUMBER = 0
+
+
+def reset_debug_sequence_number() -> None:
+    """Reset debug sequence number."""
+    global DEBUG_SEQUENCE_NUMBER  # pylint: disable=global-statement
+    DEBUG_SEQUENCE_NUMBER = 0
+
+
+def get_debug_sequence_number() -> int:
+    """Get current debug sequence number value."""
+    return DEBUG_SEQUENCE_NUMBER
+
+
+def increment_debug_sequence_number() -> None:
+    """Increment debug sequence number."""
+    global DEBUG_SEQUENCE_NUMBER  # pylint: disable=global-statement
+    DEBUG_SEQUENCE_NUMBER += 1
+
+
+@dataclass(frozen=True)
+class StateDebugLabel:
+    """Label for state fact providing information useful for debugging.
+
+    Provides a record of analysis ordering and whether the fact was just copied
+    from another state rather than newly produced.
+    """
+
+    #: Sequence number at time when state fact was created.
+    sequence_number: int
+    #: Whether the state fact is just copied from another state rather than newly produced.
+    copied: bool
+
+
+class StateTransferFilter(ABC):
+    """Interface for state transfer filters, which filter out state facts by location."""
+
+    @abstractmethod
+    def should_transfer(self, loc: facts.Location) -> bool:
+        """Return whether facts with the given locations should be transferred or else filtered out."""
+
+
+class State:
+    """Representation of the abstract storage state at some program point.
+
+    Consists of a set of abstract locations, each associated with a set of possible values.
+    """
+
+    #: Mapping of locations to a set of possible values.
+    #: Values are annotated with a label containing info relevant for debugging
+    state: dict[facts.Location, dict[facts.Value, StateDebugLabel]]
+
+    def __init__(self) -> None:
+        """Construct an empty state."""
+        self.state = defaultdict(dict)
+
+
+class DefaultStateTransferFilter(StateTransferFilter):
+    """Default state transfer filter that includes all locations."""
+
+    def should_transfer(self, loc: facts.Location) -> bool:
+        """Transfer all locations."""
+        return True
+
+
+# Convenience instance of DefaultStateTransferFilter
+DEFAULT_STATE_TRANSFER_FILTER = DefaultStateTransferFilter()
+
+
+class ExcludedLocsStateTransferFilter(StateTransferFilter):
+    """State transfer filter that excludes any locations in the given set."""
+
+    #: Locations to exclude.
+    excluded_locs: set[facts.Location]
+
+    def __init__(self, excluded_locs: set[facts.Location]) -> None:
+        """Construct filter that excludes the given locations."""
+        self.excluded_locs = excluded_locs
+
+    def should_transfer(self, loc: facts.Location) -> bool:
+        """Return whether facts with the given locations should be transferred or else filtered out."""
+        return loc not in self.excluded_locs
+
+
+class ExcludedScopesStateTransferFilter(StateTransferFilter):
+    """State transfer filter that excludes any locations that are within the scopes in the given set."""
+
+    #: Scopes to exclude.
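+    #: Note that only a location's immediate scope is tested for membership, not its enclosing scopes.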
+ excluded_scopes: set[facts.Scope] + + def __init__(self, excluded_scopes: set[facts.Scope]) -> None: + """Construct filter that excludes the given scopes.""" + self.excluded_scopes = excluded_scopes + + def should_transfer(self, loc: facts.Location) -> bool: + """Return whether facts with the given locations should be transferred or else filtered out.""" + return loc.scope not in self.excluded_scopes + + +def transfer_state( + src_state: State, + dest_state: State, + transfer_filter: StateTransferFilter = DEFAULT_STATE_TRANSFER_FILTER, + debug_is_copy: bool = True, +) -> bool: + """Transfer/copy all facts in the src state to the dest state, except those excluded by the given filter. + + Parameters + ---------- + src_state: State + The state to transfer facts from. + dest_state: State + The state to modify by transferring facts to. + transfer_filter: StateTransferFilter + The filter to apply to the transferred facts (by default, transfer all). + debug_is_copy: bool + Whether the facts newly added to the dest state should be recorded as being copied or not (for debugging purposes). + + Returns + ------- + bool + Whether the dest state was modified. + """ + changed = False + for loc, vals in src_state.state.items(): + if not transfer_filter.should_transfer(loc): + continue + exit_vals = dest_state.state[loc] + for val, label in vals.items(): + if val not in exit_vals: + exit_vals[val] = StateDebugLabel(get_debug_sequence_number(), True if debug_is_copy else label.copied) + changed = True + return changed + + +class ExitType(ABC): + """Representation of an exit type, describing the manner in which the execution of a node may terminate.""" + + @abstractmethod + def __hash__(self) -> int: + pass + + @abstractmethod + def __eq__(self, other: object) -> bool: + pass + + +class DefaultExit(ExitType): + """Default, normal exit.""" + + def __hash__(self) -> int: + return 19391 + + def __eq__(self, other: object) -> bool: + return isinstance(other, DefaultExit) + + +# Convenience instance of DefaultExit. +DEFAULT_EXIT = DefaultExit() + + +class Node(ABC): + """Base class of all node types in dataflow analysis. + + Subclasses will represent the various program/semantic constructs, + and define how to analyse them. + """ + + #: Abstract state at the point before the execution of this node. + before_state: State + + #: Abstract state at the point after the execution of this node, for each possible distinct exit type. + exit_states: dict[ExitType, State] + + #: Sequence number at the point the node was created, recorded for debugging purposes. + created_debug_sequence_num: int + #: Log of begin/end sequence numbers each time this node was processed, recorded for debugging purposes. + processed_log: list[tuple[int, int]] + + def __init__(self) -> None: + """Initialize with empty states.""" + self.before_state = State() + self.exit_states = defaultdict(State) + self.created_debug_sequence_num = get_debug_sequence_number() + self.processed_log = [] + + @abstractmethod + def children(self) -> Iterator[Node]: + """Yield the child nodes of this node.""" + + @abstractmethod + def analyse(self) -> bool: + """Perform analysis of this node (and potentially any child nodes). + + Update the exit states with the analysis result. + Returns whether anything was modified. 
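+
+        Implementations must return True whenever any state or node structure has changed, so that
+        the enclosing fixpoint computation knows to keep iterating.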
+        """
+        raise NotImplementedError
+
+    def is_processed(self) -> bool:
+        """Return whether this node has been processed."""
+        return len(self.processed_log) > 0
+
+    def notify_processed(self, begin_seq_num: int, end_seq_num: int) -> None:
+        """Record that this node has been processed."""
+        self.processed_log.append((begin_seq_num, end_seq_num))
+
+    def get_exit_state_transfer_filter(self) -> StateTransferFilter:
+        """Return the state transfer filter applicable to the exit state of this node.
+
+        By default, nothing is excluded. Subclasses should override to provide appropriate filters
+        to avoid transferring state that will be irrelevant after the node exits.
+        """
+        return DEFAULT_STATE_TRANSFER_FILTER
+
+    def __hash__(self) -> int:
+        return id(self)
+
+    def __eq__(self, other: object) -> bool:
+        return self is other
+
+    def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]:
+        """Return a table of stringified properties, describing the details of this node, for debugging purposes.
+
+        The returned properties table is a mapping of name to value-set, which can be rendered via the functions
+        in the printing module.
+        """
+        return {}
+
+
+def node_is_not_none(node: Node | None) -> TypeGuard[Node]:
+    """Return whether the given node is not None."""
+    return node is not None
+
+
+def traverse_bfs(node: Node) -> Iterator[Node]:
+    """Traverse the node tree in a breadth-first manner, yielding the nodes (including this node) in traversal order."""
+    queue: Queue[Node] = Queue()
+    queue.put(node)
+    while not queue.empty():
+        next_node = queue.get()
+        yield next_node
+        for child in next_node.children():
+            queue.put(child)
+
+
+def build_parent_mapping(node: Node) -> dict[Node, Node]:
+    """Construct a mapping of nodes to their parent nodes."""
+    parents: dict[Node, Node] = {}
+
+    queue: Queue[Node] = Queue()
+    queue.put(node)
+    while not queue.empty():
+        next_node = queue.get()
+        for child in next_node.children():
+            parents[child] = next_node
+            queue.put(child)
+
+    return parents
+
+
+class NodeForest:
+    """A collection of independent root nodes (with no control-flow or relation between them)."""
+
+    #: Collection of root nodes.
+    root_nodes: list[Node]
+    #: Mapping of nodes to their parent nodes.
+    parents: dict[Node, Node]
+
+    def __init__(self, root_nodes: list[Node]) -> None:
+        """Construct a NodeForest for the given nodes, and build the parent mapping."""
+        self.root_nodes = root_nodes
+        self.parents = {}
+        for root_node in root_nodes:
+            root_node_parents = build_parent_mapping(root_node)
+            self.parents.update(root_node_parents)
+
+
+class ControlFlowGraph:
+    """Graph structure to represent control flow graphs."""
+
+    #: Entry node.
+    entry: Node
+    #: Graph of successor edges.
+    #: Each edge is from a particular exit of a particular node, either to a node or to an exit of the control flow itself.
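+    #: Represented as a mapping of source node -> exit type -> set of destinations, where an ExitType
+    #: destination denotes the corresponding exit of this graph as a whole.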
+ successors: dict[Node, dict[ExitType, set[Node | ExitType]]] + + def __init__(self, entry: Node) -> None: + """Construct an initially-empty control flow graph.""" + self.entry = entry + self.successors = defaultdict(lambda: defaultdict(set)) + + def get_entry(self) -> Node: + """Return the entry node.""" + return self.entry + + def add_successor(self, src: Node, exit_type: ExitType, dest: Node | ExitType) -> None: + """Add a successor edge to the control flow graph.""" + self.successors[src][exit_type].add(dest) + + def get_successors(self, node: Node, exit_type: ExitType) -> set[Node | ExitType]: + """Return the successors for a particular exit of a particular node.""" + return self.successors[node][exit_type] + + @staticmethod + def create_from_sequence(seq: Sequence[Node]) -> ControlFlowGraph: + """Construct a linear sequence of nodes.""" + if len(seq) == 0: + raise CallGraphError("cannot create control flow graph from empty sequence") + cfg = ControlFlowGraph(seq[0]) + prev_node = seq[0] + for node in seq[1:]: + cfg.add_successor(prev_node, DEFAULT_EXIT, node) + prev_node = node + + cfg.add_successor(prev_node, DEFAULT_EXIT, DEFAULT_EXIT) + + return cfg + + +class ControlFlowGraphNode(Node): + """Base class for nodes representing control-flow constructs. + + Defines the generic algorithm for analysing control flow graphs. + Subclasses will define the child nodes and concrete graph structure. + """ + + def _propagate_edges( + self, + worklist: set[Node], + src_state: State, + state_transfer_filter: StateTransferFilter, + successors: set[Node | ExitType], + ) -> bool: + changed = False + for successor in successors: + if isinstance(successor, Node): + transfer_changed = transfer_state(src_state, successor.before_state, state_transfer_filter) + changed = changed or transfer_changed + if transfer_changed or not successor.is_processed(): + worklist.add(successor) + elif isinstance(successor, ExitType): + changed = transfer_state(src_state, self.exit_states[successor], state_transfer_filter) or changed + return changed + + def analyse(self) -> bool: + """Perform analysis of this node. + + Performs analysis of the child nodes and propagates state from the exit state of an updated node to the before + state of its successor nodes, according to the control-flow-graph structure, then analyses the successor nodes, + and so on until a fixpoint is reached and no further updates may be made to any node states. + + Returns whether anything was modified. 
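+
+        This is a standard worklist algorithm: a node is (re)analysed whenever its before state
+        gains new facts, or when it has not yet been processed at all.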
+ """ + begin_seq_num = get_debug_sequence_number() + entry_node = self.get_entry() + if entry_node is None: + changed = transfer_state(self.before_state, self.exit_states[DEFAULT_EXIT]) + increment_debug_sequence_number() + return changed + + changed = transfer_state(self.before_state, entry_node.before_state) + increment_debug_sequence_number() + + worklist = {entry_node} + + while len(worklist) > 0: + next_node = worklist.pop() + next_changed = next_node.analyse() + changed = changed or next_changed + + next_state_transfer_filter = next_node.get_exit_state_transfer_filter() + + for exit_type, exit_state in next_node.exit_states.items(): + successors = self.get_successors(next_node, exit_type) + changed = self._propagate_edges(worklist, exit_state, next_state_transfer_filter, successors) or changed + + increment_debug_sequence_number() + + self.notify_processed(begin_seq_num, get_debug_sequence_number() - 1) + return changed + + @abstractmethod + def get_entry(self) -> Node | None: + """Return the entry node.""" + + @abstractmethod + def get_successors(self, node: Node, exit_type: ExitType) -> set[Node | ExitType]: + """Return the successors for a particular exit of a particular node.""" + + +class StatementNode(Node): + """Base class for nodes representing constructs with direct effects (and no child nodes). + + Subclasses will define the effects that apply when the node is executed. + """ + + def analyse(self) -> bool: + """Perform analysis of this node, by applying the effects to update the after state. + + Returns whether anything was modified. + """ + begin_seq_num = get_debug_sequence_number() + new_exit_states = self.apply_effects(self.before_state) + changed = False + for new_exit_type, new_exit_state in new_exit_states.items(): + changed = transfer_state(new_exit_state, self.exit_states[new_exit_type], debug_is_copy=False) or changed + + self.notify_processed(begin_seq_num, get_debug_sequence_number()) + increment_debug_sequence_number() + return changed + + def children(self) -> Iterator[Node]: + """Yield nothing, as statements have no child nodes.""" + yield from () + + @abstractmethod + def apply_effects(self, before_state: State) -> dict[ExitType, State]: + """Apply the effects of the statement, given the before state, returning the resulting exit state.""" + + +class NoOpStatementNode(StatementNode): + """Statement that has no effect.""" + + def apply_effects(self, before_state: State) -> dict[ExitType, State]: + """Apply the effects of the no-op, returning an exit state that is the same as the before state.""" + state = State() + transfer_state(before_state, state) + return {DEFAULT_EXIT: state} + + +class InterpretationKey(Protocol): + """Interpretation key used to identify interpretations that have been produced before. + + Must support hashing and equality comparison to allow use as a dict key. + """ + + @abstractmethod + def __hash__(self) -> int: + pass + + @abstractmethod + def __eq__(self, other: object, /) -> bool: + pass + + +class InterpretationNode(Node): + """Base class for nodes representing constructs requiring interpretation. + + Such constructs must be interpreted to produce possibly-multiple child nodes representing possible + interpretations of the semantics of the node. + + Analysing the interpretation node will apply the combined effects of all of the possible interpretations. + Subclasses will define how to identify the possible interpretations and generate the corresponding nodes. 
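+
+    Interpretations are keyed so that, as the before state grows across fixpoint iterations, only
+    interpretations that have not been seen before cause new child nodes to be constructed.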
+ """ + + #: The generated interpretations of this node, identified/deduplicated by some interpretation key. + interpretations: dict[InterpretationKey, Node] + + def __init__(self) -> None: + """Initialize node with no interpretations.""" + super().__init__() + self.interpretations = {} + + def children(self) -> Iterator[Node]: + """Yield each of the possible interpretations.""" + yield from self.interpretations.values() + + def update_interpretations(self) -> bool: + """Analyse the node to identify interpretations. + + Analysis is done in the context of the current before state, adding any + new interpretations generated to the interpretations dict. + """ + latest_interpretations = self.identify_interpretations(self.before_state) + new_interpretations = {x: y for (x, y) in latest_interpretations.items() if x not in self.interpretations} + for new_interpretation, build_node in new_interpretations.items(): + self.interpretations[new_interpretation] = build_node() + + return len(new_interpretations) != 0 + + @abstractmethod + def identify_interpretations(self, state: State) -> dict[InterpretationKey, Callable[[], Node]]: + """Analyse the node, in the context of the given before state, to identify interpretations. + + Returns, for each discovered interpretation, an identifying interpretation key that can be used + to determine if the interpretation has been produced previously, and a callable that generates + the node representing that interpretation (used to generate the node if the interpretation is new, + otherwise the previously-generated node will be reused). + """ + + def analyse(self) -> bool: + """Perform analysis of this node, by analysing each possible interpretation. + + Merges the exit states of each analysed interpretation to update the exit state of this node. + + Returns whether anything was modified. + """ + begin_seq_num = get_debug_sequence_number() + + interpretations_changed = self.update_interpretations() + + increment_debug_sequence_number() + + sub_nodes_changed = False + exit_changed = False + + key_transfer_changed: dict[InterpretationKey, bool] = {} + + for key, node in self.interpretations.items(): + transfer_changed = transfer_state(self.before_state, node.before_state) + key_transfer_changed[key] = transfer_changed + sub_nodes_changed = sub_nodes_changed or transfer_changed + + increment_debug_sequence_number() + + for key, node in self.interpretations.items(): + if key_transfer_changed[key] or not node.is_processed(): + analyse_changed = node.analyse() + sub_nodes_changed = sub_nodes_changed or analyse_changed + + for node in self.interpretations.values(): + for exit_type, exit_state in node.exit_states.items(): + if exit_type not in self.exit_states: + exit_changed = True + exit_changed = ( + transfer_state(exit_state, self.exit_states[exit_type], node.get_exit_state_transfer_filter()) + or exit_changed + ) + + self.notify_processed(begin_seq_num, get_debug_sequence_number()) + increment_debug_sequence_number() + + return interpretations_changed or sub_nodes_changed or exit_changed + + +R_co = TypeVar("R_co", covariant=True) + + +@dataclass(frozen=True) +class OwningContextRef(Generic[R_co]): + """A reference to a part of a node's context that "owns" it. + + Ownership is used to identify what scopes are tied to a particular node + such that they cease to exist or become irrelevant after the node exits, + and thus any values stored in locations within those scopes may be erased + from the state beyond that point to simplify the state. 
+ """ + + ref: R_co + + def get_non_owned(self) -> NonOwningContextRef[R_co]: + """Return a non owning reference to the same object.""" + return NonOwningContextRef(self.ref) + + +@dataclass(frozen=True) +class NonOwningContextRef(Generic[R_co]): + """A reference to a part of a node's context that does not "own" it. + + Ownership is used to identify what scopes are tied to a particular node + such that they cease to exist or become irrelevant after the node exits, + and thus any values stored in locations within those scopes may be erased + from the state beyond that point to simplify the state. + """ + + ref: R_co + + def get_non_owned(self) -> NonOwningContextRef[R_co]: + """Return a non-owning reference to the same object.""" + return self + + +# A context ref may be owning or non-owning. +ContextRef = OwningContextRef[R_co] | NonOwningContextRef[R_co] + + +class Context(ABC): + """Base class for node contexts. + + Represents the necessary context that influences the analysis of a node, + primarily that of identifying the concrete scopes that fill particular + roles in the node. + """ + + @abstractmethod + def direct_refs(self) -> Iterator[ContextRef[Context] | ContextRef[facts.Scope]]: + """Yield the direct references of the context, either to scopes or to other contexts.""" + + def owned_scopes(self) -> Iterator[OwningContextRef[facts.Scope]]: + """Yield the scopes that are owned by this context. + + Owned scopes are those that are directly referenced by owning references or scopes + that are indirectly referenced by owning references, through referenced contexts that + are referenced by owning references. + """ + for ref in self.direct_refs(): + if isinstance(ref, OwningContextRef): + if isinstance(ref.ref, Context): + yield from ref.ref.owned_scopes() + else: + yield ref + + +@dataclass(frozen=True) +class AnalysisContext(Context): + """Outermost context of the analysis. + + Records the path to the repo checkout, to allow the analysis access to files in the repo. + """ + + repo_path: str | None + + def direct_refs(self) -> Iterator[ContextRef[Context] | ContextRef[facts.Scope]]: + """No direct references, yields nothing.""" + yield from [] + + +class SimpleSequence(ControlFlowGraphNode): + """Control-flow-graph node representing the execution of a sequence of nodes.""" + + #: The sequence of nodes to execute. + seq: list[Node] + #: The control flow graph. + _cfg: ControlFlowGraph + + def __init__(self, seq: list[Node]) -> None: + """Construct control-flow-graph from sequence.""" + super().__init__() + self.seq = seq + self._cfg = ControlFlowGraph.create_from_sequence(seq) + + def children(self) -> Iterator[Node]: + """Yield the nodes in the sequence.""" + yield from self.seq + + def get_entry(self) -> Node: + """Return the entry node, the first in the sequence.""" + return self.seq[0] + + def get_successors(self, node: Node, exit_type: ExitType) -> set[Node | ExitType]: + """Return the successor for a given node (the next in the sequence or the exit in the case of the last node).""" + return self._cfg.get_successors(node, exit_type) + + +class SimpleAlternatives(InterpretationNode): + """Interpretation node representing a concrete set of alternative nodes.""" + + #: The alternatives. 
+ alts: list[Node] + + def __init__(self, alts: list[Node]) -> None: + """Initialize node.""" + super().__init__() + self.alts = alts + + def identify_interpretations(self, state: State) -> dict[InterpretationKey, Callable[[], Node]]: + """Return the interpretations of this node, that is, each of the alternatives.""" + + def get_alt(index: int) -> Node: + return self.alts[index] + + return {i: functools.partial(get_alt, i) for i in range(0, len(self.alts))} + + +def get_owned_scopes(context: ContextRef[Context]) -> set[facts.Scope]: + """Return the set of scopes owned via the given reference to a context. + + Returns empty if the given reference is non-owning. + """ + match context: + case OwningContextRef(ref): + return {scope.ref for scope in ref.owned_scopes()} + case NonOwningContextRef(ref): + return set() diff --git a/src/macaron/code_analyzer/dataflow_analysis/evaluation.py b/src/macaron/code_analyzer/dataflow_analysis/evaluation.py new file mode 100644 index 000000000..69d5a022c --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/evaluation.py @@ -0,0 +1,772 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Functions for evaluating and resolving dataflow analysis expressions.""" + +from __future__ import annotations + +import base64 +import os.path +from dataclasses import dataclass +from typing import TypeVar + +from frozendict import frozendict + +from macaron.code_analyzer.dataflow_analysis import bash, core, facts +from macaron.errors import CallGraphError + + +def evaluate(node: core.Node, value: facts.Value) -> set[tuple[facts.Value, ReadBindings]]: + """Evaluate the given value, at the point immediately prior to the execution of the given node. + + Parameters + ---------- + node: core.Node + The node at which to evaluate the value (i.e. in the context of the before state of the node). + value: facts.Value + The value expression to evaluate. + + Returns + ------- + set[tuple[facts.Value, ReadBindings]] + The set of possible resolved values for the value expression, each with a record of the + resolved value chosen for any read expressions. + """ + eval_transformer = EvaluationTransformer(node.before_state) + return eval_transformer.transform_value(value) + + +@dataclass(frozen=True) +class WriteStatement: + """Representation of a write to a given location of a given value.""" + + #: The location to write to. + location: facts.Location + #: The value to write. + value: facts.Value + + def perform_write(self, before_state: core.State) -> tuple[core.State, set[facts.Location]]: + """Return a state containing only the values stored by the write operation, in context of the before state. + + Also returns the set of locations within that state which should be considered to have been overwritten, + erasing any previous values. + """ + eval_transformer = EvaluationTransformer(before_state) + written_state = core.State() + evaluated_writes = eval_transformer.transform_write(self.location, self.value) + for loc, val, _ in evaluated_writes: + written_state.state[loc][val] = core.StateDebugLabel(core.get_debug_sequence_number(), False) + # Currently, never erases previous values. + return (written_state, set()) + + +@dataclass(frozen=True) +class StatementSet: + """Representation of a set of (simultaneous) write operations.""" + + #: The set of writes. 
+    stmts: set[WriteStatement]
+
+    def apply_effects(self, before_state: core.State) -> core.State:
+        """Apply the effect of the set of writes, returning the resulting state."""
+        final_state = core.State()
+        final_overwritten_locs: set[facts.Location] = set()
+        for stmt in self.stmts:
+            written_state, overwritten_locs = stmt.perform_write(before_state)
+            for loc in overwritten_locs:
+                final_overwritten_locs.add(loc)
+            core.transfer_state(written_state, final_state, debug_is_copy=False)
+
+        core.transfer_state(before_state, final_state, core.ExcludedLocsStateTransferFilter(final_overwritten_locs))
+        return final_state
+
+    @staticmethod
+    def union(*stmt_sets: StatementSet) -> StatementSet:
+        """Combine multiple write sets into one."""
+        stmts: set[WriteStatement] = set()
+        for stmt_set in stmt_sets:
+            for stmt in stmt_set.stmts:
+                stmts.add(stmt)
+        return StatementSet(stmts)
+
+
+class ParameterPlaceholderTransformer:
+    """Expression transformer which replaces parameter placeholders with their corresponding bound values."""
+
+    #: Whether to raise an exception if a parameter is found with no provided binding.
+    allow_unbound_params: bool
+    #: Bindings for value parameter placeholders, mapping parameter name to bound value expression.
+    value_parameter_binds: dict[str, facts.Value]
+    #: Bindings for location parameter placeholders, mapping parameter name to bound location expression.
+    location_parameter_binds: dict[str, facts.LocationSpecifier]
+    #: Bindings for scope parameter placeholders, mapping parameter name to bound scope.
+    scope_parameter_binds: dict[str, facts.Scope]
+
+    def __init__(
+        self,
+        allow_unbound_params: bool = True,
+        value_parameter_binds: dict[str, facts.Value] | None = None,
+        location_parameter_binds: dict[str, facts.LocationSpecifier] | None = None,
+        scope_parameter_binds: dict[str, facts.Scope] | None = None,
+    ) -> None:
+        """Initialize transformer with bindings.
+
+        Parameters
+        ----------
+        allow_unbound_params: bool
+            Whether to raise an exception if a parameter is found with no provided binding.
+        value_parameter_binds: dict[str, facts.Value] | None
+            Bindings for value parameter placeholders, mapping parameter name to bound value expression.
+        location_parameter_binds: dict[str, facts.LocationSpecifier] | None
+            Bindings for location parameter placeholders, mapping parameter name to bound location expression.
+        scope_parameter_binds: dict[str, facts.Scope] | None
+            Bindings for scope parameter placeholders, mapping parameter name to bound scope.
+        """
+        self.allow_unbound_params = allow_unbound_params
+        self.value_parameter_binds = value_parameter_binds or {}
+        self.location_parameter_binds = location_parameter_binds or {}
+        self.scope_parameter_binds = scope_parameter_binds or {}
+
+    def transform_value(self, value: facts.Value) -> facts.Value:
+        """Transform given value expression.
+
+        Returns a value expression with any parameter placeholders replaced with their bound values.
+ """ + match value: + case facts.StringLiteral(_): + return value + case facts.Read(loc): + new_loc = self.transform_location(loc) + if new_loc is loc: + return value + return facts.Read(new_loc) + case facts.ArbitraryNewData(_): + return value + case facts.UnaryStringOp(op, operand): + new_operand = self.transform_value(operand) + if new_operand is operand: + return value + return facts.UnaryStringOp(op, new_operand) + case facts.BinaryStringOp(op, operand1, operand2): + new_operand1 = self.transform_value(operand1) + new_operand2 = self.transform_value(operand2) + + if op == facts.BinaryStringOperator.STRING_CONCAT: + return facts.BinaryStringOp.get_string_concat(new_operand1, new_operand2) + + # if new_operand1 is operand1 and new_operand2 is operand2: + # return value + # return facts.BinaryStringOp(op, new_operand1, new_operand2) + case facts.ParameterPlaceholderValue(name): + if name in self.value_parameter_binds: + return self.value_parameter_binds[name] + if not self.allow_unbound_params: + raise CallGraphError("unbound value parameter: " + name) + return value + case facts.InstalledPackage(name, version, distribution, url): + new_name = self.transform_value(name) + new_version = self.transform_value(version) + new_distribution = self.transform_value(distribution) + new_url = self.transform_value(url) + if new_name is name and new_version is version and new_distribution is distribution and new_url is url: + return value + return facts.InstalledPackage(new_name, new_version, new_distribution, new_url) + case facts.SingleBashTokenConstraint(val): + new_val = self.transform_value(val) + if new_val is val: + return value + return facts.SingleBashTokenConstraint(new_val) + case facts.Symbolic(sym_val): + new_sym_val = self.transform_value(sym_val) + if new_sym_val is sym_val: + return value + return facts.Symbolic(new_sym_val) + raise CallGraphError("unknown facts.Value type: " + value.__class__.__name__) + + def transform_location(self, location: facts.Location) -> facts.Location: + """Transform given location expression. + + Returns a location expression with any parameter placeholders replaced with their bound values. + """ + new_scope = self.transform_scope(location.scope) + new_location_spec = self.transform_location_specifier(location.loc) + if new_scope is location.scope and new_location_spec is location.loc: + return location + return facts.Location(new_scope, new_location_spec) + + def transform_location_specifier(self, location: facts.LocationSpecifier) -> facts.LocationSpecifier: + """Transform given location specifier expression. + + Returns a location specifier expression with any parameter placeholders replaced with their bound values. 
+ """ + match location: + case facts.Filesystem(path): + new_path = self.transform_value(path) + if new_path is path: + return location + return facts.Filesystem(new_path) + case facts.Variable(name): + new_name = self.transform_value(name) + if new_name is name: + return location + return facts.Variable(new_name) + case facts.Artifact(name, file): + new_name = self.transform_value(name) + new_file = self.transform_value(file) + if new_name is name and new_file is file: + return location + return facts.Artifact(new_name, new_file) + case facts.FilesystemAnyUnderDir(path): + new_path = self.transform_value(path) + if new_path is path: + return location + return facts.FilesystemAnyUnderDir(new_path) + case facts.ArtifactAnyFilename(name): + new_name = self.transform_value(name) + if new_name is name: + return location + return facts.ArtifactAnyFilename(new_name) + case facts.ParameterPlaceholderLocation(name): + if name in self.location_parameter_binds: + return self.location_parameter_binds[name] + if not self.allow_unbound_params: + raise CallGraphError("unbound location parameter: " + name) + return location + case facts.Console(): + return location + case facts.Installed(name): + new_name = self.transform_value(name) + if new_name is name: + return location + return facts.Installed(new_name) + raise CallGraphError("unknown location type: " + location.__class__.__name__) + + def transform_scope(self, scope: facts.Scope) -> facts.Scope: + """Transform given scope. + + Returns a scope with any parameter placeholders replaced with their bound values. + """ + if isinstance(scope, facts.ParameterPlaceholderScope): + if scope.name in self.scope_parameter_binds: + return self.scope_parameter_binds[scope.name] + if not self.allow_unbound_params: + raise CallGraphError("unbound scope parameter: " + scope.name) + return scope + + def transform_statement(self, statement: WriteStatement) -> WriteStatement: + """Transform given write statement. + + Returns a write statement with any parameter placeholders replaced with their bound values. + """ + new_location = self.transform_location(statement.location) + new_value = self.transform_value(statement.value) + if new_location is statement.location and new_value is statement.value: + return statement + return WriteStatement(new_location, new_value) + + def transform_statement_set(self, statement_set: StatementSet) -> StatementSet: + """Transform given write statement set. + + Returns a write statement set with any parameter placeholders replaced with their bound values. + """ + changed = False + new_stmts: set[WriteStatement] = set() + for stmt in statement_set.stmts: + new_stmt = self.transform_statement(stmt) + if new_stmt is not stmt: + changed = True + new_stmts.add(new_stmt) + + if not changed: + return statement_set + return StatementSet(new_stmts) + + +T = TypeVar("T") + + +def is_singleton(s: set[T], e: T) -> bool: + """Return whether the given set contains only the single given element.""" + return len(s) == 1 and next(iter(s)) == e + + +def is_singleton_no_bindings(s: set[tuple[T, ReadBindings]], e: T) -> bool: + """Return whether the given set contains only the single given element with no read bindings.""" + return len(s) == 1 and next(iter(s)) == (e, READBINDINGS_EMPTY) + + +def scope_matches(read_scope: facts.Scope, stored_scope: facts.Scope) -> bool: + """Return whether the given read scope matches the given stored scope. + + Matching means that a read of the read scope may return values from the stored scope. 
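+
+    Concretely, the stored scope matches if it is the read scope itself or is reachable from it by
+    following the chain of outer scopes.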
+ """ + cur_scope: facts.Scope | None = read_scope + while cur_scope is not None: + if cur_scope == stored_scope: + return True + cur_scope = cur_scope.outer_scope + return False + + +def location_subsumes(loc: facts.LocationSpecifier, subloc: facts.LocationSpecifier) -> bool: + """Return whether the given location subsumes the given sub location. + + Subsumption means that a read of subloc may be considered to be a read of loc or some part thereof. + """ + if loc == subloc: + return True + + match loc, subloc: + case facts.Filesystem(facts.StringLiteral(loc_path_lit)), facts.Filesystem( + facts.StringLiteral(subloc_path_lit) + ): + # Ignore superficial differences in file path due to "./" relative paths. + if ( + not loc_path_lit.startswith("/") + and not subloc_path_lit.startswith("/") + and loc_path_lit.removeprefix("./") == subloc_path_lit.removeprefix("./") + ): + return True + case facts.FilesystemAnyUnderDir(facts.StringLiteral(dir_lit)), facts.Filesystem( + facts.StringLiteral(subloc_path_lit) + ): + # A file path under the same dir as a FilesystemAnyUnderDir is subsumed. + if subloc_path_lit.startswith(dir_lit.removesuffix("/") + "/"): + return True + return False + + +def get_values_for_subsumed_read( + read_loc: facts.LocationSpecifier, state_loc: facts.LocationSpecifier, state_vals: set[facts.Value] +) -> set[facts.Value]: + """Return the set of values stored in the state location, if relevant for the given read location.""" + match read_loc, state_loc: + case facts.ArtifactAnyFilename(read_artifact_name), facts.Artifact(state_artifact_name, state_artifact_file): + if read_artifact_name == state_artifact_name: + return {state_artifact_file} + + if location_subsumes(state_loc, read_loc): + return state_vals + + return set() + + +class ReadBindings: + """Set of bindings of read expressions to values bound as the result of those read expressions.""" + + #: Mapping of read expressions to bound values. 
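+    #: Stored as a frozendict so that the bindings are immutable and hashable (ReadBindings instances are used in sets).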
+ bindings: frozendict[facts.Read, facts.Value] + + def __init__(self, binds: frozendict[facts.Read, facts.Value] | None = None) -> None: + """Initialize with given bindings.""" + self.bindings = binds or frozendict() + + def __len__(self) -> int: + """Return the number of bindings in the set.""" + return len(self.bindings) + + def with_binding(self, read: facts.Read, value: facts.Value) -> ReadBindings | None: + """Return bindings with the given additional binding, or None if the bindings conflict.""" + if read in self.bindings: + if self.bindings[read] != value: + return None + return self + new_binds = self.bindings.set(read, value) + return ReadBindings(new_binds) + + def with_bindings(self, bindings: ReadBindings) -> ReadBindings | None: + """Return bindings with the given additional bindings, or None if the bindings conflict.""" + if len(bindings) == 0: + return self + if len(self) == 0: + return bindings + + for read, val in bindings.bindings.items(): + if read in self.bindings: + if self.bindings[read] != val: + return None + + combined_bindings = frozendict({**self.bindings, **bindings.bindings}) + return ReadBindings(combined_bindings) + + @staticmethod + def combine_bindings(bindings_list: list[ReadBindings]) -> ReadBindings | None: + """Return bindings combining all bindings in the given list, or None if the bindings conflict.""" + if len(bindings_list) == 0: + return READBINDINGS_EMPTY + + cur_binding: ReadBindings | None = bindings_list[0] + for bindings in bindings_list[1:]: + cur_binding = cur_binding.with_bindings(bindings) if cur_binding is not None else None + if cur_binding is None: + return None + return cur_binding + + def __hash__(self) -> int: + return hash(self.bindings) + + def __eq__(self, other: object) -> bool: + if isinstance(other, ReadBindings): + return self.bindings == other.bindings + return False + + def __repr__(self) -> str: + return str(self.bindings) + + +# Convenience instance of empty bindings. +READBINDINGS_EMPTY = ReadBindings() + + +class EvaluationTransformer: + """Expression transformer which evaluates the expression to produce a set of resolved values. + + The expression is evaluated in the context of a specified abstract storage state. + """ + + #: The state from which to resolve reads. + state: core.State + + def __init__(self, state: core.State) -> None: + """Initialize transformer with state from which to resolve reads.""" + self.state = state + + def transform_write( + self, location: facts.Location, value: facts.Value + ) -> set[tuple[facts.Location, facts.Value, ReadBindings]]: + """Transform a write location and value, returning the set of resolved values with the necessary bindings.""" + evaluated_locations = self.transform_location(location) + evaluated_values = self.transform_value(value) + result: set[tuple[facts.Location, facts.Value, ReadBindings]] = set() + for loc, loc_bindings in evaluated_locations: + for val, val_bindings in evaluated_values: + combined_bindings = loc_bindings.with_bindings(val_bindings) + if combined_bindings is not None: + result.add((loc, val, combined_bindings)) + return result + + def transform_value(self, value: facts.Value) -> set[tuple[facts.Value, ReadBindings]]: + """Transform a value expression, returning the set of resolved values with the necessary bindings.""" + match value: + case facts.StringLiteral(_): + return {(value, READBINDINGS_EMPTY)} + case facts.Read(loc): + # Read values from the state. 
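+                # A symbolic placeholder for the read is always included alongside any concrete matches,
+                # so that reads which cannot (yet) be resolved are preserved rather than silently lost.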
+ new_locs = self.transform_location(loc) + read_vals: set[tuple[facts.Value, ReadBindings]] = set() + for new_loc, new_loc_bindings in new_locs: + read_vals.add((facts.Symbolic(facts.Read(new_loc)), new_loc_bindings)) + + for state_loc, state_vals in self.state.state.items(): + if scope_matches(new_loc.scope, state_loc.scope): + for read_val in get_values_for_subsumed_read( + new_loc.loc, state_loc.loc, set(state_vals.keys()) + ): + combined_bindings = new_loc_bindings.with_binding(value, read_val) + if combined_bindings is not None: + read_vals.add((read_val, combined_bindings)) + return read_vals + case facts.ArbitraryNewData(_): + return {(value, READBINDINGS_EMPTY)} + case facts.UnaryStringOp(op, operand): + new_operands = self.transform_value(operand) + if op == facts.UnaryStringOperator.BASENAME: + # Concretely evaluate basename operator for string literal. + basename_result: set[tuple[facts.Value, ReadBindings]] = set() + for new_operand, new_operand_bindings in new_operands: + if isinstance(new_operand, facts.StringLiteral): + basename_result.add( + (facts.StringLiteral(os.path.basename(new_operand.literal)), new_operand_bindings) + ) + return basename_result + if op == facts.UnaryStringOperator.BASE64DECODE: + # Concretely evaluate base64 decode operator for string literal + base64_decode_result: set[tuple[facts.Value, ReadBindings]] = set() + for new_operand, new_operand_bindings in new_operands: + if isinstance(new_operand, facts.StringLiteral): + base64_decode_result.add( + ( + facts.StringLiteral(base64.b64decode(new_operand.literal).decode("utf-8")), + new_operand_bindings, + ) + ) + return base64_decode_result + return set() + case facts.BinaryStringOp(op, operand1, operand2): + new_operand1s = self.transform_value(operand1) + new_operand2s = self.transform_value(operand2) + if op == facts.BinaryStringOperator.STRING_CONCAT: + # Concretely evaluate string concatenation for concat of 2 string literals. + concat_result: set[tuple[facts.Value, ReadBindings]] = set() + for new_operand1, new_operand1_bindings in new_operand1s: + for new_operand2, new_operand2_bindings in new_operand2s: + if isinstance(new_operand1, facts.StringLiteral) and isinstance( + new_operand2, facts.StringLiteral + ): + combined_bindings = new_operand1_bindings.with_bindings(new_operand2_bindings) + if combined_bindings is not None: + # TODO Have some truncated symbolic representation for + # excessively long strings rather than just dropping them. + if len(new_operand1.literal) + len(new_operand2.literal) < 10000: + concat_result.add( + ( + facts.StringLiteral(new_operand1.literal + new_operand2.literal), + combined_bindings, + ) + ) + return concat_result + + # return set() + case facts.SingleBashTokenConstraint(operand): + # For single bash token constraint, to evaluate a string literal, the literal is parsed + # as a bash expression, and if that results in a single element, then the constraint + # is met and the unmodified literal is returned, if it parses as multiple elements, then + # no resolved values are produced for that literal. + # + # Otherwise returns the constrained expression as is, while simplifying redundant + # multiply-nested constraints. 
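+                # For example, the literal "foo" satisfies the constraint, while "foo bar" parses as two
+                # Bash tokens and is therefore dropped from the resolved set.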
+                #
+                new_operands = self.transform_value(operand)
+                constraint_result: set[tuple[facts.Value, ReadBindings]] = set()
+                for new_operand, new_operand_bindings in new_operands:
+                    match new_operand:
+                        case facts.StringLiteral(lit):
+                            parsed_bash_expr = bash.parse_bash_expr(lit)
+                            if parsed_bash_expr is not None and len(parsed_bash_expr) == 1:
+                                constraint_result.add((new_operand, new_operand_bindings))
+
+                        case facts.SingleBashTokenConstraint(suboperand):
+                            constraint_result.add((facts.SingleBashTokenConstraint(suboperand), new_operand_bindings))
+                        case _:
+                            constraint_result.add((facts.SingleBashTokenConstraint(new_operand), new_operand_bindings))
+                return constraint_result
+            case facts.ParameterPlaceholderValue(name):
+                return set()
+            case facts.InstalledPackage(name, version, distribution, url):
+                # Resolve parameters and return every combination.
+                new_names = self.transform_value(name)
+                new_versions = self.transform_value(version)
+                new_distributions = self.transform_value(distribution)
+                new_urls = self.transform_value(url)
+                if (
+                    is_singleton_no_bindings(new_names, name)
+                    and is_singleton_no_bindings(new_versions, version)
+                    and is_singleton_no_bindings(new_distributions, distribution)
+                    and is_singleton_no_bindings(new_urls, url)
+                ):
+                    return {(value, READBINDINGS_EMPTY)}
+                result: set[tuple[facts.Value, ReadBindings]] = set()
+                for new_name, new_name_bindings in new_names:
+                    for new_version, new_version_bindings in new_versions:
+                        version_combined_bindings = new_name_bindings.with_bindings(new_version_bindings)
+                        if version_combined_bindings is None:
+                            continue
+                        for new_distribution, new_distribution_bindings in new_distributions:
+                            distribution_combined_bindings = version_combined_bindings.with_bindings(
+                                new_distribution_bindings
+                            )
+                            if distribution_combined_bindings is None:
+                                continue
+                            for new_url, new_url_bindings in new_urls:
+                                url_combined_bindings = distribution_combined_bindings.with_bindings(new_url_bindings)
+                                if url_combined_bindings is not None:
+                                    result.add(
+                                        (
+                                            facts.InstalledPackage(new_name, new_version, new_distribution, new_url),
+                                            url_combined_bindings,
+                                        )
+                                    )
+                return result
+            case facts.Symbolic(_):
+                return {(value, READBINDINGS_EMPTY)}
+        raise CallGraphError("unknown facts.Value type: " + value.__class__.__name__)
+
+    def transform_location(self, location: facts.Location) -> set[tuple[facts.Location, ReadBindings]]:
+        """Transform a location expression, returning the set of resolved locations with the necessary bindings."""
+        new_location_specs = self.transform_location_specifier(location.loc)
+        if is_singleton_no_bindings(new_location_specs, location.loc):
+            return {(location, READBINDINGS_EMPTY)}
+        return {
+            (facts.Location(location.scope, new_location_spec), new_location_spec_bindings)
+            for new_location_spec, new_location_spec_bindings in new_location_specs
+        }
+
+    def transform_location_specifier(
+        self, location: facts.LocationSpecifier
+    ) -> set[tuple[facts.LocationSpecifier, ReadBindings]]:
+        """Transform a location specifier expression, returning the set of resolved specifiers with the necessary bindings."""
+        match location:
+            case facts.Filesystem(path):
+                new_paths = self.transform_value(path)
+                if is_singleton_no_bindings(new_paths, path):
+                    return {(location, READBINDINGS_EMPTY)}
+                return {(facts.Filesystem(new_path), new_path_bindings) for new_path, new_path_bindings in new_paths}
+            case facts.Variable(name):
+                new_names = self.transform_value(name)
+                if is_singleton_no_bindings(new_names, name):
+                    return {(location, READBINDINGS_EMPTY)}
+                return {(facts.Variable(new_name), new_name_bindings) for new_name, new_name_bindings in new_names}
+            case facts.Artifact(name, file):
+                new_names = self.transform_value(name)
+                new_files = self.transform_value(file)
+                if is_singleton_no_bindings(new_names, name) and is_singleton_no_bindings(new_files, file):
+                    return {(location, READBINDINGS_EMPTY)}
+                artifact_result: set[tuple[facts.LocationSpecifier, ReadBindings]] = set()
+                for new_name, new_name_bindings in new_names:
+                    for new_file, new_file_bindings in new_files:
+                        combined_bindings = new_name_bindings.with_bindings(new_file_bindings)
+                        if combined_bindings is not None:
+                            artifact_result.add((facts.Artifact(new_name, new_file), combined_bindings))
+                return artifact_result
+            case facts.FilesystemAnyUnderDir(path):
+                new_paths = self.transform_value(path)
+                if is_singleton_no_bindings(new_paths, path):
+                    return {(location, READBINDINGS_EMPTY)}
+                return {
+                    (facts.FilesystemAnyUnderDir(new_path), new_path_bindings)
+                    for new_path, new_path_bindings in new_paths
+                }
+            case facts.ArtifactAnyFilename(name):
+                new_names = self.transform_value(name)
+                if is_singleton_no_bindings(new_names, name):
+                    return {(location, READBINDINGS_EMPTY)}
+                return {
+                    (facts.ArtifactAnyFilename(new_name), new_name_bindings)
+                    for new_name, new_name_bindings in new_names
+                }
+            case facts.ParameterPlaceholderLocation(name):
+                return {(location, READBINDINGS_EMPTY)}
+            case facts.Console():
+                return {(location, READBINDINGS_EMPTY)}
+            case facts.Installed(name):
+                new_names = self.transform_value(name)
+                return {(facts.Installed(new_name), new_name_bindings) for new_name, new_name_bindings in new_names}
+        raise CallGraphError("unknown location type: " + location.__class__.__name__)
+
+
+# TODO generalise visitors
+class ContainsSymbolicVisitor:
+    """Visitor to determine whether a given expression contains any symbolic expressions."""
+
+    def visit_value(self, value: facts.Value) -> bool:
+        """Search value expression for symbolic expressions and return whether any were found."""
+        match value:
+            case facts.StringLiteral(_):
+                return False
+            case facts.Read(loc):
+                return self.visit_location(loc)
+            case facts.ArbitraryNewData(_):
+                return False
+            case facts.UnaryStringOp(_, operand):
+                return self.visit_value(operand)
+            case facts.BinaryStringOp(_, operand1, operand2):
+                return self.visit_value(operand1) or self.visit_value(operand2)
+            case facts.ParameterPlaceholderValue(name):
+                return False
+            case facts.InstalledPackage(name, version, distribution, url):
+                return (
+                    self.visit_value(name)
+                    or self.visit_value(version)
+                    or self.visit_value(distribution)
+                    or self.visit_value(url)
+                )
+            case facts.SingleBashTokenConstraint(operand):
+                return self.visit_value(operand)
+            case facts.Symbolic(_):
+                return True
+        raise CallGraphError("unknown facts.Value type: " + value.__class__.__name__)
+
+    def visit_location(self, location: facts.Location) -> bool:
+        """Search location expression for symbolic expressions and return whether any were found."""
+        return self.visit_location_specifier(location.loc)
+
+    def visit_location_specifier(self, location: facts.LocationSpecifier) -> bool:
+        """Search location specifier expression for symbolic expressions and return whether any were found."""
+        match location:
+            case facts.Filesystem(path):
+                return self.visit_value(path)
+            case facts.Variable(name):
+                return self.visit_value(name)
+            case facts.Artifact(name, file):
+                return self.visit_value(name) or self.visit_value(file)
+            case facts.FilesystemAnyUnderDir(path):
+                return self.visit_value(path)
+            case facts.ArtifactAnyFilename(name):
+                return self.visit_value(name)
+            case facts.ParameterPlaceholderLocation(name):
+                return False
+            case facts.Console():
+                return False
+            case facts.Installed(name):
+                return self.visit_value(name)
+        raise CallGraphError("unknown location type: " + location.__class__.__name__)
+
+
+def filter_symbolic_values(values: set[tuple[facts.Value, ReadBindings]]) -> set[tuple[facts.Value, ReadBindings]]:
+    """Filter out symbolic values.
+
+    Returns a set containing all elements from the given set that do not contain any symbolic expressions.
+    """
+    return {val for val in values if not ContainsSymbolicVisitor().visit_value(val[0])}
+
+
+def filter_symbolic_locations(
+    locs: set[tuple[facts.Location, ReadBindings]],
+) -> set[tuple[facts.Location, ReadBindings]]:
+    """Filter out symbolic locations.
+
+    Returns a set containing all elements from the given set that do not contain any symbolic expressions.
+    """
+    return {loc for loc in locs if not ContainsSymbolicVisitor().visit_location(loc[0])}
+
+
+def filter_symbolic_location_specifiers(
+    locs: set[tuple[facts.LocationSpecifier, ReadBindings]],
+) -> set[tuple[facts.LocationSpecifier, ReadBindings]]:
+    """Filter out symbolic location specifiers.
+
+    Returns a set containing all elements from the given set that do not contain any symbolic expressions.
+    """
+    return {loc for loc in locs if not ContainsSymbolicVisitor().visit_location_specifier(loc[0])}
+
+
+def get_single_resolved_str(resolved_values: set[tuple[facts.Value, ReadBindings]]) -> str | None:
+    """If the given set contains only a single string literal value, return that string, or else None."""
+    resolved_values = filter_symbolic_values(resolved_values)
+    if len(resolved_values) == 1:
+        val = next(iter(resolved_values))[0]
+        if isinstance(val, facts.StringLiteral):
+            return val.literal
+    return None
+
+
+def get_single_resolved_str_with_default(
+    resolved_values: set[tuple[facts.Value, ReadBindings]], default_value: str
+) -> str:
+    """If the given set contains only a single string literal value, return that string, else return default value."""
+    result = get_single_resolved_str(resolved_values)
+    if result is not None:
+        return result
+    return default_value
+
+
+def parse_str_expr_split(str_expr: facts.Value, delimiter_char: str, maxsplit: int = -1) -> list[facts.Value]:
+    """Split a string expression on the appearance of the delimiter char in literal parts of the expression."""
+    if len(delimiter_char) != 1:
+        raise CallGraphError("delimiter_char must be a single char")
+
+    match str_expr:
+        case facts.StringLiteral(literal):
+            split_str = literal.split(delimiter_char, maxsplit=maxsplit)
+            return [facts.StringLiteral(s) for s in split_str]
+        case facts.BinaryStringOp(facts.BinaryStringOperator.STRING_CONCAT, o1, o2):
+            split_lhs = parse_str_expr_split(o1, delimiter_char, maxsplit)
+            split_rhs = parse_str_expr_split(
+                o2, delimiter_char, -1 if maxsplit == -1 else maxsplit - (len(split_lhs) - 1)
+            )
+            if len(split_lhs) == 1 and len(split_rhs) == 1:
+                return [str_expr]
+            return (
+                split_lhs[:-1] + [facts.BinaryStringOp.get_string_concat(split_lhs[-1], split_rhs[0])] + split_rhs[1:]
+            )
+    return [str_expr]
diff --git a/src/macaron/code_analyzer/dataflow_analysis/facts.py b/src/macaron/code_analyzer/dataflow_analysis/facts.py
new file mode 100644
index 000000000..28d0f869d
--- /dev/null
+++ b/src/macaron/code_analyzer/dataflow_analysis/facts.py
@@ -0,0 +1,702 @@
+# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""Definitions of dataflow analysis representation for value expressions and abstract storage locations.
+
+Also includes an incomplete implementation of serialization/deserialization to a Souffle-datalog-compatible
+representation, a remnant of an earlier prototype that involved the datalog engine in the analysis. The
+serialization is retained because it produces a human-readable string representation that is useful for
+debugging, and it may be necessary in future to make these expressions available to the policy engine
+(which uses datalog). Deserialization is currently non-functional, primarily due to the inability to
+deserialize scope identity; it is left here for posterity as it may be revisited in future.
+"""
+
+from __future__ import annotations
+
+import abc
+from dataclasses import dataclass
+from enum import Enum, auto
+
+from macaron.errors import CallGraphError, ParseError
+
+
+class Value(abc.ABC):
+    """Base class for value expressions.
+
+    Subclasses should be comparable by structural equality.
+    """
+
+    @abc.abstractmethod
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+
+    def __str__(self) -> str:
+        return self.to_datalog_fact_string()
+
+    def __repr__(self) -> str:
+        return self.__str__()
+
+
+class LocationSpecifier(abc.ABC):
+    """Base class for location expressions.
+
+    Subclasses should be comparable by structural equality.
+    """
+
+    @abc.abstractmethod
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+
+    def __str__(self) -> str:
+        return self.to_datalog_fact_string()
+
+    def __repr__(self) -> str:
+        return self.__str__()
+
+
+# Sequence number to automatically give scopes unique names.
+# Note: not thread safe.
+SCOPE_SEQUENCE_NUMBER = 0
+
+
+class Scope:
+    """Representation of a scope in which a location may exist.
+
+    This allows for distinct locations with the same name/path/expression to exist separately in different namespaces.
+
+    A scope may have an outer scope, such that a read from a scope may return values from
+    the outer scope(s).
+
+    Unlike other expression classes, scopes are distinguished by object identity and not
+    structural equality (TODO now that scopes have names, maybe should revisit this since
+    it makes serialization/deserialization difficult).
+    """
+
+    #: Name for display purposes.
+    identifier: str
+    #: Outer scope, if any.
+    outer_scope: Scope | None
+
+    def __init__(self, name: str, outer_scope: Scope | None = None) -> None:
+        """Initialize scope.
+
+        Parameters
+        ----------
+        name: str
+            Name for display purposes (a sequence number will automatically be appended to make it unique).
+        outer_scope: Scope | None
+            Outer scope, if any.
+ """ + self.outer_scope = outer_scope + global SCOPE_SEQUENCE_NUMBER # pylint: disable=global-statement + self.identifier = str(SCOPE_SEQUENCE_NUMBER) + "_" + name + SCOPE_SEQUENCE_NUMBER += 1 + + def __hash__(self) -> int: + return id(self) + + def __eq__(self, other: object) -> bool: + return self is other + + def to_datalog_fact_string(self, include_outer_scope: bool = False) -> str: + """Return string representation of scope (in datalog serialized format).""" + return ( + "$Scope(" + + enquote_datalog_string_literal(self.identifier) + + ( + ", " + self.outer_scope.to_datalog_fact_string() + if include_outer_scope and self.outer_scope is not None + else "" + ) + + ")" + ) + + def __str__(self) -> str: + return self.to_datalog_fact_string() + + def __repr__(self) -> str: + return self.__str__() + + +class ParameterPlaceholderScope(Scope): + """Special scope placeholder to allow generic parameterized expressions. + + TODO This is not really a proper subclass of Scope, should revisit type relationship. + """ + + #: Parameter name. + name: str + + def __init__(self, name: str) -> None: # pylint: disable=super-init-not-called + """Initialize placeholder scope with given parameter name.""" + self.identifier = "param_" + name + self.name = name + + def __hash__(self) -> int: + return hash(self.name) + + def __eq__(self, other: object) -> bool: + return isinstance(other, ParameterPlaceholderScope) and other.name == self.name + + def to_datalog_fact_string(self, include_outer_scope: bool = False) -> str: + """Return string representation of scope (in datalog serialized format).""" + return "$ParameterPlaceholderScope(" + enquote_datalog_string_literal(self.name) + ")" + + def __str__(self) -> str: + return self.to_datalog_fact_string() + + def __repr__(self) -> str: + return self.__str__() + + +@dataclass(frozen=True, repr=False) +class Location: + """A location expression qualified with the scope it resides in.""" + + #: Scope the location resides in. + scope: Scope + #: Location expression. + loc: LocationSpecifier + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "[" + self.scope.to_datalog_fact_string() + ", " + self.loc.to_datalog_fact_string() + "]" + + def __str__(self) -> str: + return self.to_datalog_fact_string() + + def __repr__(self) -> str: + return self.__str__() + + +@dataclass(frozen=True, repr=False) +class StringLiteral(Value): + """Value expression representing a string literal.""" + + #: String literal. + literal: str + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "$StringLiteral(" + enquote_datalog_string_literal(self.literal) + ")" + + +@dataclass(frozen=True, repr=False) +class Read(Value): + """Value expression representing a read of the value stored at a location.""" + + #: Read value location. + loc: Location + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "$Read(" + self.loc.to_datalog_fact_string() + ")" + + +@dataclass(frozen=True, repr=False) +class ArbitraryNewData(Value): + """Value expression representing some arbitrary data.""" + + #: Name distiguishing the origin of the data. 
+    at: str
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return "$ArbitraryNewData(" + enquote_datalog_string_literal(self.at) + ")"
+
+
+@dataclass(frozen=True, repr=False)
+class InstalledPackage(Value):
+    """Value expression representing an installed package, with identifying metadata (name, version, etc.)."""
+
+    #: Package name.
+    name: Value
+    #: Package version.
+    version: Value
+    #: Package distribution.
+    distribution: Value
+    #: URL of the package.
+    url: Value
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return (
+            "$InstalledPackage("
+            + self.name.to_datalog_fact_string()
+            + ", "
+            + self.version.to_datalog_fact_string()
+            + ", "
+            + self.distribution.to_datalog_fact_string()
+            + ", "
+            + self.url.to_datalog_fact_string()
+            + ")"
+        )
+
+
+class UnaryStringOperator(Enum):
+    """Unary operators."""
+
+    BASENAME = auto()
+    BASE64_ENCODE = auto()
+    BASE64DECODE = auto()
+
+
+def un_op_to_datalog_fact_string(op: UnaryStringOperator) -> str:
+    """Return string representation of operator (in datalog serialized format)."""
+    if op == UnaryStringOperator.BASENAME:
+        return "$BaseName"
+    if op == UnaryStringOperator.BASE64_ENCODE:
+        return "$Base64Encode"
+    if op == UnaryStringOperator.BASE64DECODE:
+        return "$Base64Decode"
+    raise CallGraphError("unknown UnaryStringOperator")
+
+
+class BinaryStringOperator(Enum):
+    """Binary operators."""
+
+    STRING_CONCAT = auto()
+
+
+def bin_op_to_datalog_fact_string(op: BinaryStringOperator) -> str:
+    """Return string representation of operator (in datalog serialized format)."""
+    if op == BinaryStringOperator.STRING_CONCAT:
+        return "$StringConcat"
+    raise CallGraphError("unknown BinaryStringOperator")
+
+
+@dataclass(frozen=True, repr=False)
+class UnaryStringOp(Value):
+    """Value expression representing a unary operator."""
+
+    #: Operator.
+    op: UnaryStringOperator
+    #: Operand value.
+    operand: Value
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return (
+            "$UnaryStringOp("
+            + un_op_to_datalog_fact_string(self.op)
+            + ", "
+            + self.operand.to_datalog_fact_string()
+            + ")"
+        )
+
+
+@dataclass(frozen=True, repr=False)
+class BinaryStringOp(Value):
+    """Value expression representing a binary operator."""
+
+    #: Operator.
+    op: BinaryStringOperator
+    #: First operand value.
+    operand1: Value
+    #: Second operand value.
+    operand2: Value
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return (
+            "$BinaryStringOp("
+            + bin_op_to_datalog_fact_string(self.op)
+            + ", "
+            + self.operand1.to_datalog_fact_string()
+            + ", "
+            + self.operand2.to_datalog_fact_string()
+            + ")"
+        )
+
+    @staticmethod
+    def get_string_concat(operand1: Value, operand2: Value) -> Value:
+        """Construct a string concatenation operator.
+
+        Applies some simple constant-folding simplifications.
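+
+        For example (illustrative): get_string_concat(StringLiteral("a"), StringLiteral("b"))
+        yields StringLiteral("ab"), and get_string_concat(StringLiteral(""), x) yields x
+        unchanged; operands not covered by any folding rule are combined into an
+        unsimplified BinaryStringOp.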
+ """ + match operand1, operand2: + # "a" + "b" = "ab" + case StringLiteral(op1_lit), StringLiteral(op2_lit): + return StringLiteral(op1_lit + op2_lit) + # "" + x = x + case StringLiteral(""), _: + return operand2 + # x + "" = x + case _, StringLiteral(""): + return operand1 + # (x + "a") + "b" = x + "ab" + case BinaryStringOp(BinaryStringOperator.STRING_CONCAT, subop1, StringLiteral(subop2_lit)), StringLiteral( + op2_lit + ): + return BinaryStringOp(BinaryStringOperator.STRING_CONCAT, subop1, StringLiteral(subop2_lit + op2_lit)) + # "a" + ("b" + x) = "ab" + x + case StringLiteral(op1_lit), BinaryStringOp( + BinaryStringOperator.STRING_CONCAT, StringLiteral(subop1_lit), subop2 + ): + return BinaryStringOp(BinaryStringOperator.STRING_CONCAT, StringLiteral(op1_lit + subop1_lit), subop2) + + return BinaryStringOp(BinaryStringOperator.STRING_CONCAT, operand1, operand2) + + +@dataclass(frozen=True, repr=False) +class ParameterPlaceholderValue(Value): + """Special placeholder value to allow generic parameterized expressions.""" + + #: Parameter name. + name: str + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "$ParameterPlaceholderValue(" + enquote_datalog_string_literal(self.name) + ")" + + +@dataclass(frozen=True, repr=False) +class Symbolic(Value): + """Value expression representing a symbolic expression. + + Represents an expression that has been "frozen" in symbolic form rather than evaluated concretely. + """ + + #: Symbolic expression. + val: Value + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "$Symbolic(" + self.val.to_datalog_fact_string() + ")" + + +@dataclass(frozen=True, repr=False) +class SingleBashTokenConstraint(Value): + """Value expression representing a constraint that the underlying value does not parse as multiple Bash tokens.""" + + #: Constrained expression. + val: Value + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "$SingleBashTokenConstraint(" + self.val.to_datalog_fact_string() + ")" + + +@dataclass(frozen=True, repr=False) +class Filesystem(LocationSpecifier): + """Location expression representing a filesystem location at a particular file path.""" + + #: Filepath value. + path: Value + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "$Filesystem(" + self.path.to_datalog_fact_string() + ")" + + +@dataclass(frozen=True, repr=False) +class Variable(LocationSpecifier): + """Location expression representing a variable.""" + + #: Variable name. + name: Value + + def to_datalog_fact_string(self) -> str: + """Return string representation of expression (in datalog serialized format).""" + return "$Variable(" + self.name.to_datalog_fact_string() + ")" + + +@dataclass(frozen=True, repr=False) +class Artifact(LocationSpecifier): + """Location expression representing a file stored within some named artifact storage location.""" + + #: Artifact name. + name: Value + #: File name within artifact. 
+    file: Value
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return "$Artifact(" + self.name.to_datalog_fact_string() + ", " + self.file.to_datalog_fact_string() + ")"
+
+
+@dataclass(frozen=True, repr=False)
+class FilesystemAnyUnderDir(LocationSpecifier):
+    """Location expression representing any file under a particular directory."""
+
+    #: Directory file path.
+    path: Value
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return "$FilesystemAnyUnderDir(" + self.path.to_datalog_fact_string() + ")"
+
+
+@dataclass(frozen=True, repr=False)
+class ArtifactAnyFilename(LocationSpecifier):
+    """Location expression representing any file contained within a named artifact storage location."""
+
+    #: Artifact name.
+    name: Value
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return "$ArtifactAnyFilename(" + self.name.to_datalog_fact_string() + ")"
+
+
+@dataclass(frozen=True, repr=False)
+class ParameterPlaceholderLocation(LocationSpecifier):
+    """Special placeholder location expression to allow generic parameterized expressions."""
+
+    #: Parameter name.
+    name: str
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return "$ParameterPlaceholderLocation(" + enquote_datalog_string_literal(self.name) + ")"
+
+
+@dataclass(frozen=True, repr=False)
+class Console(LocationSpecifier):
+    """Location expression representing a console, pipe or other text stream."""
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return "$Console"
+
+
+@dataclass(frozen=True, repr=False)
+class Installed(LocationSpecifier):
+    """Location expression representing an installed package."""
+
+    #: Package name.
+    name: Value
+
+    def to_datalog_fact_string(self) -> str:
+        """Return string representation of expression (in datalog serialized format)."""
+        return "$Installed(" + self.name.to_datalog_fact_string() + ")"
+
+
+def enquote_datalog_string_literal(literal: str) -> str:
+    """Enquote a datalog string literal, with appropriate escaping."""
+    return '"' + literal.replace("\\", "\\\\").replace('"', '\\"') + '"'
+
+
+class FactParseError(Exception):
+    """Raised when an error occurs during fact parsing."""
+
+
+def consume_whitespace(text: str) -> str:
+    """Consume leading whitespace, returning the remainder of the text."""
+    text_end_idx = len(text)
+    space_end_idx = text_end_idx
+    idx = 0
+    while idx < text_end_idx:
+        if text[idx].isspace():
+            idx = idx + 1
+        else:
+            space_end_idx = idx
+            break
+    return text[space_end_idx:text_end_idx]
+
+
+def consume(text: str, token: str) -> str:
+    """Consume the leading token from the text.
+
+    Raises exception if text does not start with the token.
+    """
+    if text.startswith(token):
+        return text[len(token) :]
+    raise FactParseError(text)
+
+
+def parse_qualified_name(text: str) -> tuple[str, str]:
+    """Parse a qualified name, returning the name and the remainder of the text."""
+    text = consume_whitespace(text)
+    text_end_idx = len(text)
+    name_end_idx = text_end_idx
+    idx = 0
+    while idx < text_end_idx:
+        if text[idx].isalnum() or text[idx] == "_" or text[idx] == "?" or text[idx] == ".":
+            idx = idx + 1
+        else:
+            name_end_idx = idx
+            break
+    return text[0:name_end_idx], text[name_end_idx:text_end_idx]
+
+
+def parse_symbol(text: str) -> tuple[str, str]:
+    """Parse datalog-serialized string literal."""
+    text = consume(text, '"')
+    text_end_idx = len(text)
+    str_end_idx = text_end_idx
+    idx = 0
+    in_escape = False
+    char_list = []
+    while idx < text_end_idx:
+        if text[idx] == "\\":
+            if not in_escape:
+                in_escape = True
+            else:
+                char_list.append("\\")
+                in_escape = False
+        elif text[idx] == '"':
+            if not in_escape:
+                str_end_idx = idx
+                break
+            char_list.append('"')
+            in_escape = False
+        else:
+            char_list.append(text[idx])
+        idx = idx + 1
+
+    lit = "".join(char_list)
+    text = text[str_end_idx:]
+    text = consume(text, '"')
+    return lit, text
+
+
+def parse_location_specifier(text: str) -> tuple[LocationSpecifier, str]:
+    """Deserialize location specifier from string representation (in datalog serialized format)."""
+    text = consume(text, "$")
+    kind, text = parse_qualified_name(text)
+    match kind:
+        case "Filesystem":
+            text = consume(text, "(")
+            path_val, text = parse_value(text)
+            text = consume_whitespace(text)
+            text = consume(text, ")")
+            return Filesystem(path_val), text
+        case "Variable":
+            text = consume(text, "(")
+            name_val, text = parse_value(text)
+            text = consume_whitespace(text)
+            text = consume(text, ")")
+            return Variable(name_val), text
+        case "Artifact":
+            text = consume(text, "(")
+            name_val, text = parse_value(text)
+            text = consume(text, ",")
+            text = consume_whitespace(text)
+            file_val, text = parse_value(text)
+            text = consume(text, ")")
+            return Artifact(name_val, file_val), text
+        case "FilesystemAnyUnderDir":
+            text = consume(text, "(")
+            path_val, text = parse_value(text)
+            text = consume_whitespace(text)
+            text = consume(text, ")")
+            return FilesystemAnyUnderDir(path_val), text
+        case "ArtifactAnyFilename":
+            text = consume(text, "(")
+            name_val, text = parse_value(text)
+            text = consume_whitespace(text)
+            text = consume(text, ")")
+            return ArtifactAnyFilename(name_val), text
+        case "Console":
+            return Console(), text
+        case "Installed":
+            text = consume(text, "(")
+            name_val, text = parse_value(text)
+            text = consume_whitespace(text)
+            text = consume(text, ")")
+            return Installed(name_val), text
+
+    raise FactParseError()
+
+
+def parse_location(text: str) -> tuple[Location, str]:
+    """Deserialize location from string representation (in datalog serialized format).
+
+    Currently non-functional primarily due to the inability to deserialize scope identity.
+ """ + raise ParseError("cannot parse, need fix") + + +def parse_value(text: str) -> tuple[Value, str]: + """Deserialize value expression from string representation (in datalog serialized format).""" + text = consume(text, "$") + kind, text = parse_qualified_name(text) + match kind: + case "StringLiteral": + text = consume(text, "(") + lit, text = parse_symbol(text) + text = consume_whitespace(text) + text = consume(text, ")") + return StringLiteral(lit), text + case "Read": + text = consume(text, "(") + loc, text = parse_location(text) + text = consume_whitespace(text) + text = consume(text, ")") + return Read(loc), text + case "ArbitraryNewData": + text = consume(text, "(") + at, text = parse_symbol(text) + text = consume_whitespace(text) + text = consume(text, ")") + return ArbitraryNewData(at), text + case "UnaryStringOp": + text = consume(text, "(") + un_operator, text = parse_un_op(text) + text = consume(text, ",") + text = consume_whitespace(text) + operand_val, text = parse_value(text) + text = consume(text, ")") + return UnaryStringOp(un_operator, operand_val), text + case "BinaryStringOp": + text = consume(text, "(") + bin_operator, text = parse_bin_op(text) + text = consume(text, ",") + text = consume_whitespace(text) + operand1, text = parse_value(text) + text = consume(text, ",") + text = consume_whitespace(text) + operand2, text = parse_value(text) + text = consume(text, ")") + return BinaryStringOp(bin_operator, operand1, operand2), text + case "ParameterPlaceholderValue": + text = consume(text, "(") + name, text = parse_symbol(text) + text = consume_whitespace(text) + text = consume(text, ")") + return ParameterPlaceholderValue(name), text + case "SingleBashTokenConstraint": + text = consume(text, "(") + operand, text = parse_value(text) + text = consume(text, ")") + return SingleBashTokenConstraint(operand), text + case "InstalledPackage": + text = consume(text, "(") + name_val, text = parse_value(text) + text = consume(text, ",") + text = consume_whitespace(text) + version_val, text = parse_value(text) + text = consume(text, ",") + text = consume_whitespace(text) + distribution_val, text = parse_value(text) + text = consume(text, ",") + text = consume_whitespace(text) + url_val, text = parse_value(text) + text = consume(text, ")") + return InstalledPackage(name_val, version_val, distribution_val, url_val), text + raise FactParseError() + + +def parse_un_op(text: str) -> tuple[UnaryStringOperator, str]: + """Deserialize unary operator from string representation (in datalog serialized format).""" + text = consume(text, "$") + name, text = parse_qualified_name(text) + match name: + case "BaseName": + return UnaryStringOperator.BASENAME, text + case "Base64Encode": + return UnaryStringOperator.BASE64_ENCODE, text + case "Base64Decode": + return UnaryStringOperator.BASE64DECODE, text + raise FactParseError() + + +def parse_bin_op(text: str) -> tuple[BinaryStringOperator, str]: + """Deserialize binary operator from string representation (in datalog serialized format).""" + text = consume(text, "$") + name, text = parse_qualified_name(text) + match name: + case "StringConcat": + return BinaryStringOperator.STRING_CONCAT, text + raise FactParseError() diff --git a/src/macaron/code_analyzer/dataflow_analysis/github.py b/src/macaron/code_analyzer/dataflow_analysis/github.py new file mode 100644 index 000000000..6da30e745 --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/github.py @@ -0,0 +1,1314 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""Dataflow analysis implementation for analysing GitHub Actions Workflow build pipelines."""
+
+from __future__ import annotations
+
+from collections import defaultdict
+from collections.abc import Callable, Iterator
+from dataclasses import dataclass
+from graphlib import TopologicalSorter
+
+from macaron.code_analyzer.dataflow_analysis import bash, core, evaluation, facts, github_expr, models, printing
+from macaron.errors import CallGraphError
+from macaron.parsers import github_workflow_model
+
+
+@dataclass(frozen=True)
+class GitHubActionsWorkflowContext(core.Context):
+    """Context for the top-level scope of a GitHub Actions Workflow."""
+
+    #: Outer analysis context.
+    analysis_context: core.ContextRef[core.AnalysisContext]
+    #: Scope for artifact storage within the pipeline execution (for upload/download artifact).
+    artifacts: core.ContextRef[facts.Scope]
+    #: Scope for artifacts published as GitHub releases by the pipeline.
+    releases: core.ContextRef[facts.Scope]
+    #: Scope for environment variables (env block at top-level of workflow).
+    env: core.ContextRef[facts.Scope]
+    #: Scope for variables within the workflow.
+    workflow_variables: core.ContextRef[facts.Scope]
+    #: Scope for console output.
+    console: core.ContextRef[facts.Scope]
+    #: Filepath of workflow file.
+    source_filepath: str
+
+    @staticmethod
+    def create(
+        analysis_context: core.ContextRef[core.AnalysisContext], source_filepath: str
+    ) -> GitHubActionsWorkflowContext:
+        """Create a new workflow context and its associated scopes.
+
+        Parameters
+        ----------
+        analysis_context: core.ContextRef[core.AnalysisContext]
+            Outer analysis context.
+        source_filepath: str
+            Filepath of workflow file.
+
+        Returns
+        -------
+        GitHubActionsWorkflowContext
+            The new workflow context.
+        """
+        return GitHubActionsWorkflowContext(
+            analysis_context=analysis_context.get_non_owned(),
+            artifacts=core.OwningContextRef(facts.Scope("artifacts")),
+            releases=core.OwningContextRef(facts.Scope("releases")),
+            env=core.OwningContextRef(facts.Scope("env")),
+            workflow_variables=core.OwningContextRef(facts.Scope("workflow_vars")),
+            console=core.OwningContextRef(facts.Scope("console")),
+            source_filepath=source_filepath,
+        )
+
+    def direct_refs(self) -> Iterator[core.ContextRef[core.Context] | core.ContextRef[facts.Scope]]:
+        """Yield the direct references of the context, either to scopes or to other contexts."""
+        yield self.analysis_context
+        yield self.artifacts
+        yield self.releases
+        yield self.env
+        yield self.workflow_variables
+        yield self.console
+
+
+@dataclass(frozen=True)
+class GitHubActionsJobContext(core.Context):
+    """Context for a job within a GitHub Actions Workflow."""
+
+    #: Outer workflow context.
+    workflow_context: core.ContextRef[GitHubActionsWorkflowContext]
+    #: Scope for filesystem used by the job and its steps.
+    filesystem: core.ContextRef[facts.Scope]
+    #: Scope for environment variables (env block at job level).
+    env: core.ContextRef[facts.Scope]
+    #: Scope for variables within the job (step output variables, etc.).
+    job_variables: core.ContextRef[facts.Scope]
+
+    @staticmethod
+    def create(workflow_context: core.ContextRef[GitHubActionsWorkflowContext]) -> GitHubActionsJobContext:
+        """Create a new job context and its associated scopes.
+
+        Env and job variables scopes inherit from outer context.
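+        A fresh filesystem scope is created for the job itself, reflecting that each job runs on
+        its own runner and therefore does not share a filesystem with other jobs.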
+
+        Parameters
+        ----------
+        workflow_context: core.ContextRef[GitHubActionsWorkflowContext]
+            Outer workflow context.
+
+        Returns
+        -------
+        GitHubActionsJobContext
+            The new job context.
+        """
+        return GitHubActionsJobContext(
+            workflow_context=workflow_context.get_non_owned(),
+            filesystem=core.OwningContextRef(facts.Scope("filesystem")),
+            env=core.OwningContextRef(facts.Scope("env", workflow_context.ref.env.ref)),
+            job_variables=core.OwningContextRef(facts.Scope("job_vars", workflow_context.ref.workflow_variables.ref)),
+        )
+
+    def direct_refs(self) -> Iterator[core.ContextRef[core.Context] | core.ContextRef[facts.Scope]]:
+        """Yield the direct references of the context, either to scopes or to other contexts."""
+        yield self.workflow_context
+        yield self.filesystem
+        yield self.env
+        yield self.job_variables
+
+
+@dataclass(frozen=True)
+class GitHubActionsStepContext(core.Context):
+    """Context for a step within a job within a GitHub Actions Workflow."""
+
+    #: Outer job context.
+    job_context: core.ContextRef[GitHubActionsJobContext]
+    #: Scope for environment variables (env block at step level)
+    env: core.ContextRef[facts.Scope]
+    #: Name prefix for step output variables (stored in the job variables)
+    #: belonging to this step (e.g. "steps.step_id.outputs.")
+    output_var_prefix: str | None
+
+    @staticmethod
+    def create(job_context: core.ContextRef[GitHubActionsJobContext], step_id: str | None) -> GitHubActionsStepContext:
+        """Create a new step context and its associated scopes.
+
+        Env scope inherits from outer context. Output var prefix is derived from step_id.
+
+        Parameters
+        ----------
+        job_context: core.ContextRef[GitHubActionsJobContext]
+            Outer job context.
+        step_id: str | None
+            Step id. If provided, used to derive the name prefix for step output variables.
+
+        Returns
+        -------
+        GitHubActionsStepContext
+            The new step context.
+        """
+        return GitHubActionsStepContext(
+            job_context=job_context.get_non_owned(),
+            env=core.OwningContextRef(facts.Scope("env", job_context.ref.env.ref)),
+            output_var_prefix=("steps." + step_id + ".outputs.") if step_id is not None else None,
+        )
+
+    def direct_refs(self) -> Iterator[core.ContextRef[core.Context] | core.ContextRef[facts.Scope]]:
+        """Yield the direct references of the context, either to scopes or to other contexts."""
+        yield self.job_context
+        yield self.env
+
+
+class RawGitHubActionsWorkflowNode(core.InterpretationNode):
+    """Interpretation node representing a GitHub Actions Workflow.
+
+    Defines how to interpret a parsed workflow and generate its analysis representation.
+    """
+
+    #: Parsed workflow AST.
+    definition: github_workflow_model.Workflow
+
+    #: Workflow context
+    context: core.ContextRef[GitHubActionsWorkflowContext]
+
+    def __init__(
+        self, definition: github_workflow_model.Workflow, context: core.ContextRef[GitHubActionsWorkflowContext]
+    ) -> None:
+        """Initialize node.
+
+        Typically, construction should be done via the create function rather than using this constructor directly.
+ """ + super().__init__() + self.definition = definition + self.context = context + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Interpret the workflow AST to generate control flow representation.""" + + def build_workflow_node() -> core.Node: + return GitHubActionsWorkflowNode.create(self.definition, self.context.get_non_owned()) + + return {"default": build_workflow_node} + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the workflow name and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + if "name" in self.definition: + result["workflow name"] = {(None, self.definition["name"])} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + + return result + + @staticmethod + def create( + workflow: github_workflow_model.Workflow, + analysis_context: core.ContextRef[core.AnalysisContext], + source_filepath: str, + ) -> RawGitHubActionsWorkflowNode: + """Create workflow node and its associated context. + + Parameters + ---------- + workflow: github_workflow_model.Workflow + Parsed workflow AST. + analysis_context: core.ContextRef[core.AnalysisContext] + Outer analysis context. + source_filepath: str + Filepath of workflow file. + + Returns + ------- + RawGitHubActionsWorkflowNode + The new workflow node. + """ + workflow_context = GitHubActionsWorkflowContext.create(analysis_context, source_filepath) + + return RawGitHubActionsWorkflowNode(workflow, core.OwningContextRef(workflow_context)) + + +class GitHubActionsWorkflowNode(core.ControlFlowGraphNode): + """Control-flow-graph node representing a GitHub Actions Workflow. + + Control flow structure executes each job in an arbitrary linear sequence + (by default a topological sort satsifying the job dependencies). If an env block exists, + it is applied beforehand. + """ + + #: Parsed workflow AST. + definition: github_workflow_model.Workflow + #: Workflow context. + context: core.ContextRef[GitHubActionsWorkflowContext] + #: Node to apply effects of env block, if any. + env_block: RawGitHubActionsEnvNode | None + #: Job nodes, identified by their job id. + jobs: dict[str, RawGitHubActionsJobNode] + #: List of job ids specifying job execution order. + order: list[str] + #: Control flow graph. + _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: github_workflow_model.Workflow, + context: core.ContextRef[GitHubActionsWorkflowContext], + env_block: RawGitHubActionsEnvNode | None, + jobs: dict[str, RawGitHubActionsJobNode], + order: list[str], + ) -> None: + """Initialize workflow node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: github_workflow_model.Workflow + Parsed workflow AST. + context: core.ContextRef[GitHubActionsWorkflowContext] + Workflow context. + env_block: RawGitHubActionsEnvNode | None + Node to apply effects of env block, if any. + jobs: dict[str, RawGitHubActionsJobNode] + List of job ids specifying job execution order. + order: list[str] + List of job ids specifying job execution order. 
+ """ + super().__init__() + self.definition = definition + self.context = context + self.env_block = env_block + self.jobs = jobs + self.order = order + + self._cfg = core.ControlFlowGraph.create_from_sequence( + list(filter(core.node_is_not_none, [self.env_block] + [self.jobs[job_id] for job_id in self.order])) + ) + + def children(self) -> Iterator[core.Node]: + """Yield the child nodes of this node.""" + if self.env_block is not None: + yield self.env_block + for job_id in self.order: + yield self.jobs[job_id] + + def get_entry(self) -> core.Node: + """Return the entry node.""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successors for a particular exit of a particular node.""" + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the workflow name and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + if "name" in self.definition: + result["workflow name"] = {(None, self.definition["name"])} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + + return result + + @staticmethod + def create( + workflow: github_workflow_model.Workflow, context: core.NonOwningContextRef[GitHubActionsWorkflowContext] + ) -> GitHubActionsWorkflowNode: + """Create workflow node from workflow AST. + + Also creates a job node for each job, and performs a topological sort of the job dependency graph + to choose an arbitrary valid sequential execution order. + + Parameters + ---------- + workflow: github_workflow_model.Workflow + Parsed workflow AST. + context: core.NonOwningContextRef[GitHubActionsWorkflowContext] + Workflow context. + + Returns + ------- + GitHubActionsWorkflowNode + The new workflow node. + """ + jobs: dict[str, RawGitHubActionsJobNode] = {} + + for job_id, job in workflow["jobs"].items(): + job_node = RawGitHubActionsJobNode( + job, job_id, core.OwningContextRef(GitHubActionsJobContext.create(context)) + ) + jobs[job_id] = job_node + + dependency_graph: dict[str, list[str]] = {} + for job_id, job_node in jobs.items(): + edges: list[str] = [] + if "needs" in job_node.definition: + needs = job_node.definition["needs"] + if isinstance(needs, list): + for need in needs: + # TODO invalid needs id? + edges.append(need) + elif isinstance(needs, str): + edges.append(needs) + dependency_graph[job_id] = edges + + ts = TopologicalSorter(dependency_graph) + order = list(ts.static_order()) + + env_block = None + if "env" in workflow: + env_block = RawGitHubActionsEnvNode(workflow["env"], context) + + return GitHubActionsWorkflowNode(workflow, context, env_block, jobs, order) + + +class RawGitHubActionsJobNode(core.InterpretationNode): + """Interpretation node representing a GitHub Actions Job. + + Defines how to interpret the different kinds of jobs (normal jobs, reusable workflow call jobs), + and generate their analysis representation. + """ + + #: Parsed job AST. + definition: github_workflow_model.Job + #: Job id. + job_id: str + #: Job context. 
+    context: core.ContextRef[GitHubActionsJobContext]
+
+    def __init__(
+        self, definition: github_workflow_model.Job, job_id: str, context: core.ContextRef[GitHubActionsJobContext]
+    ) -> None:
+        """Initialize node."""
+        super().__init__()
+        self.definition = definition
+        self.job_id = job_id
+        self.context = context
+
+    def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]:
+        """Interpret job AST to generate representation for either a normal job or a reusable workflow call job."""
+        if github_workflow_model.is_normal_job(self.definition):
+            normal_job_definition = self.definition
+
+            def build_normal_job() -> core.Node:
+                return GitHubActionsNormalJobNode.create(
+                    normal_job_definition, self.job_id, self.context.get_non_owned()
+                )
+
+            return {"default": build_normal_job}
+        if github_workflow_model.is_reusable_workflow_call_job(self.definition):
+            raw_with_params = self.definition.get("with", {})
+            call_def = self.definition
+            if isinstance(raw_with_params, dict):
+
+                def build_reusable_workflow_call_job() -> core.Node:
+                    uses_name, _, uses_version = call_def["uses"].rpartition("@")
+
+                    with_parameters: dict[str, facts.Value] = {}
+                    for key, val in raw_with_params.items():
+                        if isinstance(val, str):
+                            parsed_val = github_expr.extract_value_from_expr_string(
+                                val, self.context.ref.job_variables.ref
+                            )
+                            if parsed_val is not None:
+                                with_parameters[key] = parsed_val
+                        elif isinstance(val, bool):
+                            with_parameters[key] = facts.StringLiteral("true") if val else facts.StringLiteral("false")
+                        else:
+                            with_parameters[key] = facts.StringLiteral(str(val))
+
+                    return GitHubActionsReusableWorkflowCallNode(
+                        call_def,
+                        self.job_id,
+                        self.context.get_non_owned(),
+                        uses_name,
+                        uses_version if uses_version != "" else None,
+                        with_parameters,
+                    )
+
+                return {"default": build_reusable_workflow_call_job}
+
+            def build_noop() -> core.Node:
+                return core.NoOpStatementNode()
+
+            return {"default": build_noop}
+
+        raise CallGraphError("invalid job")
+
+    def get_exit_state_transfer_filter(self) -> core.StateTransferFilter:
+        """Return state transfer filter to clear scopes owned by this node after this node exits."""
+        return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context))
+
+    def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]:
+        """Return a properties table containing the job id and scopes."""
+        result: dict[str, set[tuple[str | None, str]]] = {}
+        result["job id"] = {(None, self.job_id)}
+
+        printing.add_context_owned_scopes_to_properties_table(result, self.context)
+
+        return result
+
+
+class GitHubActionsNormalJobNode(core.ControlFlowGraphNode):
+    """Control-flow-graph node representing a GitHub Actions Normal Job.
+
+    Control flow structure executes each step in the order defined by the job,
+    preceded by applying the effects of the matrix and env blocks if they exist
+    and succeeded by applying the effects of the output block if it exists.
+    (TODO generating output block not yet implemented).
+    """
+
+    #: Parsed job AST.
+    definition: github_workflow_model.NormalJob
+    #: Job id.
+    job_id: str
+    #: Node to apply effects of matrix block, if any.
+    matrix_block: RawGitHubActionsMatrixNode | None
+    #: Node to apply effects of env block, if any.
+    env_block: RawGitHubActionsEnvNode | None
+    #: Step nodes, in execution order.
+    steps: list[RawGitHubActionsStepNode]
+    #: Node to apply effects of output block, if any.
+    output_block: core.Node | None  # TODO More specific
+    #: Job context
+    context: core.ContextRef[GitHubActionsJobContext]
+    #: Control flow graph
+    _cfg: core.ControlFlowGraph
+
+    def __init__(
+        self,
+        definition: github_workflow_model.NormalJob,
+        job_id: str,
+        matrix_block: RawGitHubActionsMatrixNode | None,
+        env_block: RawGitHubActionsEnvNode | None,
+        steps: list[RawGitHubActionsStepNode],
+        output_block: core.Node | None,
+        context: core.ContextRef[GitHubActionsJobContext],
+    ) -> None:
+        """Initialize job node.
+
+        Typically, construction should be done via the create function rather than using this constructor directly.
+
+        Parameters
+        ----------
+        definition: github_workflow_model.NormalJob
+            Parsed job AST.
+        job_id: str
+            Job id.
+        matrix_block: RawGitHubActionsMatrixNode | None
+            Node to apply effects of matrix block, if any.
+        env_block: RawGitHubActionsEnvNode | None
+            Node to apply effects of env block, if any.
+        steps: list[RawGitHubActionsStepNode]
+            Step nodes, in execution order.
+        output_block: core.Node | None
+            Node to apply effects of output block, if any.
+        context: core.ContextRef[GitHubActionsJobContext]
+            Job context.
+        """
+        super().__init__()
+        self.definition = definition
+        self.job_id = job_id
+        self.matrix_block = matrix_block
+        self.env_block = env_block
+        self.steps = steps
+        self.output_block = output_block
+        self.context = context
+
+        self._cfg = core.ControlFlowGraph.create_from_sequence(
+            list(filter(core.node_is_not_none, [self.matrix_block, self.env_block] + self.steps + [self.output_block]))
+        )
+
+    def children(self) -> Iterator[core.Node]:
+        """Yield the child nodes of this node."""
+        if self.matrix_block is not None:
+            yield self.matrix_block
+        if self.env_block is not None:
+            yield self.env_block
+        yield from self.steps
+        if self.output_block is not None:
+            yield self.output_block
+
+    def get_entry(self) -> core.Node:
+        """Return the entry node."""
+        return self._cfg.get_entry()
+
+    def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]:
+        """Return the successors for a particular exit of a particular node."""
+        return self._cfg.get_successors(node, core.DEFAULT_EXIT)
+
+    def get_exit_state_transfer_filter(self) -> core.StateTransferFilter:
+        """Return state transfer filter to clear scopes owned by this node after this node exits."""
+        return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context))
+
+    def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]:
+        """Return a properties table containing the job id and scopes."""
+        result: dict[str, set[tuple[str | None, str]]] = {}
+        result["job id"] = {(None, self.job_id)}
+
+        printing.add_context_owned_scopes_to_properties_table(result, self.context)
+        return result
+
+    @staticmethod
+    def create(
+        job: github_workflow_model.NormalJob, job_id: str, context: core.NonOwningContextRef[GitHubActionsJobContext]
+    ) -> GitHubActionsNormalJobNode:
+        """Create normal job node from job AST. Also creates a step node for each step.
+
+        Parameters
+        ----------
+        job: github_workflow_model.NormalJob
+            Parsed job AST.
+        job_id: str
+            Job id.
+        context: core.NonOwningContextRef[GitHubActionsJobContext]
+            Job context.
+
+        Returns
+        -------
+        GitHubActionsNormalJobNode
+            The new job node.
+ """ + # TODO output block + + matrix_block = None + if "strategy" in job and "matrix" in job["strategy"]: + matrix_block = RawGitHubActionsMatrixNode(job["strategy"]["matrix"], context) + + env_block = None + if "env" in job: + env_block = RawGitHubActionsEnvNode(job["env"], context) + + steps = [ + RawGitHubActionsStepNode( + step, core.OwningContextRef(GitHubActionsStepContext.create(context, step.get("id"))) + ) + for step in job.get("steps", []) + ] + + return GitHubActionsNormalJobNode(job, job_id, matrix_block, env_block, steps, None, context) + + +class GitHubActionsReusableWorkflowCallNode(core.InterpretationNode): + """Interpretation node representing a GitHub Actions Reusable Workflow Call Job. + + Defines how to interpret the semantics of different supported reusable workflows that may + be invoked (TODO currently none are supported). + """ + + #: Parsed reusable workflow call AST. + definition: github_workflow_model.ReusableWorkflowCallJob + #: Job id. + job_id: str + #: Job context. + context: core.ContextRef[GitHubActionsJobContext] + + #: Name of the reusable workflow being invoked (without version component). + uses_name: str + #: Version of the reusable workflow being invoked (if specified). + uses_version: str | None + + #: Input parameters specified for reusable workflow. + with_parameters: dict[str, facts.Value] + + def __init__( + self, + definition: github_workflow_model.ReusableWorkflowCallJob, + job_id: str, + context: core.ContextRef[GitHubActionsJobContext], + uses_name: str, + uses_version: str | None, + with_parameters: dict[str, facts.Value], + ) -> None: + """Initialize reusable workflow call node. + + Parameters + ---------- + definition: github_workflow_model.ReusableWorkflowCallJob + Parsed reusable workflow call AST. + job_id: str + Job id. + context: core.ContextRef[GitHubActionsJobContext] + Job context. + uses_name: str + Name of the reusable workflow being invoked (without version component). + uses_version: str | None + Version of the reusable workflow being invoked (if specified). + with_parameters: dict[str, facts.Value] + Input parameters specified for reusable workflow. + """ + super().__init__() + self.definition = definition + self.job_id = job_id + self.context = context + self.uses_name = uses_name + self.uses_version = uses_version + self.with_parameters = with_parameters + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Intepret the semantics of the different supported reusable workflows. + + (TODO currently none are supported). + """ + + def build_noop() -> core.Node: + return core.NoOpStatementNode() + + return {"default": build_noop} + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table. + + Contains the job id, reusable workflow name, and scopes. + """ + result: dict[str, set[tuple[str | None, str]]] = {} + result["job id"] = {(None, self.job_id)} + result["uses"] = {(None, self.definition["uses"])} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + + return result + + +class RawGitHubActionsStepNode(core.InterpretationNode): + """Interpretation node representing a GitHub Actions Step. 
+
+    Defines how to interpret the different kinds of steps (run steps, action steps),
+    and generate their analysis representation.
+    """
+
+    #: Parsed step AST.
+    definition: github_workflow_model.Step
+    #: Step context
+    context: core.ContextRef[GitHubActionsStepContext]
+
+    def __init__(
+        self, definition: github_workflow_model.Step, context: core.ContextRef[GitHubActionsStepContext]
+    ) -> None:
+        """Initialize node."""
+        super().__init__()
+        self.definition = definition
+        self.context = context
+
+    def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]:
+        """Interpret step AST to generate representation depending on whether it is a run step or an action step."""
+        if github_workflow_model.is_action_step(self.definition):
+            action_step_definition = self.definition
+
+            def build_action_step() -> core.Node:
+                return RawGitHubActionsActionStepNode(action_step_definition, self.context.get_non_owned())
+
+            return {"default": build_action_step}
+        if github_workflow_model.is_run_step(self.definition):
+            run_step_definition = self.definition
+
+            def build_run_step() -> core.Node:
+                return GitHubActionsRunStepNode.create(run_step_definition, self.context.get_non_owned())
+
+            return {"default": build_run_step}
+        raise CallGraphError("invalid step")
+
+    def get_exit_state_transfer_filter(self) -> core.StateTransferFilter:
+        """Return state transfer filter to clear scopes owned by this node after this node exits."""
+        return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context))
+
+    def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]:
+        """Return a properties table.
+
+        Contains the step id, name, action name (if action step), and scopes.
+        """
+        result: dict[str, set[tuple[str | None, str]]] = {}
+        if "id" in self.definition:
+            result["step id"] = {(None, self.definition["id"])}
+        elif "name" in self.definition:
+            result["step name"] = {(None, self.definition["name"])}
+        if github_workflow_model.is_action_step(self.definition):
+            result["step uses"] = {(None, self.definition["uses"])}
+
+        printing.add_context_owned_scopes_to_properties_table(result, self.context)
+
+        return result
+
+
+class RawGitHubActionsActionStepNode(core.InterpretationNode):
+    """Interpretation node representing a GitHub Actions Action Step.
+
+    Defines how to extract the name, version and parameters used to invoke the action,
+    and generate a node with those details resolved for further interpretation.
+    """
+
+    #: Parsed step AST.
+    definition: github_workflow_model.ActionStep
+    #: Step context.
+    context: core.ContextRef[GitHubActionsStepContext]
+
+    def __init__(
+        self, definition: github_workflow_model.ActionStep, context: core.ContextRef[GitHubActionsStepContext]
+    ) -> None:
+        """Initialize node."""
+        super().__init__()
+        self.definition = definition
+        self.context = context
+
+    def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]:
+        """Interpret action step AST to extract the name, version and parameters."""
+        raw_with_params = self.definition.get("with", {})
+        if isinstance(raw_with_params, dict):
+
+            def build_action() -> core.Node:
+                uses_name, _, uses_version = self.definition["uses"].rpartition("@")
+
+                with_parameters: dict[str, facts.Value] = {}
+                for key, val in raw_with_params.items():
+                    if isinstance(val, str):
+                        parsed_val = github_expr.extract_value_from_expr_string(
+                            val, self.context.ref.job_context.ref.job_variables.ref
+                        )
+                        if parsed_val is not None:
+                            with_parameters[key] = parsed_val
+                    elif isinstance(val, bool):
+                        with_parameters[key] = facts.StringLiteral("true") if val else facts.StringLiteral("false")
+                    else:
+                        with_parameters[key] = facts.StringLiteral(str(val))
+
+                return GitHubActionsActionStepNode(
+                    self.definition,
+                    self.context.get_non_owned(),
+                    uses_name,
+                    uses_version if uses_version != "" else None,
+                    with_parameters,
+                )
+
+            return {"default": build_action}
+
+        def build_noop() -> core.Node:
+            return core.NoOpStatementNode()
+
+        return {"default": build_noop}
+
+    def get_exit_state_transfer_filter(self) -> core.StateTransferFilter:
+        """Return state transfer filter to clear scopes owned by this node after this node exits."""
+        return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context))
+
+    def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]:
+        """Return a properties table containing the step id, name, action name, and scopes."""
+        result: dict[str, set[tuple[str | None, str]]] = {}
+        if "id" in self.definition:
+            result["step id"] = {(None, self.definition["id"])}
+        elif "name" in self.definition:
+            result["step name"] = {(None, self.definition["name"])}
+        result["step uses"] = {(None, self.definition["uses"])}
+
+        printing.add_context_owned_scopes_to_properties_table(result, self.context)
+
+        return result
+
+
+class GitHubActionsActionStepNode(core.InterpretationNode):
+    """Interpretation node representing a GitHub Actions Action Step.
+
+    Defines how to interpret the semantics of different supported actions that may
+    be invoked.
+    """
+
+    #: Parsed step AST.
+    definition: github_workflow_model.ActionStep
+    #: Step context.
+    context: core.ContextRef[GitHubActionsStepContext]
+
+    #: Name of the action being invoked (without version component).
+    uses_name: str
+    #: Version of the action being invoked (if specified).
+    uses_version: str | None
+
+    #: Input parameters specified for action.
+    with_parameters: dict[str, facts.Value]
+
+    def __init__(
+        self,
+        definition: github_workflow_model.ActionStep,
+        context: core.ContextRef[GitHubActionsStepContext],
+        uses_name: str,
+        uses_version: str | None,
+        with_parameters: dict[str, facts.Value],
+    ) -> None:
+        """Initialize action step node.
+
+        Parameters
+        ----------
+        definition: github_workflow_model.ActionStep
+            Parsed step AST.
+        context: core.ContextRef[GitHubActionsStepContext]
+            Step context.
+        uses_name: str
+            Name of the action being invoked (without version component).
+        uses_version: str | None
+            Version of the action being invoked (if specified).
+ with_parameters: dict[str, facts.Value] + Input parameters specified for action. + """ + super().__init__() + self.definition = definition + self.context = context + self.uses_name = uses_name + self.uses_version = uses_version + self.with_parameters = with_parameters + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Interpret the semantics of the different supported actions.""" + match self.uses_name: + case "actions/checkout": + + def build_checkout() -> core.Node: + return models.GitHubActionsGitCheckoutModelNode() + + return {"default": build_checkout} + case "actions/setup-java": + # Installs Java toolchain + def build_setup_java() -> core.Node: + return models.InstallPackageNode( + install_scope=self.context.ref.job_context.ref.filesystem.ref, + name=facts.StringLiteral("java"), + version=self.with_parameters.get("java-version", facts.StringLiteral("")), + distribution=self.with_parameters.get("distribution", facts.StringLiteral("")), + url=facts.StringLiteral("https://github.com/actions/setup-java"), + ) + + return {"default": build_setup_java} + case "graalvm/setup-graalvm": + # Installs Java toolchain + def build_setup_graalvm() -> core.Node: + return models.InstallPackageNode( + install_scope=self.context.ref.job_context.ref.filesystem.ref, + name=facts.StringLiteral("java"), + version=self.with_parameters.get("java-version", facts.StringLiteral("")), + distribution=self.with_parameters.get("distribution", facts.StringLiteral("graalvm")), + url=facts.StringLiteral("https://github.com/graalvm/setup-graalvm"), + ) + + return {"default": build_setup_graalvm} + + case "oracle-actions/setup-java": + # Installs Java toolchain + def build_setup_oracle_java() -> core.Node: + return models.InstallPackageNode( + install_scope=self.context.ref.job_context.ref.filesystem.ref, + name=facts.StringLiteral("java"), + version=self.with_parameters.get("release", facts.StringLiteral("")), + distribution=self.with_parameters.get("website", facts.StringLiteral("oracle.com")), + url=facts.StringLiteral("https://github.com/oracle-actions/setup-java"), + ) + + return {"default": build_setup_oracle_java} + case "actions/setup-python": + # Installs Python toolchain + def build_setup_python() -> core.Node: + return models.InstallPackageNode( + install_scope=self.context.ref.job_context.ref.filesystem.ref, + name=facts.StringLiteral("python"), + version=self.with_parameters.get("python-version", facts.StringLiteral("")), + distribution=facts.StringLiteral(""), + url=facts.StringLiteral(""), + ) + + return {"default": build_setup_python} + case "actions/upload-artifact": + # Uploads artifact to pipeline artifact storage.
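+ # Note: the "path" input may contain several newline-separated paths; each non-empty path is modelled below as its own artifact upload.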
+ if "name" in self.with_parameters and "path" in self.with_parameters: + split = evaluation.parse_str_expr_split(self.with_parameters["path"], "\n") + if len(split) == 1: + + def build_upload_artifact() -> core.Node: + return models.GitHubActionsUploadArtifactModelNode( + artifacts_scope=self.context.ref.job_context.ref.workflow_context.ref.artifacts.ref, + artifact_name=self.with_parameters["name"], + artifact_file=facts.UnaryStringOp(facts.UnaryStringOperator.BASENAME, split[0]), + filesystem_scope=self.context.ref.job_context.ref.filesystem.ref, + path=split[0], + ) + + return {"default": build_upload_artifact} + + def build_multiple_upload_artifact() -> core.Node: + seq: list[core.Node] = [ + models.GitHubActionsUploadArtifactModelNode( + artifacts_scope=self.context.ref.job_context.ref.workflow_context.ref.artifacts.ref, + artifact_name=self.with_parameters["name"], + artifact_file=facts.UnaryStringOp(facts.UnaryStringOperator.BASENAME, path), + filesystem_scope=self.context.ref.job_context.ref.filesystem.ref, + path=path, + ) + for path in [x for x in split if x != facts.StringLiteral("")] + ] + if len(seq) == 0: + return core.NoOpStatementNode() + return core.SimpleSequence(seq) + + return {"default": build_multiple_upload_artifact} + + case "actions/download-artifact": + # Downloads artifact from pipeline artifact storage. + if "name" in self.with_parameters: + + def build_download_artifact() -> core.Node: + return models.GitHubActionsDownloadArtifactModelNode( + artifacts_scope=self.context.ref.job_context.ref.workflow_context.ref.artifacts.ref, + artifact_name=self.with_parameters["name"], + filesystem_scope=self.context.ref.job_context.ref.filesystem.ref, + ) + + return {"default": build_download_artifact} + case "softprops/action-gh-release": + # Creates a GitHub release. 
+ if "files" in self.with_parameters: + split = evaluation.parse_str_expr_split(self.with_parameters["files"], "\n") + if len(split) == 1: + + def build_upload_release() -> core.Node: + return models.GitHubActionsReleaseModelNode( + artifacts_scope=self.context.ref.job_context.ref.workflow_context.ref.releases.ref, + artifact_name=facts.StringLiteral(str(id(self))), + artifact_file=facts.UnaryStringOp(facts.UnaryStringOperator.BASENAME, split[0]), + filesystem_scope=self.context.ref.job_context.ref.filesystem.ref, + path=split[0], + ) + + return {"default": build_upload_release} + + def build_multiple_upload_release() -> core.Node: + return core.SimpleSequence( + [ + models.GitHubActionsReleaseModelNode( + artifacts_scope=self.context.ref.job_context.ref.workflow_context.ref.releases.ref, + artifact_name=facts.StringLiteral(str(id(self))), + artifact_file=facts.UnaryStringOp(facts.UnaryStringOperator.BASENAME, path), + filesystem_scope=self.context.ref.job_context.ref.filesystem.ref, + path=path, + ) + for path in [x for x in split if x != facts.StringLiteral("")] + ] + ) + + return {"default": build_multiple_upload_release} + + def build_noop() -> core.Node: + return core.NoOpStatementNode() + + return {"default": build_noop} + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the step id, name, action name, with parameters, and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + if "id" in self.definition: + result["step id"] = {(None, self.definition["id"])} + elif "name" in self.definition: + result["step_name"] = {(None, self.definition["name"])} + result["step uses"] = {(None, self.definition["uses"])} + + for key, val in self.with_parameters.items(): + result["with(" + key + ")"] = {(None, val.to_datalog_fact_string())} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + + return result + + +class GitHubActionsRunStepNode(core.ControlFlowGraphNode): + """Control-flow-graph node representing a GitHub Actions Run Step. + + Control flow structure executes the shell script defined by the step. + If an env block exists, it is applied beforehand. + """ + + #: Parsed step AST. + definition: github_workflow_model.RunStep + #: Node to apply effects of env block, if any. + env_block: RawGitHubActionsEnvNode | None + #: Shell script to be run. + shell_block: bash.RawBashScriptNode + #: Step context. + context: core.ContextRef[GitHubActionsStepContext] + #: Control flow graph + _cfg: core.ControlFlowGraph + + def __init__( + self, + definition: github_workflow_model.RunStep, + env_block: RawGitHubActionsEnvNode | None, + shell_block: bash.RawBashScriptNode, + context: core.ContextRef[GitHubActionsStepContext], + ) -> None: + """Initialize run step node. + + Typically, construction should be done via the create function rather than using this constructor directly. + + Parameters + ---------- + definition: github_workflow_model.RunStep + Parsed step AST. + env_block: RawGitHubActionsEnvNode | None + Node to apply effects of env block, if any. + shell_block: bash.RawBashScriptNode + Shell script to be run. + context: core.ContextRef[GitHubActionsStepContext] + Step context. 
+ """ + super().__init__() + self.definition = definition + self.env_block = env_block + self.shell_block = shell_block + self.context = context + + self._cfg = core.ControlFlowGraph.create_from_sequence( + list(filter(core.node_is_not_none, [self.env_block, self.shell_block])) + ) + + def children(self) -> Iterator[core.Node]: + """Yield the child nodes of this node.""" + if self.env_block is not None: + yield self.env_block + yield self.shell_block + + def get_entry(self) -> core.Node: + """Return the entry node.""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successors for a particular exit of a particular node.""" + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the step id, name, and scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + if "id" in self.definition: + result["step id"] = {(None, self.definition["id"])} + elif "name" in self.definition: + result["step name"] = {(None, self.definition["name"])} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + @staticmethod + def create( + run_step: github_workflow_model.RunStep, context: core.NonOwningContextRef[GitHubActionsStepContext] + ) -> GitHubActionsRunStepNode: + """Create run step node from step AST. + + Parameters + ---------- + run_step: github_workflow_model.RunStep + Parsed step AST. + context: core.NonOwningContextRef[GitHubActionsStepContext] + Step context. + + Returns + ------- + GitHubActionsRunStepNode + The new run step node. + """ + env_block = None + if "env" in run_step: + env_block = RawGitHubActionsEnvNode(run_step["env"], context) + script_node = bash.RawBashScriptNode( + facts.StringLiteral(run_step["run"]), + core.OwningContextRef(bash.BashScriptContext.create_from_run_step(context, "")), + ) + return GitHubActionsRunStepNode(run_step, env_block, script_node, context) + + +class RawGitHubActionsEnvNode(core.InterpretationNode): + """Interpretation node representing an env block in a GitHub Actions Workflow/Job/Step. + + Defines how to interpret the declarative env block to generate imperative constructs to + write the values to the env variables. + """ + + #: Parsed env block AST. + definition: github_workflow_model.Env + #: Outer context. + context: core.ContextRef[GitHubActionsWorkflowContext | GitHubActionsJobContext | GitHubActionsStepContext] + + def __init__( + self, + definition: github_workflow_model.Env, + context: core.ContextRef[GitHubActionsWorkflowContext | GitHubActionsJobContext | GitHubActionsStepContext], + ) -> None: + """Initialize env block node. + + Parameters + ---------- + definition: github_workflow_model.Env + Parsed env block AST. + context: core.ContextRef[GitHubActionsWorkflowContext | GitHubActionsJobContext | GitHubActionsStepContext] + Outer context. 
+ """ + super().__init__() + self.definition = definition + self.context = context + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Interpret declarative env block to generate imperative constructs to write to the env vars.""" + env = self.definition + if isinstance(env, dict): + + def build_env_writes() -> core.Node: + env_writes: dict[str, facts.Value] = {} + for key, val in env.items(): + if isinstance(val, str): + var_scope = ( + self.context.ref.job_context.ref.job_variables.ref + if isinstance(self.context.ref, GitHubActionsStepContext) + else ( + self.context.ref.job_variables.ref + if isinstance(self.context.ref, GitHubActionsJobContext) + else None + ) + ) + parsed_val = github_expr.extract_value_from_expr_string(val, var_scope) + if parsed_val is not None: + env_writes[key] = parsed_val + elif isinstance(val, bool): + env_writes[key] = facts.StringLiteral("true") if val else facts.StringLiteral("false") + else: + env_writes[key] = facts.StringLiteral(str(val)) + + if len(env_writes) == 0: + return core.NoOpStatementNode() + + return core.SimpleSequence( + [ + models.VarAssignNode( + models.VarAssignKind.GITHUB_ENV_VAR, self.context.ref.env.ref, facts.StringLiteral(var), val + ) + for var, val in env_writes.items() + ] + ) + + return {"default": build_env_writes} + + def build_noop() -> core.Node: + return core.NoOpStatementNode() + + return {"default": build_noop} + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result + + +class RawGitHubActionsMatrixNode(core.InterpretationNode): + """Interpretation node representing a matrix block in a GitHub Actions Job. + + Defines how to interpret the declarative matrix block to generate imperative constructs to + write the values to the matrix variables. + """ + + #: Parsed matrix block AST. + definition: github_workflow_model.Matrix + #: Outer job context. + context: core.ContextRef[GitHubActionsJobContext] + + def __init__( + self, + definition: github_workflow_model.Matrix, + context: core.ContextRef[GitHubActionsJobContext], + ) -> None: + """Initialize matrix node. + + Parameters + ---------- + definition: github_workflow_model.Matrix + Parsed matrix block AST. + context: core.ContextRef[GitHubActionsJobContext] + Outer job context. 
+ """ + super().__init__() + self.definition = definition + self.context = context + + def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: + """Interpret declarative matrix block to generate imperative constructs to write to the matrix variables.""" + matrix = self.definition + if isinstance(matrix, dict): + + def build_matrix_writes() -> core.Node: + matrix_writes: dict[str, list[facts.Value]] = defaultdict(list) + if isinstance(matrix, dict): + for key, vals in matrix.items(): + if isinstance(vals, list): + var_scope = self.context.ref.job_variables.ref + + for val in vals: + if isinstance(val, str): + parsed_val = github_expr.extract_value_from_expr_string(val, var_scope) + if parsed_val is not None: + matrix_writes[key].append(parsed_val) + elif isinstance(val, bool): + matrix_writes[key].append( + facts.StringLiteral("true") if val else facts.StringLiteral("false") + ) + else: + matrix_writes[key].append(facts.StringLiteral(str(val))) + + if len(matrix_writes) == 0: + return core.NoOpStatementNode() + + return core.SimpleSequence( + [ + core.SimpleAlternatives( + [ + models.VarAssignNode( + models.VarAssignKind.GITHUB_JOB_VAR, + self.context.ref.job_variables.ref, + facts.StringLiteral("matrix." + key), + val, + ) + for val in vals + ] + ) + for key, vals in matrix_writes.items() + ] + ) + + return {"default": build_matrix_writes} + + def build_noop() -> core.Node: + return core.NoOpStatementNode() + + return {"default": build_noop} + + def get_exit_state_transfer_filter(self) -> core.StateTransferFilter: + """Return state transfer filter to clear scopes owned by this node after this node exits.""" + return core.ExcludedScopesStateTransferFilter(core.get_owned_scopes(self.context)) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the scopes.""" + result: dict[str, set[tuple[str | None, str]]] = {} + + printing.add_context_owned_scopes_to_properties_table(result, self.context) + return result diff --git a/src/macaron/code_analyzer/dataflow_analysis/github_expr.py b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py new file mode 100644 index 000000000..8961750a4 --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/github_expr.py @@ -0,0 +1,141 @@ +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Parser for GitHub Actions expression language.""" + +from typing import cast + +from lark import Lark, Token, Tree + +from macaron.code_analyzer.dataflow_analysis import facts + +# Parser for GitHub Actions expression language grammar. +github_expr_parser = Lark( + r""" + _expr: literal + | identifier + | _operator_expr + | function_call + + literal: BOOLEAN_LITERAL + | NULL_LITERAL + | NUMBER_LITERAL + | STRING_LITERAL + + BOOLEAN_LITERAL: "true" | "false" + + NULL_LITERAL: "null" + + NUMBER_LITERAL: SIGNED_NUMBER + + STRING_LITERAL: "'" STRING_INNER + "'" + + STRING_INNER: /.*?/s + + CNAMEWITHDASH: ("_"|LETTER) ("_"|"-"|LETTER|DIGIT)* + + identifier: CNAMEWITHDASH + + _operator_expr: paren_expr + | property_deref + | property_deref_object_filter + | index_expr + | not_expr + | and_expr + | or_expr + | less_than_expr + | less_than_equal_expr + | greater_than_expr + | greater_than_equal_expr + | equal_expr + | not_equal_expr + + paren_expr: "(" _expr ")" + property_deref: _expr "." 
identifier + property_deref_object_filter: _expr "." "*" + index_expr: _expr "[" _expr "]" + not_expr: "!" _expr + and_expr: _expr "&&" _expr + or_expr: _expr "||" _expr + less_than_expr: _expr "<" _expr + less_than_equal_expr: _expr "<=" _expr + greater_than_expr: _expr ">" _expr + greater_than_equal_expr: _expr ">=" _expr + equal_expr: _expr "==" _expr + not_equal_expr: _expr "!=" _expr + + function_call: identifier "(" _expr ("," _expr)* ")" + + %import common.SIGNED_NUMBER + %import common.WS + %import common.LETTER + %import common.DIGIT + %import common._STRING_INNER + %ignore WS + """, + start="_expr", +) + + +def extract_expr_variable_name(node: Token | Tree[Token]) -> str | None: + """Return variable access path for token. + + If the given node is a variable access or sequence of property accesses, return the + access path as a string, otherwise return None. + """ + if isinstance(node, Tree) and node.data == "property_deref": + rest = extract_expr_variable_name(node.children[0]) + property_identifier = cast(Tree, node.children[1]) + if rest is not None: + identifier = cast(Token, property_identifier.children[0]) + return rest + "." + identifier + elif isinstance(node, Tree) and node.data == "identifier": + identifier = cast(Token, node.children[0]) + return cast(str, identifier.value) + + return None + + +def extract_value_from_expr_string(s: str, var_scope: facts.Scope | None) -> facts.Value | None: + """Return a value expression representation of a string containing GitHub Actions expressions. + + GitHub Action expressions within the string are denoted by "${{ }}". + + Returns None if it is unrepresentable. + """ + cur_idx = 0 + cur_expr_begin = s.find("${{") + values: list[facts.Value] = [] + while cur_expr_begin != -1: + cur_str = s[cur_idx:cur_expr_begin] + values.append(facts.StringLiteral(cur_str)) + cur_expr_end = s.find("}}", cur_expr_begin) + cur_expr = s[cur_expr_begin + 3 : cur_expr_end] + parse_tree = github_expr_parser.parse(cur_expr) + + node = parse_tree.children[0] + + var_str = extract_expr_variable_name(node) + if var_str is not None and var_scope is not None: + values.append( + facts.Read( + loc=facts.Location(scope=var_scope, loc=facts.Variable(name=facts.StringLiteral(literal=var_str))) + ) + ) + else: + return None + + cur_idx = cur_expr_end + 2 + cur_expr_begin = s.find("${{", cur_idx) + last_str = s[cur_idx:] + + values.append(facts.StringLiteral(last_str)) + + if len(values) == 1: + return values[0] + + cur_concat = facts.BinaryStringOp.get_string_concat(values[0], values[1]) + + for val in values[2:]: + cur_concat = facts.BinaryStringOp.get_string_concat(cur_concat, val) + return cur_concat diff --git a/src/macaron/code_analyzer/dataflow_analysis/models.py b/src/macaron/code_analyzer/dataflow_analysis/models.py new file mode 100644 index 000000000..4528c1bc1 --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/models.py @@ -0,0 +1,679 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Models of supported commands, actions, etc. that may be invoked by build pipelines. + +Defines how they are modelled by the dataflow analysis in terms of their effect on the abstract state. 
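+ Most models are expressed as a set of parameterised write statements (see BoundParameterisedStatementSet below) that is instantiated with concrete subexpressions when the corresponding node is constructed.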
+""" + +from __future__ import annotations + +from enum import Enum, auto +from functools import cache + +from macaron.code_analyzer.dataflow_analysis import core, evaluation, facts + + +class BoundParameterisedStatementSet: + """Representation of a set of (simultaneous) write operations. + + Defined as a reference to a set of generic parameterised statements, along with a set of parameter bindings + that instantiate the parameterised statements with concrete subexpressions. + """ + + #: Set of generic parameterised statements. + parameterised_stmts: evaluation.StatementSet + #: Parameter bindings for values. + value_parameter_binds: dict[str, facts.Value] + #: Parameter bindings for locations. + location_parameter_binds: dict[str, facts.LocationSpecifier] + #: Parameter bindings for scopes. + scope_parameter_binds: dict[str, facts.Scope] + #: Instantiated statements. + instantiated_statements: evaluation.StatementSet + + def __init__( + self, + parameterised_stmts: evaluation.StatementSet, + value_parameter_binds: dict[str, facts.Value] | None = None, + location_parameter_binds: dict[str, facts.LocationSpecifier] | None = None, + scope_parameter_binds: dict[str, facts.Scope] | None = None, + ) -> None: + """Initialize bound parameterised statement set. + + Parameters + ---------- + parameterised_stmts: evaluation.StatementSet + Set of generic parameterised statements. + value_parameter_binds: dict[str, facts.Value] | None + Parameter bindings for value. + location_parameter_binds: dict[str, facts.LocationSpecifier] | None + Parameter bindings for locations. + scope_parameter_binds: dict[str, facts.Scope] | None + Parameter bindings for scopes. + """ + self.parameterised_stmts = parameterised_stmts + self.value_parameter_binds = value_parameter_binds or {} + self.location_parameter_binds = location_parameter_binds or {} + self.scope_parameter_binds = scope_parameter_binds or {} + + transformer = evaluation.ParameterPlaceholderTransformer( + allow_unbound_params=False, + value_parameter_binds=self.value_parameter_binds, + location_parameter_binds=self.location_parameter_binds, + scope_parameter_binds=self.scope_parameter_binds, + ) + self.instantiated_statements = transformer.transform_statement_set(parameterised_stmts) + + def get_statements(self) -> evaluation.StatementSet: + """Return instantiated statement set.""" + return self.instantiated_statements + + +class BoundParameterisedModelNode(core.StatementNode): + """Statement node that applies effects as defined in a provided model. + + Subclasses will define a statement node with a specific model. + """ + + #: Statement effects model. + stmts: BoundParameterisedStatementSet + + def __init__(self, stmts: BoundParameterisedStatementSet) -> None: + """Initialise model statement node.""" + super().__init__() + + self.stmts = stmts + + def apply_effects(self, before_state: core.State) -> dict[core.ExitType, core.State]: + """Apply effects as defined in a provided model.""" + return {core.DEFAULT_EXIT: self.stmts.get_statements().apply_effects(before_state)} + + +class InstallPackageNode(BoundParameterisedModelNode): + """Model for package installation. + + Stores a representation of the installed package into the abstract "installed packages" location. 
+ """ + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return the model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("install_scope"), + facts.Installed(name=facts.ParameterPlaceholderValue("name")), + ), + facts.InstalledPackage( + name=facts.ParameterPlaceholderValue("name"), + version=facts.ParameterPlaceholderValue("version"), + distribution=facts.ParameterPlaceholderValue("distribution"), + url=facts.ParameterPlaceholderValue("url"), + ), + ) + } + ) + + #: Scope into which to install. + install_scope: facts.Scope + #: Package name. + name: facts.Value + #: Package version. + version: facts.Value + #: Package distribution. + distribution: facts.Value + #: URL of package. + url: facts.Value + + def __init__( + self, + install_scope: facts.Scope, + name: facts.Value, + version: facts.Value, + distribution: facts.Value, + url: facts.Value, + ) -> None: + """Initialize install package node. + + Parameters + ---------- + install_scope: facts.Scope + Scope into which to install. + name: facts.Value + Package name. + version: facts.Value + Package version. + distribution: facts.Value + Package distribution. + url: facts.Value + URL of package. + """ + self.install_scope = install_scope + self.name = name + self.version = version + self.distribution = distribution + self.url = url + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), + value_parameter_binds={"name": name, "version": version, "distribution": distribution, "url": url}, + scope_parameter_binds={"install_scope": install_scope}, + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties tables with the model parameters.""" + return { + "install_scope": {(None, self.install_scope.to_datalog_fact_string())}, + "name": {(None, self.name.to_datalog_fact_string())}, + "version": {(None, self.version.to_datalog_fact_string())}, + "distribution": {(None, self.distribution.to_datalog_fact_string())}, + "url": {(None, self.url.to_datalog_fact_string())}, + } + + +class VarAssignKind(Enum): + """Kind of variable assignment.""" + + #: Bash environment variable. + BASH_ENV_VAR = auto() + #: Bash function declaration. + BASH_FUNC_DECL = auto() + #: GitHub job variable. + GITHUB_JOB_VAR = auto() + #: GitHub environment variable. + GITHUB_ENV_VAR = auto() + #: Other uncategorized variable. + OTHER = auto() + + +class VarAssignNode(BoundParameterisedModelNode): + """Model for variable assignment. + + Stores the assigned value to the variable location. + """ + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return the model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("var_scope"), + facts.Variable(facts.ParameterPlaceholderValue("var_name")), + ), + facts.ParameterPlaceholderValue("value"), + ) + } + ) + + #: The kind of variable. + kind: VarAssignKind + #: The scope in which the variable is stored. + var_scope: facts.Scope + #: The name of the variable. + var_name: facts.Value + #: The value to assign to the variable. + value: facts.Value + + def __init__(self, kind: VarAssignKind, var_scope: facts.Scope, var_name: facts.Value, value: facts.Value) -> None: + """Initialize variable assignment node. + + Parameters + ---------- + kind: VarAssignKind + The kind of variable. 
+ var_scope: facts.Scope + The scope in which the variable is stored. + var_name: facts.Value + The name of the variable. + value: facts.Value + The value to assign to the variable. + """ + self.kind = kind + self.var_scope = var_scope + self.var_name = var_name + self.value = value + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), + value_parameter_binds={"var_name": var_name, "value": value}, + scope_parameter_binds={"var_scope": var_scope}, + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table with the model parameters.""" + return { + "kind": {(None, self.kind.name)}, + "var_scope": {(None, self.var_scope.to_datalog_fact_string())}, + "var_name": {(None, self.var_name.to_datalog_fact_string())}, + "value": {(None, self.value.to_datalog_fact_string())}, + } + + + class GitHubActionsGitCheckoutModelNode(core.StatementNode): + """Model for GitHub git checkout operation. + + Currently modelled as a no-op. + """ + + def apply_effects(self, before_state: core.State) -> dict[core.ExitType, core.State]: + """Apply effects for git checkout (currently nothing).""" + state = core.State() + core.transfer_state(before_state, state) + return {core.DEFAULT_EXIT: state} + + + class GitHubActionsUploadArtifactModelNode(BoundParameterisedModelNode): + """Model for uploading artifacts to GitHub pipeline artifact storage. + + Stores the content read from a file to the artifact storage location. + """ + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return the model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("artifacts_scope"), + facts.Artifact( + name=facts.ParameterPlaceholderValue("artifact_name"), + file=facts.ParameterPlaceholderValue("artifact_file"), + ), + ), + facts.Read( + facts.Location( + facts.ParameterPlaceholderScope("filesystem_scope"), + facts.Filesystem(facts.ParameterPlaceholderValue("path")), + ) + ), + ) + } + ) + + #: Scope for pipeline artifact storage. + artifacts_scope: facts.Scope + #: Artifact name. + artifact_name: facts.Value + #: Artifact filename. + artifact_file: facts.Value + #: Scope for filesystem from which to read file. + filesystem_scope: facts.Scope + #: File path to read artifact content from. + path: facts.Value + + def __init__( + self, + artifacts_scope: facts.Scope, + artifact_name: facts.Value, + artifact_file: facts.Value, + filesystem_scope: facts.Scope, + path: facts.Value, + ) -> None: + """Initialize upload artifacts node. + + Parameters + ---------- + artifacts_scope: facts.Scope + Scope for pipeline artifact storage. + artifact_name: facts.Value + Artifact name. + artifact_file: facts.Value + Artifact filename. + filesystem_scope: facts.Scope + Scope for filesystem from which to read file. + path: facts.Value + File path to read artifact content from.
+ """ + self.artifacts_scope = artifacts_scope + self.artifact_name = artifact_name + self.artifact_file = artifact_file + self.filesystem_scope = filesystem_scope + self.path = path + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), + value_parameter_binds={"artifact_name": artifact_name, "artifact_file": artifact_file, "path": path}, + scope_parameter_binds={"artifacts_scope": artifacts_scope, "filesystem_scope": filesystem_scope}, + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties tables with the model parameters.""" + return { + "artifacts_scope": {(None, self.artifacts_scope.to_datalog_fact_string())}, + "artifact_name": {(None, self.artifact_name.to_datalog_fact_string())}, + "artifact_file": {(None, self.artifact_file.to_datalog_fact_string())}, + "filesystem_scope": {(None, self.filesystem_scope.to_datalog_fact_string())}, + "path": {(None, self.path.to_datalog_fact_string())}, + } + + +class GitHubActionsDownloadArtifactModelNode(BoundParameterisedModelNode): + """Model for downloading artifacts from GitHub pipeline artifact storage. + + For each file in the artifact, reads the content of that artifact and + stores it to the filesystem under the same filename. + """ + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("filesystem_scope"), + facts.Filesystem( + facts.Read( + facts.Location( + facts.ParameterPlaceholderScope("artifacts_scope"), + facts.ArtifactAnyFilename(facts.ParameterPlaceholderValue("artifact_name")), + ) + ) + ), + ), + facts.Read( + facts.Location( + facts.ParameterPlaceholderScope("artifacts_scope"), + facts.Artifact( + name=facts.ParameterPlaceholderValue("artifact_name"), + file=facts.Read( + facts.Location( + facts.ParameterPlaceholderScope("artifacts_scope"), + facts.ArtifactAnyFilename(facts.ParameterPlaceholderValue("artifact_name")), + ) + ), + ), + ) + ), + ) + } + ) + + #: Scope for pipeline artifact storage. + artifacts_scope: facts.Scope + #: Artifact name. + artifact_name: facts.Value + #: Scope for filesystem to store artifacts to. + filesystem_scope: facts.Scope + + def __init__(self, artifacts_scope: facts.Scope, artifact_name: facts.Value, filesystem_scope: facts.Scope) -> None: + """Initialize download artifacts node. + + Parameters + ---------- + artifacts_scope: facts.Scope + Scope for pipeline artifact storage. + artifact_name: facts.Value + Artifact name. + filesystem_scope: facts.Scope + Scope for filesystem to store artifacts to. 
+ """ + self.artifacts_scope = artifacts_scope + self.artifact_name = artifact_name + self.filesystem_scope = filesystem_scope + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), + value_parameter_binds={"artifact_name": artifact_name}, + scope_parameter_binds={"artifacts_scope": artifacts_scope, "filesystem_scope": filesystem_scope}, + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties tables with the model parameters.""" + return { + "artifacts_scope": {(None, self.artifacts_scope.to_datalog_fact_string())}, + "artifact_name": {(None, self.artifact_name.to_datalog_fact_string())}, + "filesystem_scope": {(None, self.filesystem_scope.to_datalog_fact_string())}, + } + + +class GitHubActionsReleaseModelNode(GitHubActionsUploadArtifactModelNode): + """Model for uploading artifacts to a GitHub release. + + Modelled in the same way as artifact upload. + """ + + +class BashEchoNode(BoundParameterisedModelNode): + """Model for Bash echo command, which writes the echoed value to some location.""" + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("out_loc_scope"), + facts.ParameterPlaceholderLocation("out_loc_spec"), + ), + facts.ParameterPlaceholderValue("value"), + ) + } + ) + + #: Output location. + out_loc: facts.Location + #: Value written. + value: facts.Value + + def __init__(self, out_loc: facts.Location, value: facts.Value) -> None: + """Initialize echo node. + + Parameters + ---------- + out_loc: facts.Location + Output location. + value: facts.Value + Value written. + """ + self.out_loc = out_loc + self.value = value + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), + value_parameter_binds={"value": value}, + location_parameter_binds={"out_loc_spec": out_loc.loc}, + scope_parameter_binds={"out_loc_scope": out_loc.scope}, + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties tables with the model parameters.""" + return { + "out_loc": {(None, self.out_loc.to_datalog_fact_string())}, + "value": {(None, self.value.to_datalog_fact_string())}, + } + + +class Base64EncodeNode(BoundParameterisedModelNode): + """Model for Base64 encode operation. + + Reads a value from some location, Base64-encodes it and writes the result to another location. + """ + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("out_loc_scope"), + facts.ParameterPlaceholderLocation("out_loc_spec"), + ), + facts.UnaryStringOp( + facts.UnaryStringOperator.BASE64_ENCODE, + facts.Read( + facts.Location( + facts.ParameterPlaceholderScope("in_loc_scope"), + facts.ParameterPlaceholderLocation("in_loc_spec"), + ) + ), + ), + ) + } + ) + + #: Location to read input from. + in_loc: facts.Location + #: Location to write encoded output to. + out_loc: facts.Location + + def __init__(self, in_loc: facts.Location, out_loc: facts.Location) -> None: + """Initialize Base64 encode node. + + Parameters + ---------- + in_loc: facts.Location + Location to read input from. + out_loc: facts.Location + Location to write encoded output to. 
+ """ + self.in_loc = in_loc + self.out_loc = out_loc + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), + location_parameter_binds={"out_loc_spec": out_loc.loc, "in_loc_spec": in_loc.loc}, + scope_parameter_binds={"out_loc_scope": out_loc.scope, "in_loc_scope": in_loc.scope}, + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties tables with the model parameters.""" + return { + "in_loc": {(None, self.in_loc.to_datalog_fact_string())}, + "out_loc": {(None, self.out_loc.to_datalog_fact_string())}, + } + + +class Base64DecodeNode(BoundParameterisedModelNode): + """Model for Base64 decode operation. + + Reads a value from some location, Base64-decodes it and writes the result to another location. + """ + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("out_loc_scope"), + facts.ParameterPlaceholderLocation("out_loc_spec"), + ), + facts.UnaryStringOp( + facts.UnaryStringOperator.BASE64DECODE, + facts.Read( + facts.Location( + facts.ParameterPlaceholderScope("in_loc_scope"), + facts.ParameterPlaceholderLocation("in_loc_spec"), + ) + ), + ), + ) + } + ) + + #: Location to read input from. + in_loc: facts.Location + #: Location to write decoded output to. + out_loc: facts.Location + + def __init__(self, in_loc: facts.Location, out_loc: facts.Location) -> None: + """Initialize Base64 decode node. + + Parameters + ---------- + in_loc: facts.Location + Location to read input from. + out_loc: facts.Location + Location to write decoded output to. + """ + self.in_loc = in_loc + self.out_loc = out_loc + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), + location_parameter_binds={"out_loc_spec": out_loc.loc, "in_loc_spec": in_loc.loc}, + scope_parameter_binds={"out_loc_scope": out_loc.scope, "in_loc_scope": in_loc.scope}, + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties tables with the model parameters.""" + return { + "in_loc": {(None, self.in_loc.to_datalog_fact_string())}, + "out_loc": {(None, self.out_loc.to_datalog_fact_string())}, + } + + +class MavenBuildModelNode(BoundParameterisedModelNode): + """Model for Maven build commands. + + Maven build behaviour is approximated as writing some files under the target directory. + """ + + @staticmethod + @cache + def get_model() -> evaluation.StatementSet: + """Return model.""" + return evaluation.StatementSet( + { + evaluation.WriteStatement( + facts.Location( + facts.ParameterPlaceholderScope("filesystem_scope"), + facts.FilesystemAnyUnderDir(facts.StringLiteral("./target")), + ), + facts.ArbitraryNewData("mvn"), # TODO something better? + ) + } + ) + + #: Scope for filesystem written to. + filesystem_scope: facts.Scope + + def __init__(self, filesystem_scope: facts.Scope) -> None: + """Initialize Maven build node. + + Parameters + ---------- + filesystem_scope: facts.Scope + Scope for filesystem written to. 
+ """ + self.filesystem_scope = filesystem_scope + + bound_stmts = BoundParameterisedStatementSet( + parameterised_stmts=self.get_model(), scope_parameter_binds={"filesystem_scope": filesystem_scope} + ) + + super().__init__(bound_stmts) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties tables with the model parameters.""" + return {"filesystem_scope": {(None, self.filesystem_scope.to_datalog_fact_string())}} diff --git a/src/macaron/code_analyzer/dataflow_analysis/printing.py b/src/macaron/code_analyzer/dataflow_analysis/printing.py new file mode 100644 index 000000000..0ffd61813 --- /dev/null +++ b/src/macaron/code_analyzer/dataflow_analysis/printing.py @@ -0,0 +1,681 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Functions for printing/displaying dataflow analysis nodes in the form of graphviz (dot) output. + +Allows the analysis representation and results to be rendered as a human-readable node-link graph. + +Makes use of graphviz's html-like label feature to add detailed information to each node. +Tables are specified in the form of a dict[str, set[tuple[str | None, str]], which is rendered as +a two-column table, with the first column containing each of the keys of the dict, and the second +column containing the corresponding set of values, as a nested vertical table, with each value having +an optional label that, if present, will be rendered in a visually distinguished manner alongside the +value. +""" + +from __future__ import annotations + +import dataclasses +from dataclasses import dataclass +from typing import TextIO + +from macaron.code_analyzer.dataflow_analysis import core + + +def print_as_dot_graph(node: core.Node, out: TextIO, include_properties: bool, include_states: bool) -> None: + """Print root node as dot graph. + + Parameters + ---------- + node: core.Node + The root node to print. + out: TextIO + Output stream to print to. + include_properties: bool + Whether to include detail on the properties of each node (disable to make nodes simpler/smaller). + include_states: bool + Whether to include detail on the abstract state at each node (disable to make nodes simpler/smaller). + """ + out.write("digraph {\n") + out.write('node [style="filled", fillcolor="white"]\n') + print_as_dot_string(node, out, include_properties=include_properties, include_states=include_states) + out.write("}\n") + + +def get_printable_table_for_state( + state: core.State, state_filter: core.StateTransferFilter | None = None +) -> dict[str, set[tuple[str | None, str]]]: + """Return a table of the stringified representation of the state. + + Consists of a mapping of storage locations to the set of values they may contain + (see module comment for description of the return type). + + Values are additionally labeled with whether they were new and not copied, and whether + they will be excluded by the given filter. + """ + result: dict[str, set[tuple[str | None, str]]] = {} + for key, vals in state.state.items(): + vals_strs: set[tuple[str | None, str]] = { + ( + str(label.sequence_number) + + ("*" if not label.copied else "") + + ("!" 
if state_filter is not None and not state_filter.should_transfer(key) else ""), + val.to_datalog_fact_string(), + ) + for val, label in vals.items() + } + key_str = key.to_datalog_fact_string() + result[key_str] = vals_strs + return result + + +def print_as_dot_string(node: core.Node, out: TextIO, include_properties: bool, include_states: bool) -> None: + """Print node as dot representation (to be embedded within a dot graph). + + Parameters + ---------- + node: core.Node + The node to print. + out: TextIO + Output stream to print to. + include_properties: bool + Whether to include detail on the properties of each node (disable to make nodes simpler/smaller). + include_states: bool + Whether to include detail on the abstract state at each node (disable to make nodes simpler/smaller). + """ + match node: + case core.ControlFlowGraphNode(): + print_cfg_node_as_dot_string(node, out, include_properties, include_states) + case core.StatementNode(): + print_statement_node_as_dot_string(node, out, include_properties, include_states) + case core.InterpretationNode(): + print_interpretation_node_as_dot_string(node, out, include_properties, include_states) + + +def print_cfg_node_as_dot_string( + cfg_node: core.ControlFlowGraphNode, out: TextIO, include_properties: bool, include_states: bool +) -> None: + """Print control-flow-graph node as dot representation (to be embedded within a dot graph). + + Parameters + ---------- + cfg_node: core.ControlFlowGraphNode + The control-flow-graph node to print. + out: TextIO + Output stream to print to. + include_properties: bool + Whether to include detail on the properties of each node (disable to make nodes simpler/smaller). + include_states: bool + Whether to include detail on the abstract state at each node (disable to make nodes simpler/smaller). 
+ """ + out.write("subgraph cluster_n" + str(id(cfg_node)) + "{\n") + out.write("style=filled\n") + out.write('fillcolor="#fdf3e4ff"\n') + + subtables: list[tuple[str, dict[str, set[tuple[str | None, str]]], DotHtmlLikeTableConfiguration]] = [] + if include_properties: + properties_table = cfg_node.get_printable_properties_table() + if len(properties_table) > 0: + subtables.append( + ( + "Properties", + cfg_node.get_printable_properties_table(), + DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE_PROPERTIES, + ) + ) + + if include_states: + subtables.append( + ( + "Before State", + get_printable_table_for_state(cfg_node.before_state), + DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE_PROPERTIES, + ) + ) + if core.DEFAULT_EXIT in cfg_node.exit_states: + subtables.append( + ( + "Exit State", + get_printable_table_for_state( + cfg_node.exit_states[core.DEFAULT_EXIT], cfg_node.get_exit_state_transfer_filter() + ), + DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE_PROPERTIES, + ) + ) + for exit_type, exit_state in cfg_node.exit_states.items(): + if not isinstance(exit_type, core.DefaultExit): + subtables.append( + ( + "Exit State (" + exit_type.__class__.__name__ + ")", + get_printable_table_for_state(exit_state, cfg_node.get_exit_state_transfer_filter()), + DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE_PROPERTIES, + ) + ) + + out.write( + produce_node_dot_def( + node_id=("n" + str(id(cfg_node))), + node_kind="ControlFlowGraph", + node_type=cfg_node.__class__.__name__, + node_label=( + "[" + + ", ".join( + [str(cfg_node.created_debug_sequence_num)] + + ["(" + str(b) + "-" + str(e) + ")" for b, e in cfg_node.processed_log] + ) + + "]" + if include_states + else None + ), + config=DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE, + subtables=subtables, + ) + + "\n" + ) + + i = 0 + out.write("n" + str(id(cfg_node)) + " -> " + "c" + str(id(cfg_node.get_entry())) + ' [label="entry"]\n') + + for child_node in cfg_node.children(): + out.write( + "c" + + str(id(child_node)) + + ' [label="' + + str(i) + + '", shape=circle, fontcolor="#ffffffff", fillcolor="#aa643bff"]\n' + ) + out.write( + "e" + + str(id(cfg_node)) + + '_exit [label="exit", shape=circle, fontcolor="#ffffffff", fillcolor="#aa643bff"]\n' + ) + next_alt_exit_id = 0 + alt_exit_ids: dict[core.ExitType, int] = {} + + for exit_type in child_node.exit_states: + successors = cfg_node.get_successors(child_node, exit_type) + for successor in successors: + if isinstance(successor, core.Node): + out.write("c" + str(id(child_node)) + " -> " + "c" + str(id(successor)) + ' [label=""]\n') + elif isinstance(successor, core.DefaultExit): + out.write("c" + str(id(child_node)) + " -> " + "e" + str(id(cfg_node)) + "_exit" + ' [label=""]\n') + else: + if successor not in alt_exit_ids: + alt_exit_ids[successor] = next_alt_exit_id + next_alt_exit_id = next_alt_exit_id + 1 + alt_exit_id = alt_exit_ids[successor] + out.write( + "c" + + str(id(child_node)) + + " -> " + + "e" + + str(id(cfg_node)) + + "_alt_exit_" + + str(alt_exit_id) + + ' [label=""]\n' + ) + + for alt_exit_id in alt_exit_ids.values(): + out.write( + "e" + + str(id(cfg_node)) + + "_alt_exit_" + + str(alt_exit_id) + + ' [label="alt-exit", shape=circle, fontcolor="#ffffffff", fillcolor="#aa643bff"]\n' + ) + i = i + 1 + out.write("}\n") + + for child_node in cfg_node.children(): + out.write("c" + str(id(child_node)) + " -> " + "n" + str(id(child_node)) + ' [label=""]\n') + + for child_node in cfg_node.children(): + print_as_dot_string(child_node, out, 
include_properties=include_properties, include_states=include_states) + + + def print_statement_node_as_dot_string( + node: core.StatementNode, out: TextIO, include_properties: bool, include_states: bool + ) -> None: + """Print statement node as dot representation (to be embedded within a dot graph). + + Parameters + ---------- + node: core.StatementNode + The statement node to print. + out: TextIO + Output stream to print to. + include_properties: bool + Whether to include detail on the properties of each node (disable to make nodes simpler/smaller). + include_states: bool + Whether to include detail on the abstract state at each node (disable to make nodes simpler/smaller). + """ + subtables: list[tuple[str, dict[str, set[tuple[str | None, str]]], DotHtmlLikeTableConfiguration]] = [] + + if include_properties: + properties_table = node.get_printable_properties_table() + if len(properties_table) > 0: + subtables.append( + ( + "Properties", + node.get_printable_properties_table(), + DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE_PROPERTIES, + ) + ) + + if include_states: + subtables.append( + ( + "Before State", + get_printable_table_for_state(node.before_state), + DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE_PROPERTIES, + ) + ) + if core.DEFAULT_EXIT in node.exit_states: + subtables.append( + ( + "Exit State", + get_printable_table_for_state( + node.exit_states[core.DEFAULT_EXIT], node.get_exit_state_transfer_filter() + ), + DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE_PROPERTIES, + ) + ) + for exit_type, exit_state in node.exit_states.items(): + if not isinstance(exit_type, core.DefaultExit): + subtables.append( + ( + "Exit State (" + exit_type.__class__.__name__ + ")", + get_printable_table_for_state(exit_state, node.get_exit_state_transfer_filter()), + DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE_PROPERTIES, + ) + ) + + out.write( + produce_node_dot_def( + node_id=("n" + str(id(node))), + node_kind="Statement", + node_type=node.__class__.__name__, + node_label=( + "[" + + ", ".join( + [str(node.created_debug_sequence_num)] + + ["(" + str(b) + "-" + str(e) + ")" for b, e in node.processed_log] + ) + + "]" + if include_states + else None + ), + config=DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE, + subtables=subtables, + ) + + "\n" + ) + + + def print_interpretation_node_as_dot_string( + node: core.InterpretationNode, out: TextIO, include_properties: bool, include_states: bool + ) -> None: + """Print interpretation node as dot representation (to be embedded within a dot graph). + + Parameters + ---------- + node: core.InterpretationNode + The interpretation node to print. + out: TextIO + Output stream to print to. + include_properties: bool + Whether to include detail on the properties of each node (disable to make nodes simpler/smaller). + include_states: bool + Whether to include detail on the abstract state at each node (disable to make nodes simpler/smaller).
+ """ + subtables: list[tuple[str, dict[str, set[tuple[str | None, str]]], DotHtmlLikeTableConfiguration]] = [] + + if include_properties: + properties_table = node.get_printable_properties_table() + if len(properties_table) > 0: + subtables.append( + ( + "Properties", + node.get_printable_properties_table(), + DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE_PROPERTIES, + ) + ) + + if include_states: + subtables.append( + ( + "Before State", + get_printable_table_for_state(node.before_state), + DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE_PROPERTIES, + ) + ) + if core.DEFAULT_EXIT in node.exit_states: + subtables.append( + ( + "Exit State", + get_printable_table_for_state( + node.exit_states[core.DEFAULT_EXIT], node.get_exit_state_transfer_filter() + ), + DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE_PROPERTIES, + ) + ) + for exit_type, exit_state in node.exit_states.items(): + if not isinstance(exit_type, core.DefaultExit): + subtables.append( + ( + "Exit State + (" + exit_type.__class__.__name__ + ")", + get_printable_table_for_state(exit_state, node.get_exit_state_transfer_filter()), + DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE_PROPERTIES, + ) + ) + + out.write( + produce_node_dot_def( + node_id=("n" + str(id(node))), + node_kind="Interpretation", + node_type=node.__class__.__name__, + node_label=( + "[" + + ", ".join( + [str(node.created_debug_sequence_num)] + + ["(" + str(b) + "-" + str(e) + ")" for b, e in node.processed_log] + ) + + "]" + if include_states + else None + ), + config=DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE, + subtables=subtables, + ) + + "\n" + ) + for child_node in node.interpretations.values(): + out.write("n" + str(id(node)) + " -> " + "n" + str(id(child_node)) + ' [label="interpretation"]\n') + for child_node in node.interpretations.values(): + print_as_dot_string(child_node, out, include_properties=include_properties, include_states=include_states) + + +def escape_for_dot_html_like_label(s: str) -> str: + """Return string escape for inclusion in a dot html-like label.""" + return s.replace("&", "&").replace('"', """).replace("<", "<").replace(">", ">") + + +@dataclass(frozen=True) +class DotHtmlLikeTableConfiguration: + """Configuration for rendering of dot html-like table.""" + + #: Background colour for table header. + header_colour: str + #: Font colour for table header. + header_font_colour: str + #: Font size for table header. + header_font_size: int + #: Whether font of table header should be bold. + header_font_bold: bool + #: Background colour for table body. + body_colour: str + #: Font colour for table body. + body_font_colour: str + #: Font size for table body. 
+
+
+DARK_BLUE = "#6f757eff"
+LIGHT_BLUE = "#dae2efff"
+DARK_BROWN = "#aa643bff"
+LIGHT_BROWN = "#f5debdff"
+DARK_PINK = "#a36472ff"
+LIGHT_PINK = "#f6dae1ff"
+LIGHT_TEXT = "#ffffffff"
+DARK_TEXT = "#161513ff"
+DARK_GREY = "#7a736eff"
+LIGHT_GREY = "#e4e1dcff"
+
+
+DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE = DotHtmlLikeTableConfiguration(
+    header_colour=DARK_PINK,
+    header_font_colour=LIGHT_TEXT,
+    header_font_size=24,
+    header_font_bold=True,
+    body_colour=LIGHT_PINK,
+    body_font_colour=DARK_TEXT,
+    body_font_size=6,
+)
+
+DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE_PROPERTIES = dataclasses.replace(
+    DOT_HTML_LIKE_TABLE_CONFIG_INTERPRETATION_NODE, header_font_size=12
+)
+
+DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE = DotHtmlLikeTableConfiguration(
+    header_colour=DARK_BROWN,
+    header_font_colour=LIGHT_TEXT,
+    header_font_size=24,
+    header_font_bold=True,
+    body_colour=LIGHT_BROWN,
+    body_font_colour=DARK_TEXT,
+    body_font_size=6,
+)
+
+DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE_PROPERTIES = dataclasses.replace(
+    DOT_HTML_LIKE_TABLE_CONFIG_CONTROL_FLOW_GRAPH_NODE, header_font_size=12
+)
+
+DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE = DotHtmlLikeTableConfiguration(
+    header_colour=DARK_BLUE,
+    header_font_colour=LIGHT_TEXT,
+    header_font_size=24,
+    header_font_bold=True,
+    body_colour=LIGHT_BLUE,
+    body_font_colour=DARK_TEXT,
+    body_font_size=6,
+)
+
+DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE_PROPERTIES = dataclasses.replace(
+    DOT_HTML_LIKE_TABLE_CONFIG_STATEMENT_NODE, header_font_size=12
+)
+
+DOT_HTML_LIKE_TABLE_CONFIG_STATE = DotHtmlLikeTableConfiguration(
+    header_colour=DARK_GREY,
+    header_font_colour=LIGHT_TEXT,
+    header_font_size=12,
+    header_font_bold=True,
+    body_colour=LIGHT_GREY,
+    body_font_colour=DARK_TEXT,
+    body_font_size=6,
+)
+
+
+def truncate_long_strings_for_display(s: str) -> str:
+    """Truncate a long string if necessary for display."""
+    if len(s) > 100:
+        return s[:100] + "..."
+    return s
+
+
+def produce_dot_html_like_table(
+    header: str, data: dict[str, set[tuple[str | None, str]]], config: DotHtmlLikeTableConfiguration
+) -> str:
+    """Return the given data table rendered as a dot html-like label table.
+
+    See module comment for description of how data tables are rendered.
+    """
+    lines: list[str] = []
+    lines.append('<table border="0" cellborder="1" cellspacing="0">')
+    lines.append(
+        '    <tr><td bgcolor="'
+        + config.header_colour
+        + '"><font color="'
+        + config.header_font_colour
+        + '" point-size="'
+        + str(config.header_font_size)
+        + '">'
+        + ("<b>" if config.header_font_bold else "")
+        + escape_for_dot_html_like_label(header)
+        + ("</b>" if config.header_font_bold else "")
+        + "</font></td></tr>"
+    )
+
+    for key, vals in data.items():
+        lines.append(
+            '    <tr><td bgcolor="'
+            + config.body_colour
+            + '"><font color="'
+            + config.body_font_colour
+            + '" point-size="'
+            + str(config.body_font_size)
+            + '">'
+            + escape_for_dot_html_like_label(key)
+            + "</font></td>"
+        )
+        if len(vals) > 0:
+            for val in vals:
+                label_part = (
+                    (
+                        '<font point-size="'
+                        + str(config.body_font_size)
+                        + '">['
+                        + escape_for_dot_html_like_label(val[0])
+                        + "]</font> "
+                    )
+                    if val[0] is not None
+                    else ""
+                )
+                lines.append(
+                    "        <td>"
+                    + label_part
+                    + '<font color="'
+                    + config.body_font_colour
+                    + '" point-size="'
+                    + str(config.body_font_size)
+                    + '">'
+                    + escape_for_dot_html_like_label(truncate_long_strings_for_display(val[1]))
+                    + "</font></td>"
+                )
+        else:
+            lines.append("        <td></td>")
+
+        lines.append("    </tr>")
+
+    lines.append("</table>")
+
+    return "\n".join(lines)
+
+
+def produce_node_dot_html_like_label(
+    node_kind: str,
+    node_type: str,
+    node_label: str | None,
+    config: DotHtmlLikeTableConfiguration,
+    subtables: list[tuple[str, dict[str, set[tuple[str | None, str]]], DotHtmlLikeTableConfiguration]],
+) -> str:
+    """Return the given node table data rendered as a dot html-like label table.
+
+    Contains nested tables for each subtable (see module comment for description of how data tables are rendered).
+    """
+    lines: list[str] = []
+    lines.append('< <table border="0" cellborder="1" cellspacing="0">')
+    lines.append(
+        '    <tr><td bgcolor="'
+        + config.header_colour
+        + '"><font color="'
+        + config.header_font_colour
+        + '" point-size="'
+        + str(config.header_font_size)
+        + '">'
+        + ("<b>" if config.header_font_bold else "")
+        + escape_for_dot_html_like_label(node_kind)
+        + ("</b>" if config.header_font_bold else "")
+        + "</font></td></tr>"
+    )
+    lines.append(
+        '    <tr><td bgcolor="'
+        + config.header_colour
+        + '"><font color="'
+        + config.header_font_colour
+        + '" point-size="'
+        + str(config.header_font_size)
+        + '">'
+        + ("<b>" if config.header_font_bold else "")
+        + escape_for_dot_html_like_label(node_type)
+        + ("</b>" if config.header_font_bold else "")
+        + "</font></td></tr>"
+    )
+    if node_label is not None:
+        lines.append(
+            "    <tr><td>"
+            + (
+                ('<font point-size="12">' + escape_for_dot_html_like_label(node_label) + "</font>")
+                if node_label is not None
+                else ""
+            )
+            + "</td></tr>"
+        )
+
+    for subtable in subtables:
+        subtable_header, subtable_data, subtable_config = subtable
+        lines.append(
+            "    <tr><td>"
+            + produce_dot_html_like_table(subtable_header, subtable_data, subtable_config)
+            + "</td></tr>"
+        )
+
+    lines.append("</table> >")
+
+    return "\n".join(lines)
+
+
+def produce_node_dot_def(
+    node_id: str,
+    node_kind: str,
+    node_type: str,
+    node_label: str | None,
+    config: DotHtmlLikeTableConfiguration,
+    subtables: list[tuple[str, dict[str, set[tuple[str | None, str]]], DotHtmlLikeTableConfiguration]],
+) -> str:
+    """Return the given node table data rendered as a dot node containing an html-like label table.
+
+    Contains nested tables for each subtable (see module comment for description of how data tables
+    are rendered).
+    """
+    return (
+        '"'
+        + node_id
+        + '" [shape=rectangle, fillcolor="'
+        + config.body_colour
+        + '" fontname="Oracle Sans Tab", label='
+        + produce_node_dot_html_like_label(node_kind, node_type, node_label, config, subtables)
+        + "]"
+    )
+
+
+def add_context_owned_scopes_to_properties_table(
+    table: dict[str, set[tuple[str | None, str]]], context: core.ContextRef[core.Context]
+) -> None:
+    """Add an entry to the given data table listing the scopes owned by the given context."""
+    owned_scopes = core.get_owned_scopes(context)
+    if len(owned_scopes) > 0:
+        table["scopes"] = {(None, scope.to_datalog_fact_string(include_outer_scope=True)) for scope in owned_scopes}
diff --git a/src/macaron/code_analyzer/dataflow_analysis/run_analysis_standalone.py b/src/macaron/code_analyzer/dataflow_analysis/run_analysis_standalone.py
new file mode 100644
index 000000000..faaf084ea
--- /dev/null
+++ b/src/macaron/code_analyzer/dataflow_analysis/run_analysis_standalone.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""Module providing entry point to run dataflow analysis independently of Macaron command.
+
+For experimentation and debugging purposes only.
+""" + +import sys + +from macaron.code_analyzer.dataflow_analysis import analysis, bash, core, github, printing +from macaron.slsa_analyzer.build_tool import Maven + + +def main() -> None: + """Entry point for running standalone analysis.""" + raw_workflow_node = analysis.analyse_github_workflow_file(sys.argv[1], None) + with open("dot", "w", encoding="utf-8") as f: + printing.print_as_dot_graph(raw_workflow_node, f, include_properties=True, include_states=True) + + nodes: list[core.Node] = [raw_workflow_node] + while len(nodes) > 0: + node = nodes.pop() + + if isinstance(node, github.GitHubActionsActionStepNode): + print("Action {") # noqa: T201 + print(" name: " + node.uses_name) # noqa: T201 + print(" version: " + node.uses_version if node.uses_version is not None else "") # noqa: T201 + print(" with {") # noqa: T201 + for key, val in node.with_parameters.items(): + print(" " + key + ": " + val.to_datalog_fact_string()) # noqa: T201 + print(" }") # noqa: T201 + print("}") # noqa: T201 + if isinstance(node, bash.BashSingleCommandNode): + print("REACHABLE SECRETS: " + str(analysis.get_reachable_secrets(node))) # noqa: T201 + for child in node.children(): + nodes.append(child) + + build_tool = Maven() + + for build_cmd in analysis.get_build_tool_commands(core.NodeForest([raw_workflow_node]), build_tool): + print("build command: " + str(build_cmd["command"])) # noqa: T201 + + +if __name__ == "__main__": + main() diff --git a/src/macaron/parsers/bashparser.py b/src/macaron/parsers/bashparser.py index 0d5cd66c1..ac2ceed68 100644 --- a/src/macaron/parsers/bashparser.py +++ b/src/macaron/parsers/bashparser.py @@ -13,65 +13,16 @@ import logging import os import subprocess # nosec B404 -from enum import Enum -from typing import Any +from typing import cast -from macaron.code_analyzer.call_graph import BaseNode from macaron.config.defaults import defaults from macaron.config.global_config import global_config -from macaron.errors import CallGraphError, ParseError -from macaron.parsers.actionparser import get_run_step -from macaron.parsers.github_workflow_model import Step +from macaron.errors import ParseError +from macaron.parsers.bashparser_model import File, Word logger: logging.Logger = logging.getLogger(__name__) -class BashScriptType(Enum): - """This class is used for different bash script types.""" - - NONE = "None" - INLINE = "inline" # Inline bash script. - FILE = "file" # Bash script file. - - -class BashNode(BaseNode): - """This class represents a callgraph node for bash commands.""" - - def __init__( - self, - name: str, - node_type: BashScriptType, - source_path: str, - parsed_step_obj: Step | None, - parsed_bash_obj: dict, - **kwargs: Any, - ) -> None: - """Initialize instance. - - Parameters - ---------- - name : str - Name of the bash script file or the step name if the script is inlined. - node_type : BashScriptType - The type of the script. - source_path : str - The path of the script. - parsed_step_obj : Step | None - The parsed step object. - parsed_bash_obj : dict - The parsed bash script object. - """ - super().__init__(**kwargs) - self.name = name - self.node_type: BashScriptType = node_type - self.source_path = source_path - self.parsed_step_obj = parsed_step_obj - self.parsed_bash_obj = parsed_bash_obj - - def __str__(self) -> str: - return f"BashNode({self.name},{self.node_type})" - - def parse_file(file_path: str, macaron_path: str | None = None) -> dict: """Parse a bash script file. 
@@ -157,111 +108,114 @@ def parse(bash_content: str, macaron_path: str | None = None) -> dict:
         raise ParseError("Error while loading the parsed bash script.") from error
 
 
-def create_bash_node(
-    name: str,
-    node_id: str | None,
-    node_type: BashScriptType,
-    source_path: str,
-    ci_step_ast: Step | None,
-    repo_path: str,
-    caller: BaseNode,
-    recursion_depth: int,
-    macaron_path: str | None = None,
-) -> BashNode:
-    """Create a callgraph node for a bash script.
+def parse_raw(bash_content: str, macaron_path: str | None = None) -> File:
+    """Parse a bash script's content into a full (raw) typed AST.
+
+    Parameters
+    ----------
+    bash_content : str
+        Bash script content.
+    macaron_path : str | None
+        Macaron's root path (optional).
+
+    Returns
+    -------
+    bashparser_model.File
+        The parsed bash script AST in typed JSON (dict) format.
+
+    Raises
+    ------
+    ParseError
+        When parsing fails with errors.
+    """
+    if not macaron_path:
+        macaron_path = global_config.macaron_path
+    cmd = [
+        os.path.join(macaron_path, "bin", "bashparser"),
+        "-input",
+        bash_content,
+        "-raw",
+    ]
+
+    try:
+        result = subprocess.run(  # nosec B603
+            cmd,
+            capture_output=True,
+            check=True,
+            cwd=macaron_path,
+            timeout=defaults.getint("bashparser", "timeout", fallback=30),
+        )
+    except (
+        subprocess.CalledProcessError,
+        subprocess.TimeoutExpired,
+        FileNotFoundError,
+    ) as error:
+        raise ParseError("Error while parsing bash script.") from error
+
+    try:
+        if result.returncode == 0:
+            return cast(File, json.loads(result.stdout.decode("utf-8")))
+
+        raise ParseError(f"Bash script parser failed: {result.stderr.decode('utf-8')}")
+
+    except json.JSONDecodeError as error:
+        raise ParseError("Error while loading the parsed bash script.") from error
 
-    A bash node can have the following types:
-    * :class:`BashScriptType.INLINE` when it is inlined in a CI workflow.
-    * :class:`BashScriptType.FILE` when it is a bash script file.
 
+def parse_expr(bash_expr_content: str, macaron_path: str | None = None) -> list[Word]:
+    """Parse a bash expression's content.
+
     Parameters
     ----------
-    name: str
-        A name to be used as the identifier of the node.
-    node_id: str | None
-        The node ID if defined.
-    node_type: BashScriptType
-        The type of the node.
-    source_path: str
-        The file that contains the bash script.
-    ci_step_ast: Step | None
-        The AST of the CI step that runs a bash script.
-    repo_path: str
-        The path to the target repo.
-    caller: BaseNode
-        The caller node.
-    recursion_depth: int
-        The number of times this function is called recursively.
-    macaron_path=None
-        The path to the Macaron module.
+    bash_expr_content : str
+        Bash expression content.
+    macaron_path : str | None
+        Macaron's root path (optional).
 
     Returns
     -------
-    BashNode
-        A bash node object.
+    list[bashparser_model.Word]
+        The parsed bash expression AST in typed JSON (dict) format.
 
     Raises
     ------
-    CallGraphError
-        When unable to create a bash node.
+    ParseError
+        When parsing fails with errors.
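+
+    Notes
+    -----
+    As an illustrative expectation (not a doctest): an expression such as
+    ``$FOO/bin`` should parse into a single ``Word`` whose ``Parts`` hold a
+    ``ParamExp`` for ``$FOO`` followed by a ``Lit`` for ``/bin``; see
+    ``bashparser_model`` for the exact node shapes.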
""" - if recursion_depth > defaults.getint("bashparser", "recursion_depth", fallback=3): - raise CallGraphError(f"The analysis has reached maximum recursion depth {recursion_depth} at {source_path}.") - parsed_bash_script = {} - working_dir = None - match node_type: - case BashScriptType.INLINE: - if ci_step_ast is None: - raise CallGraphError(f"Unable to find the parsed AST for the CI step at {source_path}.") - working_dir = ci_step_ast.get("working-directory") - run_script = get_run_step(ci_step_ast) - if run_script is None: - raise CallGraphError(f"Invalid run step at {source_path}.") - try: - parsed_bash_script = parse(run_script, macaron_path=macaron_path) - except ParseError as error: - logger.debug(error) - case BashScriptType.FILE: - try: - parsed_bash_script = parse_file(source_path, macaron_path=macaron_path) - except ParseError as error: - logger.debug(error) - bash_node = BashNode( - name, - node_type, - source_path, - parsed_step_obj=ci_step_ast, - parsed_bash_obj=parsed_bash_script, - node_id=node_id, - caller=caller, - ) - caller_commands = parsed_bash_script.get("commands", []) - - # Parse the bash script files called from the current script. - if caller_commands and repo_path: - for cmd in caller_commands: - # Parse the scripts that end with `.sh`. - # TODO: parse Makefiles for bash commands. - if not cmd or not cmd[0] or not cmd[0].endswith(".sh"): - continue - - # Check for path traversal patterns before analyzing a bash file. - bash_file_path = os.path.realpath(os.path.join(repo_path, working_dir or "", cmd[0])) - if os.path.exists(bash_file_path) and bash_file_path.startswith(repo_path): - try: - callee = create_bash_node( - name=cmd[0], - node_id=node_id, - node_type=BashScriptType.FILE, - source_path=bash_file_path, - ci_step_ast=None, - repo_path=repo_path, - caller=bash_node, - recursion_depth=recursion_depth + 1, - macaron_path=macaron_path, - ) - except CallGraphError as error: - raise error - bash_node.add_callee(callee) - return bash_node + if not macaron_path: + macaron_path = global_config.macaron_path + cmd = [ + os.path.join(macaron_path, "bin", "bashexprparser"), + "-input", + bash_expr_content, + ] + try: + result = subprocess.run( # nosec B603 + cmd, + capture_output=True, + check=True, + cwd=macaron_path, + timeout=defaults.getint("bashparser", "timeout", fallback=30), + ) + except ( + subprocess.CalledProcessError, + subprocess.TimeoutExpired, + FileNotFoundError, + ) as error: + raise ParseError("Error while parsing bash expr.") from error + + try: + if result.returncode == 0: + return cast(list[Word], json.loads(result.stdout.decode("utf-8"))) + + raise ParseError(f"Bash script parser failed: {result.stderr.decode('utf-8')}") + + except json.JSONDecodeError as error: + raise ParseError("Error while loading the parsed bash script.") from error diff --git a/src/macaron/parsers/bashparser_model.py b/src/macaron/parsers/bashparser_model.py new file mode 100644 index 000000000..09ca83813 --- /dev/null +++ b/src/macaron/parsers/bashparser_model.py @@ -0,0 +1,848 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Type definitions for Bash AST as produced (and json-serialised) by the "mvdan.cc/sh/v3/syntax" bash parser.""" + +# Suppress warnings about non-ALL_CAPS names (that reference the original names and so are not ours to change). 
+# pylint: disable=invalid-name + +# Suppress false positive warnings caused by a field with the same name as its type (again, not our names to change) +# pylint: disable=used-before-assignment + +# Refer to original definitions in codebase for description, no need to comment here. +# pylint: disable=missing-class-docstring,missing-function-docstring + +# Forward-referencing union type defs are not expressible with "|" until python 3.12 type statement syntax is available. +# pylint: disable=consider-alternative-union-syntax + +# Suppress all flake8 warnings for the same reasons as above (it does not allow selective disabling at file level). +# flake8: noqa + +from __future__ import annotations + +from enum import Enum +from typing import Literal, NotRequired, TypedDict, TypeGuard, Union + + +class Pos(TypedDict): + Offset: int + Line: int + Col: int + + +class Comment(TypedDict): + Hash: Pos + Text: str + + +WordPart = Union[ + "Lit", "SglQuoted", "DblQuoted", "ParamExp", "CmdSubst", "ArithmExp", "ProcSubst", "ExtGlob", "BraceExp" +] + +ArithmExpr = Union["BinaryArithm", "UnaryArithm", "ParenArithm", "Word"] + +UnAritOperator = int + + +class UnAritOperators(Enum): + Not = 34 # ! + BitNegation = 35 # ~ + Inc = 36 # ++ + Dec = 37 # -- + Plus = 68 # + + Minus = 70 # - + + +class UnaryArithm(TypedDict): + Type: Literal["UnaryArithm"] + Pos: Pos + End: Pos + OpPos: Pos + Op: UnAritOperator + Post: NotRequired[bool] + X: ArithmExpr + + +def is_unary_arithm(expr: ArithmExpr) -> TypeGuard[UnaryArithm]: + return expr.get("Type", "") == "UnaryArithm" + + +BinAritOperator = int + + +class BinAritOperators(Enum): + Add = 68 # + + Sub = 70 # - + Mul = 38 # * + Quo = 85 # / + Rem = 76 # % + Pow = 39 # ** + Eql = 40 # == + Gtr = 54 # > + Lss = 56 # < + Neq = 41 # != + Leq = 42 # <= + Geq = 43 # >= + And = 9 # & + Or = 12 # | + Xor = 80 # ^ + Shr = 55 # >> + Shl = 61 # << + + AndArit = 10 # && + OrArit = 11 # || + Comma = 82 # , + TernQuest = 72 # ? 
+ TernColon = 87 # : + + Assgn = 74 # = + AddAssgn = 44 # += + SubAssgn = 45 # -= + MulAssgn = 46 # *= + QuoAssgn = 47 # /= + RemAssgn = 48 # %= + AndAssgn = 49 # &= + OrAssgn = 50 # |= + XorAssgn = 51 # ^= + ShlAssgn = 52 # <<= + ShrAssgn = 53 # >>= + + +class BinaryArithm(TypedDict): + Type: Literal["BinaryArithm"] + Pos: Pos + End: Pos + OpPos: Pos + Op: BinAritOperator + X: ArithmExpr + Y: ArithmExpr + + +def is_binary_arithm(expr: ArithmExpr) -> TypeGuard[BinaryArithm]: + return expr.get("Type", "") == "BinaryArithm" + + +class ParenArithm(TypedDict): + Type: Literal["ParenArithm"] + Pos: Pos + End: Pos + Lparen: Pos + Rparen: Pos + X: ArithmExpr + + +def is_paren_arithm(expr: ArithmExpr) -> TypeGuard[ParenArithm]: + return expr.get("Type", "") == "ParenArithm" + + +def is_word_arithm(expr: ArithmExpr) -> TypeGuard[Word]: + return "Type" not in expr + + +class Lit(TypedDict): + Type: Literal["Lit"] + Pos: Pos + End: Pos + ValuePos: Pos + ValueEnd: Pos + Value: str + + +def is_lit(part: WordPart) -> TypeGuard[Lit]: + return part["Type"] == "Lit" + + +class SglQuoted(TypedDict): + Type: Literal["SglQuoted"] + Pos: Pos + End: Pos + Left: Pos + Right: Pos + Dollar: NotRequired[bool] + Value: str + + +def is_sgl_quoted(part: WordPart) -> TypeGuard[SglQuoted]: + return part["Type"] == "SglQuoted" + + +class DblQuoted(TypedDict): + Type: Literal["DblQuoted"] + Pos: Pos + End: Pos + Left: Pos + Right: Pos + Dollar: NotRequired[bool] + Parts: NotRequired[list[WordPart]] + + +def is_dbl_quoted(part: WordPart) -> TypeGuard[DblQuoted]: + return part["Type"] == "DblQuoted" + + +class Slice(TypedDict): + Offset: ArithmExpr + Length: ArithmExpr + + +class Replace(TypedDict): + All: NotRequired[bool] + Orig: Word + With: Word + + +ParNamesOperator = int + + +class ParNamesOperators(Enum): + NamesPrefix = 38 # * + NamesPrefixWords = 84 # @ + + +ParExpOperator = int + + +class ParExpOperators(Enum): + AlternateUnset = 68 # + + AlternateUnsetOrNull = 69 # :+ + DefaultUnset = 70 # - + DefaultUnsetOrNull = 71 # :- + ErrorUnset = 72 # ? + ErrorUnsetOrNull = 73 # :? 
+    AssignUnset = 74  # =
+    AssignUnsetOrNull = 75  # :=
+    RemSmallSuffix = 76  # %
+    RemLargeSuffix = 77  # %%
+    RemSmallPrefix = 78  # #
+    RemLargePrefix = 79  # ##
+    UpperFirst = 80  # ^
+    UpperAll = 81  # ^^
+    LowerFirst = 82  # ,
+    LowerAll = 83  # ,,
+    OtherParamOps = 84  # @
+
+
+class Expansion(TypedDict):
+    Op: ParExpOperator
+    Word: Word
+
+
+class ParamExp(TypedDict):
+    Type: Literal["ParamExp"]
+    Pos: Pos
+    End: Pos
+    Dollar: NotRequired[Pos]
+    Rbrace: NotRequired[Pos]
+    Short: NotRequired[bool]
+    Excl: NotRequired[bool]
+    Length: NotRequired[bool]
+    Width: NotRequired[bool]
+    Param: Lit
+    Index: NotRequired[ArithmExpr]
+    Slice: NotRequired[Slice]
+    Repl: NotRequired[Replace]
+    Names: NotRequired[ParNamesOperator]
+    Exp: NotRequired[Expansion]
+
+
+def is_param_exp(part: WordPart) -> TypeGuard[ParamExp]:
+    return part["Type"] == "ParamExp"
+
+
+class CmdSubst(TypedDict):
+    Type: Literal["CmdSubst"]
+    Pos: Pos
+    End: Pos
+    Left: Pos
+    Right: Pos
+    Stmts: list[Stmt]
+    Last: NotRequired[list[Comment]]
+    Backquotes: NotRequired[bool]
+    TempFile: NotRequired[bool]
+    ReplyVar: NotRequired[bool]
+
+
+def is_cmd_subst(part: WordPart) -> TypeGuard[CmdSubst]:
+    return part["Type"] == "CmdSubst"
+
+
+class ArithmExp(TypedDict):
+    Type: Literal["ArithmExp"]
+    Pos: Pos
+    End: Pos
+    Left: Pos
+    Right: Pos
+    Bracket: NotRequired[bool]
+    Unsigned: NotRequired[bool]
+    X: ArithmExpr
+
+
+def is_arithm_exp(part: WordPart) -> TypeGuard[ArithmExp]:
+    return part["Type"] == "ArithmExp"
+
+
+ProcOperator = int
+
+
+class ProcOperators(Enum):
+    CmdIn = 66  # <(
+    CmdOut = 67  # >(
+
+
+class ProcSubst(TypedDict):
+    Type: Literal["ProcSubst"]
+    Pos: Pos
+    End: Pos
+    OpPos: Pos
+    Rparen: Pos
+    Op: ProcOperator
+    Stmts: list[Stmt]
+    Last: NotRequired[list[Comment]]
+
+
+def is_proc_subst(part: WordPart) -> TypeGuard[ProcSubst]:
+    return part["Type"] == "ProcSubst"
+
+
+GlobOperator = int
+
+
+class GlobOperators(Enum):
+    GlobZeroOrOne = 122  # ?(
+    GlobZeroOrMore = 123  # *(
+    GlobOneOrMore = 124  # +(
+    GlobOne = 125  # @(
+    GlobExcept = 126  # !(
+
+
+class ExtGlob(TypedDict):
+    Type: Literal["ExtGlob"]
+    Pos: Pos
+    End: Pos
+    OpPos: Pos
+    Op: GlobOperator
+    Pattern: Lit
+
+
+def is_ext_glob(part: WordPart) -> TypeGuard[ExtGlob]:
+    return part["Type"] == "ExtGlob"
+
+
+class BraceExp(TypedDict):
+    Type: Literal["BraceExp"]
+    Pos: Pos
+    End: Pos
+    Sequence: NotRequired[bool]
+    Elems: list[Word]
+
+
+def is_brace_exp(part: WordPart) -> TypeGuard[BraceExp]:
+    return part["Type"] == "BraceExp"
+
+
+class Word(TypedDict):
+    Parts: list[WordPart]
+
+
+RedirOperator = int
+
+
+class RedirOperators(Enum):
+    RdrOut = 54  # >
+    AppOut = 55  # >>
+    RdrIn = 56  # <
+    RdrInOut = 57  # <>
+    DplIn = 58  # <&
+    DplOut = 59  # >&
+    ClbOut = 60  # >|
+    Hdoc = 61  # <<
+    DashHdoc = 62  # <<-
+    WordHdoc = 63  # <<<
+    RdrAll = 64  # &>
+    AppAll = 65  # &>>
+
+
+class Redirect(TypedDict):
+    Pos: Pos
+    End: Pos
+    OpPos: Pos
+    Op: RedirOperator
+    N: NotRequired[Lit]
+    Word: NotRequired[Word]
+    Hdoc: NotRequired[Word]
+
+
+class ArrayElem(TypedDict):
+    Pos: Pos
+    End: Pos
+    Index: NotRequired[ArithmExpr]
+    Value: NotRequired[Word]
+    Comments: NotRequired[list[Comment]]
+
+
+class ArrayExpr(TypedDict):
+    Pos: Pos
+    End: Pos
+    Lparen: Pos
+    Rparen: Pos
+    Elems: list[ArrayElem]
+    Last: NotRequired[list[Comment]]
+
+
+class Assign(TypedDict):
+    Pos: Pos
+    End: Pos
+    Append: NotRequired[bool]
+    Naked: NotRequired[bool]
+    Name: Lit
+    Index: NotRequired[ArithmExpr]
+    Value: NotRequired[Word]
+    Array: NotRequired[ArrayExpr]
+
+
+Command = Union[
+    "CallExpr",
"IfClause", + "WhileClause", + "ForClause", + "CaseClause", + "Block", + "Subshell", + "BinaryCmd", + "FuncDecl", + "ArithmCmd", + "TestClause", + "DeclClause", + "LetClause", + "TimeClause", + "CoprocClause", + "TestDecl", +] + + +class CallExpr(TypedDict): + Type: Literal["CallExpr"] + Pos: Pos + End: Pos + Assigns: NotRequired[list[Assign]] + Args: NotRequired[list[Word]] + + +def is_call_expr(cmd: Command) -> TypeGuard[CallExpr]: + return cmd["Type"] == "CallExpr" + + +class IfClause(TypedDict): + Type: Literal["IfClause"] + Pos: Pos + End: Pos + Position: Pos + ThenPos: NotRequired[Pos] + FiPos: NotRequired[Pos] + Cond: list[Stmt] + CondLast: NotRequired[list[Comment]] + Then: list[Stmt] + ThenLast: NotRequired[list[Comment]] + Else: NotRequired[IfClause | ElseClause] + Last: NotRequired[list[Comment]] + + +def is_if_clause(cmd: Command) -> TypeGuard[IfClause]: + return cmd["Type"] == "IfClause" + + +class ElseClause(TypedDict): + Pos: Pos + End: Pos + Position: Pos + FiPos: NotRequired[Pos] + Then: list[Stmt] + ThenLast: NotRequired[list[Comment]] + Last: NotRequired[list[Comment]] + + +def is_else_clause(clause: IfClause | ElseClause) -> TypeGuard[ElseClause]: + return "Type" not in clause + + +class WhileClause(TypedDict): + Type: Literal["WhileClause"] + Pos: Pos + End: Pos + WhilePos: Pos + DoPos: Pos + DonePos: Pos + Cond: list[Stmt] + CondLast: NotRequired[list[Comment]] + Do: list[Stmt] + DoLast: NotRequired[list[Comment]] + + +def is_while_clause(cmd: Command) -> TypeGuard[WhileClause]: + return cmd["Type"] == "WhileClause" + + +Loop = Union["WordIter", "CStyleLoop"] + + +class WordIter(TypedDict): + Type: Literal["WordIter"] + Pos: Pos + End: Pos + Name: Lit + InPos: Pos + Items: list[Word] + + +def is_word_iter(loop: Loop) -> TypeGuard[WordIter]: + return loop["Type"] == "WordIter" + + +class CStyleLoop(TypedDict): + Type: Literal["CStyleLoop"] + Pos: Pos + End: Pos + Lparen: Pos + Rparen: Pos + Init: NotRequired[ArithmExpr] + Cond: NotRequired[ArithmExpr] + Post: NotRequired[ArithmExpr] + + +def is_cstyle_loop(loop: Loop) -> TypeGuard[CStyleLoop]: + return loop["Type"] == "CStyleLoop" + + +class ForClause(TypedDict): + Type: Literal["ForClause"] + Pos: Pos + End: Pos + ForPos: Pos + DoPos: Pos + DonePos: Pos + Select: NotRequired[bool] + Braces: NotRequired[bool] + Loop: Loop + Do: list[Stmt] + DoLast: NotRequired[list[Comment]] + + +def is_for_clause(cmd: Command) -> TypeGuard[ForClause]: + return cmd["Type"] == "ForClause" + + +CaseOperator = int + + +class CaseOperators(Enum): + Break = 30 # ;; + Fallthrough = 31 # ;& + Resume = 32 # ;;& + ResumeKorn = 33 # ;| + + +class CaseItem(TypedDict): + Pos: Pos + End: Pos + Op: CaseOperator + OpPos: Pos + Comments: NotRequired[list[Comment]] + Patterns: list[Word] + Stmts: list[Stmt] + Last: NotRequired[list[Comment]] + + +class CaseClause(TypedDict): + Type: Literal["CaseClause"] + Pos: Pos + End: Pos + Case: Pos + In: Pos + Esac: Pos + Braces: NotRequired[bool] + Word: Word + Items: list[CaseItem] + Last: NotRequired[list[Comment]] + + +def is_case_clause(cmd: Command) -> TypeGuard[CaseClause]: + return cmd["Type"] == "CaseClause" + + +class Block(TypedDict): + Type: Literal["Block"] + Pos: Pos + End: Pos + Lbrace: Pos + Rbrace: Pos + Stmts: list[Stmt] + Last: NotRequired[list[Comment]] + + +def is_block(cmd: Command) -> TypeGuard[Block]: + return cmd["Type"] == "Block" + + +class Subshell(TypedDict): + Type: Literal["Subshell"] + Pos: Pos + End: Pos + Lparen: Pos + Rparen: Pos + Stmts: list[Stmt] + Last: 
NotRequired[list[Comment]] + + +def is_subshell(cmd: Command) -> TypeGuard[Subshell]: + return cmd["Type"] == "Subshell" + + +BinCmdOperator = int + + +class BinCmdOperators(Enum): + AndStmt = 10 # && + OrStmt = 11 # || + Pipe = 12 # | + PipeAll = 13 # |& + + +class BinaryCmd(TypedDict): + Type: Literal["BinaryCmd"] + Pos: Pos + End: Pos + OpPos: Pos + Op: BinCmdOperator + X: Stmt + Y: Stmt + + +def is_binary_cmd(cmd: Command) -> TypeGuard[BinaryCmd]: + return cmd["Type"] == "BinaryCmd" + + +class FuncDecl(TypedDict): + Type: Literal["FuncDecl"] + Pos: Pos + End: Pos + Position: Pos + RsrvWord: NotRequired[bool] + Parens: NotRequired[bool] + Name: Lit + Body: Stmt + + +def is_func_decl(cmd: Command) -> TypeGuard[FuncDecl]: + return cmd["Type"] == "FuncDecl" + + +class ArithmCmd(TypedDict): + Type: Literal["ArithmCmd"] + Pos: Pos + End: Pos + Left: Pos + Right: Pos + Unsigned: NotRequired[bool] + X: ArithmExpr + + +def is_arithm_cmd(cmd: Command) -> TypeGuard[ArithmCmd]: + return cmd["Type"] == "ArithmCmd" + + +TestExpr = Union["BinaryTest", "UnaryTest", "ParenTest", "Word"] + +BinTestOperator = int + + +class BinTestOperators(Enum): + TsReMatch = 112 # =~ + TsNewer = 113 # -nt + TsOlder = 114 # -ot + TsDevIno = 115 # -ef + TsEql = 116 # -eq + TsNeq = 117 # -ne + TsLeq = 118 # -le + TsGeq = 119 # -ge + TsLss = 120 # -lt + TsGtr = 121 # -gt + AndTest = 10 # && + OrTest = 11 # || + TsMatchShort = 74 # = + TsMatch = 40 # == + TsNoMatch = 41 # != + TsBefore = 56 # < + TsAfter = 54 # > + + +class BinaryTest(TypedDict): + Type: Literal["BinaryTest"] + Pos: Pos + End: Pos + OpPos: Pos + Op: BinTestOperator + X: TestExpr + Y: TestExpr + + +def is_binary_test(test_expr: TestExpr) -> TypeGuard[BinaryTest]: + return test_expr.get("Type", "") == "BinaryTest" + + +UnTestOperator = int + + +class UnTestOperators(Enum): + TsExists = 88 # -e + TsRegFile = 89 # -f + TsDirect = 90 # -d + TsCharSp = 91 # -c + TsBlckSp = 92 # -b + TsNmPipe = 93 # -p + TsSocket = 94 # -S + TsSmbLink = 95 # -L + TsSticky = 96 # -k + TsGIDSet = 97 # -g + TsUIDSet = 98 # -u + TsGrpOwn = 99 # -G + TsUsrOwn = 100 # -O + TsModif = 101 # -N + TsRead = 102 # -r + TsWrite = 103 # -w + TsExec = 104 # -x + TsNoEmpty = 105 # -s + TsFdTerm = 106 # -t + TsEmpStr = 107 # -z + TsNempStr = 108 # -n + TsOptSet = 109 # -o + TsVarSet = 110 # -v + TsRefVar = 111 # -R + TsNot = 34 # ! 
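+
+# Illustrative sketch (not part of the model): the TypeGuard helpers in this
+# module are intended for narrowing these union types while walking a parsed
+# AST, e.g. (assuming a Stmt obtained from bashparser.parse_raw):
+#
+#     def first_arg_literal(stmt: Stmt) -> str | None:
+#         cmd = stmt["Cmd"]
+#         if is_call_expr(cmd):
+#             for word in cmd.get("Args", []):
+#                 for part in word["Parts"]:
+#                     if is_lit(part):
+#                         return part["Value"]
+#         return None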
+ + +class UnaryTest(TypedDict): + Type: Literal["UnaryTest"] + Pos: Pos + End: Pos + OpPos: Pos + Op: UnTestOperator + X: TestExpr + + +def is_unary_test(test_expr: TestExpr) -> TypeGuard[UnaryTest]: + return test_expr.get("Type", "") == "UnaryTest" + + +class ParenTest(TypedDict): + Type: Literal["ParenTest"] + Pos: Pos + End: Pos + Lparen: Pos + Rparen: Pos + X: TestExpr + + +def is_paren_test(test_expr: TestExpr) -> TypeGuard[ParenTest]: + return test_expr.get("Type", "") == "ParenTest" + + +def is_word_test(test_expr: TestExpr) -> TypeGuard[Word]: + return "Type" not in test_expr + + +class TestClause(TypedDict): + Type: Literal["TestClause"] + Pos: Pos + End: Pos + Left: Pos + Right: Pos + X: TestExpr + + +def is_test_clause(cmd: Command) -> TypeGuard[TestClause]: + return cmd["Type"] == "TestClause" + + +class DeclClause(TypedDict): + Type: Literal["DeclClause"] + Pos: Pos + End: Pos + Variant: Lit + Args: list[Assign] + + +def is_decl_clause(cmd: Command) -> TypeGuard[DeclClause]: + return cmd["Type"] == "DeclClause" + + +class LetClause(TypedDict): + Type: Literal["LetClause"] + Pos: Pos + End: Pos + Let: Pos + Exprs: list[ArithmExpr] + + +def is_let_clause(cmd: Command) -> TypeGuard[LetClause]: + return cmd["Type"] == "LetClause" + + +class TimeClause(TypedDict): + Type: Literal["TimeClause"] + Pos: Pos + End: Pos + Time: Pos + PosixFormat: NotRequired[bool] + Stmt: Stmt + + +def is_time_clause(cmd: Command) -> TypeGuard[TimeClause]: + return cmd["Type"] == "TimeClause" + + +class CoprocClause(TypedDict): + Type: Literal["CoprocClause"] + Pos: Pos + End: Pos + Coproc: Pos + Name: Word + Stmt: Stmt + + +def is_coproc_clause(cmd: Command) -> TypeGuard[CoprocClause]: + return cmd["Type"] == "CoprocClause" + + +class TestDecl(TypedDict): + Type: Literal["TestDecl"] + Pos: Pos + End: Pos + Position: Pos + Description: Word + Body: Stmt + + +def is_test_decl(cmd: Command) -> TypeGuard[TestDecl]: + return cmd["Type"] == "TestDecl" + + +class Stmt(TypedDict): + Comments: NotRequired[list[Comment]] + Cmd: Command + Pos: Pos + End: Pos + Position: Pos + Semicolon: NotRequired[Pos] + Negated: NotRequired[bool] + Background: NotRequired[bool] + Coprocess: NotRequired[bool] + Redirs: NotRequired[list[Redirect]] + + +class File(TypedDict): + Type: Literal["File"] + Name: NotRequired[str] + Pos: Pos + End: Pos + Stmts: list[Stmt] + Last: NotRequired[list[Comment]] diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py index 48ddb8e52..d6f7f9d99 100644 --- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py +++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py @@ -3,6 +3,8 @@ """This module contains the BaseBuildTool class to be inherited by other specific Build Tools.""" +from __future__ import annotations + import glob import itertools import json @@ -14,14 +16,16 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import TypedDict +from typing import TYPE_CHECKING, TypedDict -from macaron.code_analyzer.call_graph import BaseNode from macaron.config.defaults import defaults from macaron.dependency_analyzer.cyclonedx import DependencyAnalyzer, NoneDependencyAnalyzer from macaron.slsa_analyzer.build_tool.language import BuildLanguage from macaron.slsa_analyzer.checks.check_result import Confidence, Evidence, EvidenceWeightMap +if TYPE_CHECKING: + from macaron.code_analyzer.dataflow_analysis.core import Node + logger: logging.Logger = logging.getLogger(__name__) @@ 
-57,7 +61,7 @@ class BuildToolCommand(TypedDict): ci_path: str #: The CI step object that calls the command. - step_node: BaseNode | None + step_node: Node | None #: The list of name of reachable variables that contain secrets.""" reachable_secrets: list[str] diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index fd1260474..bf3693a78 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -5,27 +5,26 @@ import logging import os -from typing import cast from sqlalchemy import ForeignKey from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import String +from macaron.code_analyzer.dataflow_analysis.analysis import get_build_tool_commands, get_containing_github_job +from macaron.code_analyzer.dataflow_analysis.core import traverse_bfs +from macaron.code_analyzer.dataflow_analysis.github import ( + GitHubActionsActionStepNode, + GitHubActionsReusableWorkflowCallNode, + GitHubActionsRunStepNode, +) from macaron.database.table_definitions import CheckFacts from macaron.errors import CallGraphError, ProvenanceError -from macaron.parsers.bashparser import BashNode -from macaron.parsers.github_workflow_model import ActionStep from macaron.provenance.provenance_extractor import ProvenancePredicate from macaron.slsa_analyzer.analyze_context import AnalyzeContext, store_inferred_build_info_results from macaron.slsa_analyzer.checks.base_check import BaseCheck from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService, NoneCIService from macaron.slsa_analyzer.ci_service.circleci import CircleCI -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import ( - GitHubJobNode, - GitHubWorkflowNode, - GitHubWorkflowType, -) from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI from macaron.slsa_analyzer.ci_service.travis import Travis from macaron.slsa_analyzer.registry import registry @@ -147,95 +146,94 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: if isinstance(ci_service, NoneCIService): continue + callgraph = ci_info["callgraph"] + trusted_deploy_actions = tool.ci_deploy_kws["github_actions"] or [] # Check for use of a trusted GitHub Actions workflow to publish/deploy. # TODO: verify that deployment is legitimate and not a test if trusted_deploy_actions: - for callee in ci_info["callgraph"].bfs(): - if isinstance(callee, GitHubWorkflowNode) and callee.node_type in [ - GitHubWorkflowType.EXTERNAL, - GitHubWorkflowType.REUSABLE, - ]: - workflow_name = callee.name.split("@")[0] - - if not workflow_name: - logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - continue - if workflow_name in trusted_deploy_actions: - job_id = None - step_id = None - step_name = None - caller_path = "" - job = callee.caller - - # We always expect the caller of the node that calls a third-party - # or Reusable GitHub Action to be a GitHubJobNode. - if not isinstance(job, GitHubJobNode): - continue - - job_id = job.parsed_obj.id - caller_path = job.source_path - - # Only third-party Actions can be called from a step. - # Reusable workflows have to be directly called from the job. 
- # See https://docs.github.com/en/actions/sharing-automations/ \ - # reusing-workflows#calling-a-reusable-workflow - if callee.node_type == GitHubWorkflowType.EXTERNAL: - callee_step_obj = cast(ActionStep, callee.parsed_obj) - if "id" in callee_step_obj: - step_id = callee_step_obj["id"] - if "name" in callee_step_obj: - step_name = callee_step_obj["name"] - - trigger_link = ci_service.api_client.get_file_link( - ctx.component.repository.full_name, - ctx.component.repository.commit_sha, - file_path=( - ci_service.api_client.get_relative_path_of_workflow( - os.path.basename(caller_path) - ) - if caller_path - else "" - ), - ) + for root in ci_info["callgraph"].root_nodes: + for callee in traverse_bfs(root): + if isinstance(callee, (GitHubActionsReusableWorkflowCallNode, GitHubActionsActionStepNode)): + workflow_name = callee.uses_name + + if workflow_name in trusted_deploy_actions: + job_id = None + step_id = None + step_name = None + caller_path = "" + job = ( + get_containing_github_job(callee, callgraph.parents) + if isinstance(callee, GitHubActionsActionStepNode) + else callee + ) - trusted_workflow_confidence = tool.infer_confidence_deploy_workflow( - ci_path=caller_path, provenance_workflow=prov_workflow - ) - # Store or update the inferred build information if the confidence - # for the current check fact is bigger than the maximum score. - if ( - not result_tables - or trusted_workflow_confidence - > max(result_tables, key=lambda item: item.confidence).confidence - ): - store_inferred_build_info_results( - ctx=ctx, - ci_info=ci_info, - ci_service=ci_service, - trigger_link=trigger_link, - job_id=job_id, - step_id=step_id, - step_name=step_name, - callee_node_type=callee.node_type.value, + if not job: + continue + + job_id = job.job_id + caller_path = job.context.ref.workflow_context.ref.source_filepath + + # Only third-party Actions can be called from a step. + # Reusable workflows have to be directly called from the job. + # See https://docs.github.com/en/actions/sharing-automations/ \ + # reusing-workflows#calling-a-reusable-workflow + if isinstance(callee, GitHubActionsActionStepNode): + callee_node_type = "external" + if "id" in callee.definition: + step_id = callee.definition["id"] + if "name" in callee.definition: + step_name = callee.definition["name"] + else: + callee_node_type = "reusable" + + trigger_link = ci_service.api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + file_path=( + ci_service.api_client.get_relative_path_of_workflow( + os.path.basename(caller_path) + ) + if caller_path + else "" + ), ) - result_tables.append( - BuildAsCodeFacts( - build_tool_name=tool.name, - ci_service_name=ci_service.name, - build_trigger=trigger_link, - language=tool.language.value, - deploy_command=workflow_name, - confidence=trusted_workflow_confidence, + + trusted_workflow_confidence = tool.infer_confidence_deploy_workflow( + ci_path=caller_path, provenance_workflow=prov_workflow ) - ) - overall_res = CheckResultType.PASSED - try: - for build_command in ci_service.get_build_tool_commands( - callgraph=ci_info["callgraph"], build_tool=tool - ): + # Store or update the inferred build information if the confidence + # for the current check fact is bigger than the maximum score. 
+ if ( + not result_tables + or trusted_workflow_confidence + > max(result_tables, key=lambda item: item.confidence).confidence + ): + store_inferred_build_info_results( + ctx=ctx, + ci_info=ci_info, + ci_service=ci_service, + trigger_link=trigger_link, + job_id=job_id, + step_id=step_id, + step_name=step_name, + callee_node_type=callee_node_type, + ) + result_tables.append( + BuildAsCodeFacts( + build_tool_name=tool.name, + ci_service_name=ci_service.name, + build_trigger=trigger_link, + language=tool.language.value, + deploy_command=workflow_name, + confidence=trusted_workflow_confidence, + ) + ) + overall_res = CheckResultType.PASSED + try: + for build_command in get_build_tool_commands(nodes=callgraph, build_tool=tool): # Yes or no with a confidence score. result, confidence = tool.is_deploy_command( build_command, @@ -256,23 +254,27 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: not result_tables or confidence > max(result_tables, key=lambda item: item.confidence).confidence ): + job_id = None + step_id = None + step_name = None + step_node = build_command["step_node"] + if step_node: + job_node = get_containing_github_job(step_node, callgraph.parents) + if job_node is not None: + job_id = job_node.job_id + + if isinstance(step_node, GitHubActionsRunStepNode): + step_id = step_node.definition.get("id") + step_name = step_node.definition.get("name") + store_inferred_build_info_results( ctx=ctx, ci_info=ci_info, ci_service=ci_service, trigger_link=trigger_link, - job_id=( - build_command["step_node"].caller.name - if build_command["step_node"] - and isinstance(build_command["step_node"].caller, GitHubJobNode) - else None - ), - step_id=build_command["step_node"].node_id if build_command["step_node"] else None, - step_name=( - build_command["step_node"].name - if isinstance(build_command["step_node"], BashNode) - else None - ), + job_id=job_id, + step_id=step_id, + step_name=step_name, ) result_tables.append( BuildAsCodeFacts( diff --git a/src/macaron/slsa_analyzer/checks/build_script_check.py b/src/macaron/slsa_analyzer/checks/build_script_check.py index ccd61cca1..76374eed1 100644 --- a/src/macaron/slsa_analyzer/checks/build_script_check.py +++ b/src/macaron/slsa_analyzer/checks/build_script_check.py @@ -10,6 +10,7 @@ from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import String +from macaron.code_analyzer.dataflow_analysis.analysis import get_build_tool_commands from macaron.database.table_definitions import CheckFacts from macaron.errors import CallGraphError from macaron.slsa_analyzer.analyze_context import AnalyzeContext @@ -114,9 +115,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: if isinstance(ci_service, NoneCIService): continue try: - for build_command in ci_service.get_build_tool_commands( - callgraph=ci_info["callgraph"], build_tool=tool - ): + for build_command in get_build_tool_commands(ci_info["callgraph"], tool): trigger_link = ci_service.api_client.get_file_link( ctx.component.repository.full_name, ctx.component.repository.commit_sha, diff --git a/src/macaron/slsa_analyzer/checks/build_service_check.py b/src/macaron/slsa_analyzer/checks/build_service_check.py index cea689a7c..f2439d55a 100644 --- a/src/macaron/slsa_analyzer/checks/build_service_check.py +++ b/src/macaron/slsa_analyzer/checks/build_service_check.py @@ -10,6 +10,7 @@ from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import String +from macaron.code_analyzer.dataflow_analysis.analysis import get_build_tool_commands 
from macaron.database.table_definitions import CheckFacts from macaron.errors import CallGraphError from macaron.slsa_analyzer.analyze_context import AnalyzeContext, store_inferred_build_info_results @@ -118,9 +119,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: continue try: - for build_command in ci_service.get_build_tool_commands( - callgraph=ci_info["callgraph"], build_tool=tool - ): + for build_command in get_build_tool_commands(nodes=ci_info["callgraph"], build_tool=tool): # Yes or no with a confidence score. result, confidence = tool.is_package_command( build_command, ci_service.get_third_party_configurations() diff --git a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py index 967946bf1..48c6d445e 100644 --- a/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py +++ b/src/macaron/slsa_analyzer/checks/github_actions_vulnerability_check.py @@ -9,6 +9,12 @@ from sqlalchemy import ForeignKey, String from sqlalchemy.orm import Mapped, mapped_column +from macaron.code_analyzer.dataflow_analysis.analysis import get_containing_github_job +from macaron.code_analyzer.dataflow_analysis.core import traverse_bfs +from macaron.code_analyzer.dataflow_analysis.github import ( + GitHubActionsActionStepNode, + GitHubActionsReusableWorkflowCallNode, +) from macaron.database.db_custom_types import DBJsonList from macaron.database.table_definitions import CheckFacts from macaron.errors import APIAccessError @@ -16,7 +22,6 @@ from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.checks.base_check import BaseCheck, CheckResultType from macaron.slsa_analyzer.checks.check_result import CheckResultData, Confidence, JustificationType -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import GitHubWorkflowNode, GitHubWorkflowType from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.slsa_req import ReqName @@ -87,47 +92,47 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: external_workflows: dict[str, list] = {} for ci_info in ci_services: - for callee in ci_info["callgraph"].bfs(): - if isinstance(callee, GitHubWorkflowNode) and callee.node_type in { - GitHubWorkflowType.EXTERNAL, - GitHubWorkflowType.REUSABLE, - }: - workflow_name = workflow_version = "" - if "@" in callee.name: - workflow_name, workflow_version = callee.name.split("@") - else: - # Most likely we have encountered an internal reusable workflow, which - # can be skipped. - logger.debug("GitHub Actions workflow %s misses a version. Skipping...", callee.name) - continue - - caller_path = callee.caller.source_path if callee.caller else None - - # Skip the workflow if `workflow_name` or `workflow_version` are missing, - # or if `callee.name` lacks an '@' which can indicate an internal workflow - # within the same repo . - if not workflow_name or not workflow_version: - logger.debug("Workflow %s is not relevant. 
Skipping...", callee.name)
-                    continue
-
-                ext_workflow: list = external_workflows.get(workflow_name, [])
-                ext_workflow.append(
-                    {
-                        "version": workflow_version,
-                        "caller_path": ci_info["service"].api_client.get_file_link(
-                            ctx.component.repository.full_name,
-                            ctx.component.repository.commit_sha,
-                            file_path=(
-                                ci_info["service"].api_client.get_relative_path_of_workflow(
-                                    os.path.basename(caller_path)
-                                )
-                                if caller_path
-                                else ""
-                            ),
-                        ),
-                    }
-                )
-                external_workflows[workflow_name] = ext_workflow
+            callgraph = ci_info["callgraph"]
+            for root in callgraph.root_nodes:
+                for callee in traverse_bfs(root):
+                    if isinstance(callee, (GitHubActionsReusableWorkflowCallNode, GitHubActionsActionStepNode)):
+                        workflow_name = callee.uses_name
+                        workflow_version = callee.uses_version
+                        if workflow_version is None:
+                            # Most likely we have encountered an internal reusable workflow, which
+                            # can be skipped.
+                            logger.debug("GitHub Actions workflow %s misses a version. Skipping...", workflow_name)
+                            continue
+
+                        job = (
+                            get_containing_github_job(callee, callgraph.parents)
+                            if isinstance(callee, GitHubActionsActionStepNode)
+                            else callee
+                        )
+
+                        if not job:
+                            continue
+
+                        caller_path = job.context.ref.workflow_context.ref.source_filepath
+
+                        ext_workflow: list = external_workflows.get(workflow_name, [])
+                        ext_workflow.append(
+                            {
+                                "version": workflow_version,
+                                "caller_path": ci_info["service"].api_client.get_file_link(
+                                    ctx.component.repository.full_name,
+                                    ctx.component.repository.commit_sha,
+                                    file_path=(
+                                        ci_info["service"].api_client.get_relative_path_of_workflow(
+                                            os.path.basename(caller_path)
+                                        )
+                                        if caller_path
+                                        else ""
+                                    ),
+                                ),
+                            }
+                        )
+                        external_workflows[workflow_name] = ext_workflow
 
         # If no external GitHub Actions are found, return passed result.
         if not external_workflows:
diff --git a/src/macaron/slsa_analyzer/checks/trusted_builder_l3_check.py b/src/macaron/slsa_analyzer/checks/trusted_builder_l3_check.py
index e9f629447..f6ef41014 100644
--- a/src/macaron/slsa_analyzer/checks/trusted_builder_l3_check.py
+++ b/src/macaron/slsa_analyzer/checks/trusted_builder_l3_check.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
 
@@ -11,16 +11,16 @@ from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import String +from macaron.code_analyzer.dataflow_analysis.core import traverse_bfs +from macaron.code_analyzer.dataflow_analysis.github import ( + GitHubActionsActionStepNode, + GitHubActionsReusableWorkflowCallNode, +) from macaron.config.defaults import defaults from macaron.database.table_definitions import CheckFacts from macaron.slsa_analyzer.analyze_context import AnalyzeContext, store_inferred_build_info_results from macaron.slsa_analyzer.checks.base_check import BaseCheck from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import ( - GitHubJobNode, - GitHubWorkflowNode, - GitHubWorkflowType, -) from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.slsa_req import ReqName @@ -114,37 +114,36 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: trusted_builders = defaults.get_list("ci.github_actions", "trusted_builders", fallback=[]) # Look for trusted builders called as GitHub Actions. - for callee in ci_info["callgraph"].bfs(): - if isinstance(callee, GitHubWorkflowNode): - workflow_name = callee.name.split("@")[0] - - # Check if the action is called as a third-party or reusable workflow. - if not workflow_name or callee.node_type not in [ - GitHubWorkflowType.EXTERNAL, - GitHubWorkflowType.REUSABLE, - ]: - logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - continue - if workflow_name in trusted_builders: - caller_path = callee.caller.source_path if isinstance(callee.caller, GitHubJobNode) else "" - caller_link = ci_service.api_client.get_file_link( - ctx.component.repository.full_name, - ctx.component.repository.commit_sha, - ci_service.api_client.get_relative_path_of_workflow(os.path.basename(caller_path)), - ) - - store_inferred_build_info_results( - ctx=ctx, ci_info=ci_info, ci_service=ci_service, trigger_link=caller_link - ) - - found_builder = True - result_values.append( - { - "build_tool_name": callee.name, - "build_trigger": caller_link, - "ci_service_name": ci_service.name, - } - ) + for root in ci_info["callgraph"].root_nodes: + for callee in traverse_bfs(root): + if isinstance(callee, (GitHubActionsReusableWorkflowCallNode, GitHubActionsActionStepNode)): + + workflow_name = callee.uses_name + + if workflow_name in trusted_builders: + if isinstance(callee, GitHubActionsReusableWorkflowCallNode): + caller_path = callee.context.ref.workflow_context.ref.source_filepath + else: + caller_path = callee.context.ref.job_context.ref.workflow_context.ref.source_filepath + + caller_link = ci_service.api_client.get_file_link( + ctx.component.repository.full_name, + ctx.component.repository.commit_sha, + ci_service.api_client.get_relative_path_of_workflow(os.path.basename(caller_path)), + ) + + store_inferred_build_info_results( + ctx=ctx, ci_info=ci_info, ci_service=ci_service, trigger_link=caller_link + ) + + found_builder = True + result_values.append( + { + "build_tool_name": workflow_name, + "build_trigger": caller_link, + "ci_service_name": ci_service.name, + } + ) result_tables = [TrustedBuilderFacts(**result, confidence=Confidence.HIGH) for result in result_values] diff --git a/src/macaron/slsa_analyzer/ci_service/base_ci_service.py b/src/macaron/slsa_analyzer/ci_service/base_ci_service.py index 
adaa3ce95..9df7e8e70 100644 --- a/src/macaron/slsa_analyzer/ci_service/base_ci_service.py +++ b/src/macaron/slsa_analyzer/ci_service/base_ci_service.py @@ -3,15 +3,14 @@ """This module contains the BaseCIService class to be inherited by a CI service.""" +from __future__ import annotations + import logging import os from abc import abstractmethod -from collections.abc import Iterable from datetime import datetime -from macaron.code_analyzer.call_graph import BaseNode, CallGraph -from macaron.errors import CallGraphError -from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, BuildToolCommand +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.slsa_analyzer.git_service.api_client import BaseAPIClient from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService @@ -92,7 +91,7 @@ def is_detected( return exists @abstractmethod - def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: + def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest: """Build the call Graph for this CI service. Parameters @@ -104,7 +103,7 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: Returns ------- - CallGraph : CallGraph + NodeForest The call graph built for the CI. """ raise NotImplementedError @@ -245,31 +244,6 @@ def workflow_run_deleted(self, timestamp: datetime) -> bool: """ return False - def get_build_tool_commands(self, callgraph: CallGraph, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]: - """ - Traverse the callgraph and find all the reachable build tool commands. - - Parameters - ---------- - callgraph: CallGraph - The callgraph reachable from the CI workflows. - build_tool: BaseBuildTool - The corresponding build tool for which shell commands need to be detected. - - Yields - ------ - BuildToolCommand - The object that contains the build command as well useful contextual information. - - Raises - ------ - CallGraphError - Error raised when an error occurs while traversing the callgraph. - """ - # By default we assume that there is no callgraph available for a CI service. - # Each CI service should override this method if a callgraph is generated for it. - raise CallGraphError("There is no callgraph for this CI service.") - def get_third_party_configurations(self) -> list[str]: """Get the list of third-party CI configuration files. @@ -309,7 +283,7 @@ def load_defaults(self) -> None: def set_api_client(self) -> None: """Set the API client using the personal access token.""" - def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: + def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest: """Build the call Graph for this CI service. Parameters @@ -321,33 +295,10 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: Returns ------- - CallGraph : CallGraph + NodeForest The call graph built for the CI. """ - return CallGraph(BaseNode(), "") - - def get_build_tool_commands(self, callgraph: CallGraph, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]: - """ - Traverse the callgraph and find all the reachable build tool commands. - - Parameters - ---------- - callgraph: CallGraph - The callgraph reachable from the CI workflows. - build_tool: BaseBuildTool - The corresponding build tool for which shell commands need to be detected. - - Yields - ------ - BuildToolCommand - The object that contains the build command as well useful contextual information. 
- - Raises - ------ - CallGraphError - Error raised when an error occurs while traversing the callgraph. - """ - raise CallGraphError("There is no callgraph for this CI service.") + return NodeForest([]) def has_latest_run_passed( self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str diff --git a/src/macaron/slsa_analyzer/ci_service/circleci.py b/src/macaron/slsa_analyzer/ci_service/circleci.py index 1ac05bd86..72a838218 100644 --- a/src/macaron/slsa_analyzer/ci_service/circleci.py +++ b/src/macaron/slsa_analyzer/ci_service/circleci.py @@ -1,10 +1,11 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyze Circle CI.""" +from __future__ import annotations -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.config.defaults import defaults from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService @@ -42,7 +43,7 @@ def load_defaults(self) -> None: def set_api_client(self) -> None: """Set the API client using the personal access token.""" - def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: + def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest: """Build the call Graph for this CI service. Parameters @@ -54,10 +55,10 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: Returns ------- - CallGraph : CallGraph + NodeForest The call graph built for the CI. """ - return CallGraph(BaseNode(), "") + return NodeForest([]) def has_latest_run_passed( self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py b/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py deleted file mode 100644 index 3c234d755..000000000 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py +++ /dev/null @@ -1,801 +0,0 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
- -"""This module provides the intermediate representations and analysis functions for GitHub Actions.""" - -import logging -import os -import re -from collections.abc import Iterable -from dataclasses import dataclass -from enum import Enum -from typing import Any, TypeGuard, cast - -from macaron.code_analyzer.call_graph import BaseNode -from macaron.config.global_config import global_config -from macaron.errors import CallGraphError, GitHubActionsValueError, ParseError -from macaron.parsers.actionparser import get_step_input -from macaron.parsers.actionparser import parse as parse_action -from macaron.parsers.bashparser import BashNode, BashScriptType, create_bash_node -from macaron.parsers.github_workflow_model import ( - ActionStep, - Identified, - Job, - NormalJob, - ReusableWorkflowCallJob, - Step, - Workflow, - is_action_step, - is_normal_job, - is_reusable_workflow_call_job, -) -from macaron.slsa_analyzer.build_tool.language import BuildLanguage, Language - -logger: logging.Logger = logging.getLogger(__name__) - - -@dataclass(frozen=True) -class ThirdPartyAction: - """The representation for a third-party GitHub Action.""" - - #: The name of the GitHub Action. - action_name: str - - #: The version of the GitHub Action. - action_version: str | None - - -class GitHubWorkflowType(str, Enum): - """This class represents different GitHub Actions workflow types.""" - - INTERNAL = "internal" # Workflows declared in the repo. - EXTERNAL = "external" # Third-party workflows. - REUSABLE = "reusable" # Reusable workflows. - - -class GitHubWorkflowNode(BaseNode): - """This class represents a callgraph node for GitHub Actions workflows.""" - - def __init__( - self, - name: str, - node_type: GitHubWorkflowType, - source_path: str, - parsed_obj: Workflow | Identified[ReusableWorkflowCallJob] | ActionStep, - model: ThirdPartyAction | None = None, - **kwargs: Any, - ) -> None: - """Initialize instance. - - Parameters - ---------- - name : str - Name of the workflow (or URL for reusable and external workflows). - node_type : GitHubWorkflowType - The type of workflow. - source_path : str - The path of the workflow. - parsed_obj : Workflow | Identified[ReusableWorkflowCallJob] | ActionStep - The parsed Actions workflow object. Actual type must correspond to node type. - (INTERNAL -> Workflow, REUSABLE -> Identified[ReusableWorkflowCallJob], EXTERNAL -> ActionStep) - caller: BaseNode | None - The caller node. - model: ThirdPartyAction | None - The static analysis abstraction for the third-party GitHub Action. - """ - super().__init__(**kwargs) - self.name = name - self.node_type: GitHubWorkflowType = node_type - self.source_path = source_path - self.parsed_obj = parsed_obj - self.model = model - - def __str__(self) -> str: - return f"GitHubWorkflowNode({self.name},{self.node_type})" - - -class GitHubJobNode(BaseNode): - """This class represents a callgraph node for GitHub Actions jobs.""" - - def __init__(self, name: str, source_path: str, parsed_obj: Identified[Job], **kwargs: Any) -> None: - """Initialize instance. - - Parameters - ---------- - name : str - Name of the workflow (or URL for reusable and external workflows). - source_path : str - The path of the workflow. - parsed_obj : Identified[Job] - The parsed Actions workflow object. - caller: BaseNode - The caller node. 
-        """
-        super().__init__(**kwargs)
-        self.name = name
-        self.source_path = source_path
-        self.parsed_obj = parsed_obj
-
-    def __str__(self) -> str:
-        return f"GitHubJobNode({self.name})"
-
-
-def is_parsed_obj_workflow(
-    parsed_obj: Workflow | Identified[ReusableWorkflowCallJob] | ActionStep,
-) -> TypeGuard[Workflow]:
-    """Type guard for Workflow parsed_obj."""
-    return not isinstance(parsed_obj, Identified) and "jobs" in parsed_obj
-
-
-def is_parsed_obj_reusable_workflow_call_job(
-    obj: Workflow | Identified[ReusableWorkflowCallJob] | ActionStep,
-) -> TypeGuard[Identified[ReusableWorkflowCallJob]]:
-    """Type guard for ReusableWorkflowCallJob parsed_obj."""
-    return isinstance(obj, Identified)
-
-
-def is_parsed_obj_action_step(
-    parsed_obj: Workflow | Identified[ReusableWorkflowCallJob] | ActionStep,
-) -> TypeGuard[ActionStep]:
-    """Type guard for ActionStep parsed_obj."""
-    return not isinstance(parsed_obj, Identified) and "uses" in parsed_obj
-
-
-def find_expression_variables(value: str, exp_var: str) -> Iterable[str]:
-    """Find all the matching GitHub Actions expression variables in a string value.
-
-    GitHub Actions Expression syntax: ${{ <expression> }}
-    See https://docs.github.com/en/actions/learn-github-actions/expressions#about-expressions
-
-    Parameters
-    ----------
-    value: str
-        The value in which the expression variables are searched.
-    exp_var: str
-        The expression variable name.
-
-    Yields
-    ------
-    str
-        The expression variable names.
-
-    Examples
-    --------
-    >>> list(find_expression_variables("echo ${{ inputs.foo }}", "inputs"))
-    ['foo']
-    >>> list(find_expression_variables("echo ${{ inputs.foo }} ${{ inputs.bar }}", "inputs"))
-    ['foo', 'bar']
-    >>> list(find_expression_variables("echo ${{ inputs.foo }} ${{ inputs.bar }}", "matrix"))
-    []
-    """
-    expressions = re.findall(r"\$\{\{.*?\}\}", value)
-    pattern = r"\$\{\{\s+" + exp_var + r"\.(?P<variable>(.*?))\s+\}\}"
-    for exp in expressions:
-        match = re.match(pattern, exp)
-        if match:
-            yield match.group("variable")
-
-
-def resolve_matrix_variable(job_node: GitHubJobNode, var: str) -> Iterable[str]:
-    """Resolve the value of a GitHub Actions matrix variable.
-
-    For the specification of matrix variables in GitHub Actions see:
-    https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs
-
-    Parameters
-    ----------
-    job_node: GitHubJobNode
-        The target GitHub Actions job.
-    var: str
-        The matrix variable that needs to be resolved.
-
-    Yields
-    ------
-    str
-        The possible values of the matrix variable.
-
-    Raises
-    ------
-    GitHubActionsValueError
-        When the matrix variable cannot be found.
-    """
-    job_obj = job_node.parsed_obj.obj
-    if "strategy" not in job_obj:
-        raise GitHubActionsValueError(f"Unable to find `strategy` in {job_node.source_path} GitHub Action.")
-    if "matrix" not in job_obj["strategy"]:
-        raise GitHubActionsValueError(f"Unable to find `matrix` in {job_node.source_path} GitHub Action.")
-    matrix = job_obj["strategy"]["matrix"]
-    if not isinstance(matrix, dict):
-        raise GitHubActionsValueError(f"Unable to resolve matrix in {job_node.source_path} GitHub Action.")
-
-    matrix_vals = matrix.get(var)
-    if matrix_vals is None:
-        raise GitHubActionsValueError(f"Unable to find variable {var} in {job_node.source_path} GitHub Action.")
-
-    if isinstance(matrix_vals, list):
-        for val in matrix_vals:
-            # TODO: type of val permits dict/list, how to handle it? Just return Configuration instead of str
-            # and let the caller handle it?
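# Editorial sketch (not part of the patch): the named group "variable" restored
# in the pattern above is what match.group("variable") relies on. For example:
import re

pattern = r"\$\{\{\s+" + "matrix" + r"\.(?P<variable>(.*?))\s+\}\}"
match = re.match(pattern, "${{ matrix.java }}")
assert match is not None and match.group("variable") == "java"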
-            if isinstance(val, str):
-                yield val
-            if isinstance(val, int):
-                yield str(val)
-            if isinstance(val, float):
-                yield str(val)
-            if isinstance(val, bool):
-                yield "true" if val else "false"
-    else:
-        raise GitHubActionsValueError(f"Unable to resolve matrix in {job_node.source_path} GitHub Action.")
-
-
-def is_expression(value: str) -> bool:
-    """Determine if a value is a GitHub Actions expression.
-
-    Parameters
-    ----------
-    value: str
-        The input value.
-
-    Returns
-    -------
-    bool
-        True if the input value is a GitHub Actions expression.
-
-    Examples
-    --------
-    >>> is_expression("${{ foo }}")
-    True
-    >>> is_expression("${{ foo }")
-    False
-    >>> is_expression("${ foo }")
-    False
-    """
-    return re.match(r"\$\{\{.*?\}\}", value) is not None
-
-
-def find_language_setup_action(job_node: GitHubJobNode, lang_name: BuildLanguage) -> Language | None:
-    """Find the step that calls a language setup GitHub Action and return the model.
-
-    Parameters
-    ----------
-    job_node: GitHubJobNode
-        The target GitHub Actions job node.
-    lang_name: BuildLanguage
-        The target language used in the build.
-
-    Returns
-    -------
-    Language | None
-        The language model for the language setup GitHub Action or None.
-    """
-    for callee in job_node.callee:
-        model = callee.model
-        # Check if the model implements the Language protocol.
-        if isinstance(model, Language):
-            if model.lang_name == lang_name:
-                return model
-    return None
-
-
-def build_call_graph_from_node(node: GitHubWorkflowNode, repo_path: str) -> None:
-    """Analyze the GitHub Actions node to build the call graph.
-
-    Parameters
-    ----------
-    node : GitHubWorkflowNode
-        The node for a single GitHub Actions workflow.
-    repo_path: str
-        The file system path to the repo.
-    """
-    if not is_parsed_obj_workflow(node.parsed_obj):
-        return
-    jobs = node.parsed_obj["jobs"]
-    for job_name, job in jobs.items():
-        job_with_id = Identified[Job](job_name, job)
-        job_node = GitHubJobNode(name=job_name, source_path=node.source_path, parsed_obj=job_with_id, caller=node)
-        node.add_callee(job_node)
-
-        if is_normal_job(job):
-            # Add third-party workflows.
-            steps = job.get("steps")
-            if steps is None:
-                continue
-            for step in steps:
-                if is_action_step(step):
-                    # TODO: change source_path for external workflows.
-                    action_name = step["uses"]
-                    external_node = GitHubWorkflowNode(
-                        name=action_name,
-                        node_type=GitHubWorkflowType.EXTERNAL,
-                        source_path="",
-                        parsed_obj=step,
-                        caller=job_node,
-                    )
-                    external_node.model = create_third_party_action_model(external_node)
-                    job_node.add_callee(external_node)
-
-                # Check the shell type configuration. We can currently support `bash` and `sh`.
-                # By default `bash` is used on non-Windows runners, which we support.
-                # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#defaultsrunshell
-                # TODO: support Powershell for Windows runners, which is the default shell in GitHub Actions.
-                # Right now, the script with the default shell is passed to the parser, which will fail
-                # if the runner is Windows and Powershell is used. But there is no easy way to avoid passing
-                # the script because that means we need to accurately determine the runner's OS.
- elif step.get("run") and ("shell" not in step or step["shell"] in {"bash", "sh"}): - try: - name = "UNKNOWN" - node_id = None - if "id" in step: - node_id = step["id"] - if "name" in step: - name = step["name"] - - callee = create_bash_node( - name=name, - node_id=node_id, - node_type=BashScriptType.INLINE, - source_path=node.source_path, - ci_step_ast=step, - repo_path=repo_path, - caller=job_node, - recursion_depth=0, - ) - except CallGraphError as error: - logger.debug(error) - continue - job_node.add_callee(callee) - - elif is_reusable_workflow_call_job(job): - workflow_call_job_with_id = Identified[ReusableWorkflowCallJob](job_name, job) - # Add reusable workflows. - logger.debug("Found reusable workflow: %s.", job["uses"]) - # TODO: change source_path for reusable workflows. - reusable_node = GitHubWorkflowNode( - name=job["uses"], - node_type=GitHubWorkflowType.REUSABLE, - source_path="", - parsed_obj=workflow_call_job_with_id, - caller=job_node, - ) - reusable_node.model = create_third_party_action_model(reusable_node) - job_node.add_callee(reusable_node) - - -def build_call_graph_from_path(root: BaseNode, workflow_path: str, repo_path: str, macaron_path: str = "") -> BaseNode: - """Build the call Graph for GitHub Actions workflows. - - At the moment it does not analyze third-party workflows to include their callees. - - Parameters - ---------- - root : BaseNode - The root call graph node. - workflow_path: str - The path to the CI workflow file. - repo_path: str - The path to the target repository. - macaron_path: str - Macaron's root path (optional). - - Returns - ------- - BaseNode - The callgraph node for the GitHub Actions workflow. - - Raises - ------ - ParseError - When parsing the workflow fails with error. - """ - if not macaron_path: - macaron_path = global_config.macaron_path - - # Parse GitHub Actions workflows. - logger.debug( - "Parsing %s", - workflow_path, - ) - try: - parsed_obj: Workflow = parse_action(workflow_path) - except ParseError as error: - logger.debug("Unable to parse GitHub Actions at the target %s: %s", repo_path, error) - raise ParseError from error - - # Add internal workflows. - workflow_name = os.path.basename(workflow_path) - workflow_node = GitHubWorkflowNode( - name=workflow_name, - node_type=GitHubWorkflowType.INTERNAL, - source_path=workflow_path, - parsed_obj=parsed_obj, - caller=root, - ) - build_call_graph_from_node(workflow_node, repo_path=repo_path) - - return workflow_node - - -def get_reachable_secrets(step_node: BashNode) -> Iterable[str]: - """Get reachable secrets to a GitHub Actions step. - - Parameters - ---------- - step_node: BashNode - The target GitHub Action step node. - - Yields - ------ - str - The reachable secret variable name. - """ - job_node = step_node.caller - if not isinstance(job_node, GitHubJobNode): - return - - def _find_secret_keys(ast: NormalJob | ReusableWorkflowCallJob | Step | None) -> Iterable[str]: - if ast is None: - return - if "uses" in ast: - return - normal_job = cast(NormalJob, ast) - if "env" in normal_job: - env = normal_job["env"] - if isinstance(env, dict): - for key, val in env.items(): - if isinstance(val, str): - if list(find_expression_variables(value=val, exp_var="secrets")): - yield key - - # Get reachable secrets set as environment variables in the job. - yield from _find_secret_keys(job_node.parsed_obj.obj) - - # Get reachable secrets set as environment variables in the step. 
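# Editorial sketch (not part of the patch): how the env scan above flags
# reachable secrets. Given a hypothetical job env
#     env = {"TOKEN": "${{ secrets.PYPI_API_TOKEN }}", "CI": "true"}
# find_expression_variables(env["TOKEN"], "secrets") yields "PYPI_API_TOKEN",
# so the key "TOKEN" is yielded as a reachable secret; "CI" is not.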
- if step_node.node_type == BashScriptType.INLINE: - yield from _find_secret_keys(step_node.parsed_step_obj) - - -def get_ci_events(workflow_node: GitHubWorkflowNode) -> list[str] | None: - """Get the CI events that trigger the GitHub Action workflow. - - Parameters - ---------- - workflow_node: GitHubWorkflowNode - The target GitHub Action workflow node. - - Returns - ------- - list[str] | None - The list of event names or None. - """ - result: list[str] = [] - ast = workflow_node.parsed_obj - if not isinstance(ast, dict) or "on" not in ast: - raise GitHubActionsValueError(f"Unable to find `on` event in {workflow_node.source_path} GitHub Action.") - - on = cast(Workflow, ast)["on"] - - if isinstance(on, str): - result.append(on) - elif isinstance(on, list): - for hook in on: - result.append(hook) - else: - for key in on: - result.append(key) - - return result - - -class SetupJava(Language, ThirdPartyAction): - """This class models the official setup-java GitHub Action from GitHub. - - For the table of supported distributions see: - https://github.com/actions/setup-java?tab=readme-ov-file#supported-distributions - """ - - #: Name of the GitHub Action. - action_name = "actions/setup-java" - - #: Version of the GitHub Action. - action_version: None - - def __init__(self, external_node: GitHubWorkflowNode): - """Initialize the setup-java GitHub Action model. - - Parameters - ---------- - external_node: GitHubWorkflowNode - The external GitHub Action workflow node. - """ - # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj - step = external_node.parsed_obj - if not is_parsed_obj_action_step(step): - raise ValueError("Expected an action step node") - self._lang_name = BuildLanguage.JAVA - self._lang_distributions = None - self._lang_versions = None - self._lang_url = "https://github.com/actions/setup-java" - lang_distribution_exp = None - lang_version_exp = None - if distribution := get_step_input(step, key="distribution"): - if not is_expression(distribution): - self._lang_distributions = [distribution] - else: - lang_distribution_exp = distribution - if java_version := get_step_input(step, key="java-version"): - if not is_expression(java_version): - self._lang_versions = [java_version] - else: - lang_version_exp = java_version - # Handle matrix values. - matrix_values = {} - if lang_distribution_exp and "matrix." in lang_distribution_exp: - matrix_values["lang_distribution_var"] = find_expression_variables( - value=lang_distribution_exp, exp_var="matrix" - ) - if lang_version_exp and "matrix." 
in lang_version_exp:
-            matrix_values["lang_version_var"] = find_expression_variables(value=lang_version_exp, exp_var="matrix")
-
-        if matrix_values:
-            job_node = external_node.caller
-            if job_node is None:
-                logger.debug("Unable to find the caller GitHub Action job for step %s.", external_node.name)
-                return
-            try:
-                if (variables := matrix_values.get("lang_distribution_var")) is not None:
-                    values: list[str] = []
-                    for var in variables:
-                        values.extend(resolve_matrix_variable(job_node, var))
-                    if values:
-                        self._lang_distributions = values
-            except GitHubActionsValueError as error:
-                logger.debug(error)
-
-            try:
-                if (variables := matrix_values.get("lang_version_var")) is not None:
-                    values = []
-                    for var in variables:
-                        values.extend(resolve_matrix_variable(job_node, var))
-                    if values:
-                        self._lang_versions = values
-            except GitHubActionsValueError as error:
-                logger.debug(error)
-
-    @property
-    def lang_name(self) -> str:
-        """Get the name of the language."""
-        return self._lang_name
-
-    @property
-    def lang_versions(self) -> list[str] | None:
-        """Get the possible versions of the language."""
-        return self._lang_versions
-
-    @property
-    def lang_distributions(self) -> list[str] | None:
-        """Get the possible distributions of the language."""
-        return self._lang_distributions
-
-    @property
-    def lang_url(self) -> str | None:
-        """Get the URL that provides information about the language distributions and versions."""
-        return self._lang_url
-
-
-class OracleSetupJava(Language, ThirdPartyAction):
-    """This class models the Oracle setup-java GitHub Action.
-
-    For the table of supported distributions see:
-    https://github.com/oracle-actions/setup-java?tab=readme-ov-file#input-overview
-    """
-
-    #: Name of the GitHub Action.
-    action_name = "oracle-actions/setup-java"
-
-    #: Version of the GitHub Action.
-    action_version: None
-
-    def __init__(self, external_node: GitHubWorkflowNode):
-        """Initialize the Oracle setup-java GitHub Action model.
-
-        Parameters
-        ----------
-        external_node: GitHubWorkflowNode
-            The external GitHub Action workflow node.
-        """
-        # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj
-        step = external_node.parsed_obj
-        if not is_parsed_obj_action_step(step):
-            raise ValueError("Expected an action step node")
-        self._lang_name = BuildLanguage.JAVA
-        self._lang_distributions = None
-        self._lang_versions = None
-        self._lang_url = "https://github.com/oracle-actions/setup-java"
-        lang_distribution_exp = None
-        lang_version_exp = None
-        if website := get_step_input(step, key="website"):
-            if not is_expression(website):
-                self._lang_distributions = [website]
-            else:
-                lang_distribution_exp = website
-        if java_release := get_step_input(step, key="release"):
-            if not is_expression(java_release):
-                self._lang_versions = [java_release]
-            else:
-                lang_version_exp = java_release
-        # Handle matrix values.
-        matrix_values = {}
-        if lang_distribution_exp and "matrix." in lang_distribution_exp:
-            matrix_values["lang_distribution_var"] = find_expression_variables(
-                value=lang_distribution_exp, exp_var="matrix"
-            )
-        if lang_version_exp and "matrix."
in lang_version_exp:
-            matrix_values["lang_version_var"] = find_expression_variables(value=lang_version_exp, exp_var="matrix")
-
-        if matrix_values:
-            job_node = external_node.caller
-            if job_node is None:
-                logger.debug("Unable to find the caller GitHub Action job for step %s.", external_node.name)
-                return
-            try:
-                if (variables := matrix_values.get("lang_distribution_var")) is not None:
-                    values: list[str] = []
-                    for var in variables:
-                        values.extend(resolve_matrix_variable(job_node, var))
-                    if values:
-                        self._lang_distributions = values
-            except GitHubActionsValueError as error:
-                logger.debug(error)
-
-            try:
-                if (variables := matrix_values.get("lang_version_var")) is not None:
-                    values = []
-                    for var in variables:
-                        values.extend(resolve_matrix_variable(job_node, var))
-                    if values:
-                        self._lang_versions = values
-            except GitHubActionsValueError as error:
-                logger.debug(error)
-
-    @property
-    def lang_name(self) -> str:
-        """Get the name of the language."""
-        return self._lang_name
-
-    @property
-    def lang_versions(self) -> list[str] | None:
-        """Get the possible versions of the language."""
-        return self._lang_versions
-
-    @property
-    def lang_distributions(self) -> list[str] | None:
-        """Get the possible distributions of the language."""
-        return self._lang_distributions
-
-    @property
-    def lang_url(self) -> str | None:
-        """Get the URL that provides information about the language distributions and versions."""
-        return self._lang_url
-
-
-class GraalVMSetup(Language, ThirdPartyAction):
-    """This class models the GraalVM setup GitHub Action from GitHub.
-
-    For the table of supported distributions see:
-    https://github.com/graalvm/setup-graalvm
-    """
-
-    #: Name of the GitHub Action.
-    action_name = "graalvm/setup-graalvm"
-
-    #: Version of the GitHub Action.
-    action_version: None
-
-    def __init__(self, external_node: GitHubWorkflowNode):
-        """Initialize the setup-graalvm GitHub Action model.
-
-        Parameters
-        ----------
-        external_node: GitHubWorkflowNode
-            The external GitHub Action workflow node.
-        """
-        # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj
-        step = external_node.parsed_obj
-        if not is_parsed_obj_action_step(step):
-            raise ValueError("Expected an action step node")
-        self._lang_name = BuildLanguage.JAVA
-        self._lang_distributions = None
-        self._lang_versions = None
-        self._lang_url = "https://github.com/graalvm/setup-graalvm"
-        lang_distribution_exp = None
-        lang_version_exp = None
-        if distribution := get_step_input(step, key="distribution"):
-            if not is_expression(distribution):
-                self._lang_distributions = [distribution]
-            else:
-                lang_distribution_exp = distribution
-        if java_version := get_step_input(step, key="java-version"):
-            if not is_expression(java_version):
-                self._lang_versions = [java_version]
-            else:
-                lang_version_exp = java_version
-        # Handle matrix values.
-        matrix_values = {}
-        if lang_distribution_exp and "matrix." in lang_distribution_exp:
-            matrix_values["lang_distribution_var"] = find_expression_variables(
-                value=lang_distribution_exp, exp_var="matrix"
-            )
-        if lang_version_exp and "matrix."
in lang_version_exp:
-            matrix_values["lang_version_var"] = find_expression_variables(value=lang_version_exp, exp_var="matrix")
-
-        if matrix_values:
-            job_node = external_node.caller
-            if job_node is None:
-                logger.debug("Unable to find the caller GitHub Action job for step %s.", external_node.name)
-                return
-            try:
-                if (variables := matrix_values.get("lang_distribution_var")) is not None:
-                    values: list[str] = []
-                    for var in variables:
-                        values.extend(resolve_matrix_variable(job_node, var))
-                    if values:
-                        self._lang_distributions = values
-            except GitHubActionsValueError as error:
-                logger.debug(error)
-
-            try:
-                if (variables := matrix_values.get("lang_version_var")) is not None:
-                    values = []
-                    for var in variables:
-                        values.extend(resolve_matrix_variable(job_node, var))
-                    if values:
-                        self._lang_versions = values
-            except GitHubActionsValueError as error:
-                logger.debug(error)
-
-    @property
-    def lang_name(self) -> str:
-        """Get the name of the language."""
-        return self._lang_name
-
-    @property
-    def lang_versions(self) -> list[str] | None:
-        """Get the possible versions of the language."""
-        return self._lang_versions
-
-    @property
-    def lang_distributions(self) -> list[str] | None:
-        """Get the possible distributions of the language."""
-        return self._lang_distributions
-
-    @property
-    def lang_url(self) -> str | None:
-        """Get the URL that provides information about the language distributions and versions."""
-        return self._lang_url
-
-
-def create_third_party_action_model(external_node: GitHubWorkflowNode) -> ThirdPartyAction:
-    """Create an instance of a third-party action model object.
-
-    Parameters
-    ----------
-    external_node: GitHubWorkflowNode
-        The external GitHub Actions workflow node.
-
-    Returns
-    -------
-    ThirdPartyAction
-        An instance object for the ThirdPartyAction model.
- """ - action_name = external_node.name - action_version = None - if "@" in external_node.name: - action_name, action_version = external_node.name.split("@", maxsplit=1) - match action_name: - case "actions/setup-java": - return SetupJava(external_node=external_node) - case "oracle-actions/setup-java": - return OracleSetupJava(external_node=external_node) - case "graalvm/setup-graalvm": - return GraalVMSetup(external_node=external_node) - return ThirdPartyAction(action_name=action_name, action_version=action_version) diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index c0fd6aa46..b24dc5963 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -3,29 +3,19 @@ """This module analyzes GitHub Actions CI.""" +from __future__ import annotations import glob import logging import os -from collections.abc import Iterable from datetime import datetime, timedelta, timezone -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file +from macaron.code_analyzer.dataflow_analysis.core import Node, NodeForest from macaron.config.defaults import defaults from macaron.config.global_config import global_config -from macaron.errors import CallGraphError, GitHubActionsValueError, ParseError -from macaron.parsers.bashparser import BashNode, BashScriptType -from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, BuildToolCommand +from macaron.errors import GitHubActionsValueError, ParseError from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import ( - GitHubJobNode, - GitHubWorkflowNode, - GitHubWorkflowType, - build_call_graph_from_path, - find_language_setup_action, - get_ci_events, - get_reachable_secrets, -) from macaron.slsa_analyzer.git_service.api_client import GhAPIClient, get_default_gh_client from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService from macaron.slsa_analyzer.git_service.github import GitHub @@ -386,7 +376,7 @@ def workflow_run_in_date_time_range( raise GitHubActionsValueError("GitHub Actions workflow run misses jobs information.") for job in run_jobs["jobs"]: # If the deploy step is a Reusable Workflow, there won't be any steps in the caller job. - if callee_node_type == GitHubWorkflowType.REUSABLE.value: + if callee_node_type == "reusable": if not job["name"].startswith(job_id) or job["conclusion"] != "success": continue started_at = datetime.fromisoformat(job["started_at"]) @@ -576,7 +566,7 @@ def has_kws_in_log(self, latest_run: dict, build_log: list) -> bool: logger.info("No build kw in log file. Continue ...") return False - def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: + def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest: """Build the call Graph for GitHub Actions workflows. At the moment it does not analyze third-party workflows to include their callees. @@ -596,106 +586,18 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: if not macaron_path: macaron_path = global_config.macaron_path - root: BaseNode = BaseNode() - gh_cg = CallGraph(root, repo_path) - # Parse GitHub Actions workflows. 
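# Editorial sketch (not part of the patch): the replacement loop below builds
# one dataflow node per workflow file and wraps them in a NodeForest, roughly:
#
#     nodes: list[Node] = []
#     for workflow_path in self.get_workflows(repo_path):
#         try:
#             nodes.append(analyse_github_workflow_file(workflow_path, repo_path))
#         except ParseError:
#             continue
#     return NodeForest(nodes)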
files = self.get_workflows(repo_path) + nodes: list[Node] = [] for workflow_path in files: try: - callee = build_call_graph_from_path( - root=root, workflow_path=workflow_path, repo_path=repo_path, macaron_path=macaron_path - ) + workflow_node = analyse_github_workflow_file(workflow_path, repo_path) + except ParseError: logger.debug("Skip adding workflow at %s to the callgraph.", workflow_path) continue - root.add_callee(callee) - return gh_cg - - def _get_build_tool_commands(self, callgraph: CallGraph, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]: - """Traverse the callgraph and find all the reachable build tool commands.""" - for node in callgraph.bfs(): - # We are just interested in nodes that have bash commands. - if isinstance(node, BashNode): - # We collect useful contextual information for the called BashNode. - caller_node = node.caller - # The GitHub Actions workflow that triggers the path in the callgraph. - workflow_node = None - # The GitHub Actions job that triggers the path in the callgraph. - job_node = None - # The step in GitHub Actions job that triggers the path in the callgraph. - step_node = node if node.node_type == BashScriptType.INLINE else None - - # Walk up the callgraph to find the relevant caller nodes. - # In GitHub Actions a `GitHubWorkflowNode` may call several `GitHubJobNode`s - # and a `GitHubJobNode` may call several steps, which can be external `GitHubWorkflowNode` - # or inlined run nodes. We currently support the run steps that call shell scripts as - # `BashNode`. An inlined `BashNode` can call `BashNode` as bash files. - # TODO: revisit this implementation if analysis of external workflows is supported in - # the future, and decide if setting the caller workflow and job nodes to the nodes in the - # main triggering workflow is still expected. - while caller_node is not None: - match caller_node: - case GitHubWorkflowNode(): - workflow_node = caller_node - case GitHubJobNode(): - job_node = caller_node - case BashNode(node_type=BashScriptType.INLINE): - step_node = caller_node - - caller_node = caller_node.caller - - # Check if there was an issue in finding any of the caller nodes. - if workflow_node is None or job_node is None or step_node is None: - raise CallGraphError("Unable to traverse the call graph to find build commands.") - - # Find the bash commands that call the build tool. - for cmd in node.parsed_bash_obj.get("commands", []): - if build_tool.is_build_command(cmd): - lang_versions = lang_distributions = lang_url = None - if lang_model := find_language_setup_action(job_node, build_tool.language): - lang_versions = lang_model.lang_versions - lang_distributions = lang_model.lang_distributions - lang_url = lang_model.lang_url - yield BuildToolCommand( - ci_path=workflow_node.source_path, - command=cmd, - step_node=step_node, - language=build_tool.language, - language_versions=lang_versions, - language_distributions=lang_distributions, - language_url=lang_url, - reachable_secrets=list(get_reachable_secrets(step_node)), - events=get_ci_events(workflow_node), - ) - - def get_build_tool_commands(self, callgraph: CallGraph, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]: - """Traverse the callgraph and find all the reachable build tool commands. - - This generator yields sorted build tool command objects to allow a deterministic behavior. - The objects are sorted based on the string representation of the build tool object. - - Parameters - ---------- - callgraph: CallGraph - The callgraph reachable from the CI workflows. 
-        build_tool: BaseBuildTool
-            The corresponding build tool for which shell commands need to be detected.
-
-        Yields
-        ------
-        BuildToolCommand
-            The object that contains the build command as well as useful contextual information.
-
-        Raises
-        ------
-        CallGraphError
-            Error raised when an error occurs while traversing the callgraph.
-        """
-        yield from sorted(
-            self._get_build_tool_commands(callgraph=callgraph, build_tool=build_tool),
-            key=str,
-        )
+            nodes.append(workflow_node)
+        return NodeForest(nodes)

     def get_third_party_configurations(self) -> list[str]:
         """Get the list of third-party CI configuration files.
diff --git a/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py b/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py
index cd7e3210d..ede49002f 100644
--- a/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py
+++ b/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py
@@ -1,9 +1,11 @@
-# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

 """This module analyzes GitLab CI."""

-from macaron.code_analyzer.call_graph import BaseNode, CallGraph
+from __future__ import annotations
+
+from macaron.code_analyzer.dataflow_analysis.core import NodeForest
 from macaron.config.defaults import defaults
 from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService
@@ -41,7 +43,7 @@ def load_defaults(self) -> None:
     def set_api_client(self) -> None:
         """Set the API client using the personal access token."""

-    def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph:
+    def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest:
         """Build the call Graph for this CI service.

         Parameters
@@ -53,10 +55,10 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph:

         Returns
         -------
-        CallGraph : CallGraph
+        NodeForest
             The call graph built for the CI.
         """
-        return CallGraph(BaseNode(), "")
+        return NodeForest([])

     def has_latest_run_passed(
         self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
diff --git a/src/macaron/slsa_analyzer/ci_service/jenkins.py b/src/macaron/slsa_analyzer/ci_service/jenkins.py
index ebef614ca..c95edb4cb 100644
--- a/src/macaron/slsa_analyzer/ci_service/jenkins.py
+++ b/src/macaron/slsa_analyzer/ci_service/jenkins.py
@@ -3,21 +3,17 @@

 """This module analyzes Jenkins CI."""

+from __future__ import annotations
+
 import glob
 import logging
 import os
 import re
-from collections.abc import Iterable
-from enum import Enum
-from typing import Any

-from macaron.code_analyzer.call_graph import BaseNode, CallGraph
+from macaron.code_analyzer.dataflow_analysis.analysis import analyse_bash_script
+from macaron.code_analyzer.dataflow_analysis.core import Node, NodeForest
 from macaron.config.defaults import defaults
 from macaron.config.global_config import global_config
-from macaron.errors import ParseError
-from macaron.parsers import bashparser
-from macaron.repo_verifier.repo_verifier import BaseBuildTool
-from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand
 from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService

 logger: logging.Logger = logging.getLogger(__name__)
@@ -66,7 +62,7 @@ def load_defaults(self) -> None:
     def set_api_client(self) -> None:
         """Set the API client using the personal access token."""

-    def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph:
+    def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest:
         """Build the call Graph for this CI service.

         Parameters
@@ -78,114 +74,36 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph:

         Returns
         -------
-        CallGraph : CallGraph
+        NodeForest
             The call graph built for the CI.
         """
         if not macaron_path:
             macaron_path = global_config.macaron_path

-        root: BaseNode = BaseNode()
-        call_graph = CallGraph(root, repo_path)
-
         # To match lines that start with sh '' or sh ''' ''' (either single or triple quotes)
         # TODO: we need to support multi-line cases.
         pattern = r"^\s*sh\s+'{1,3}(.*?)'{1,3}$"
         workflow_files = self.get_workflows(repo_path)

+        nodes: list[Node] = []
+
         for workflow_path in workflow_files:
             try:
                 with open(workflow_path, encoding="utf-8") as wf:
                     lines = wf.readlines()
             except OSError as error:
                 logger.debug("Unable to read Jenkinsfile %s: %s", workflow_path, error)
-                return call_graph
-
-            # Add internal workflow.
-            workflow_name = os.path.basename(workflow_path)
-            workflow_node = JenkinsNode(
-                name=workflow_name,
-                node_type=JenkinsNodeType.INTERNAL,
-                source_path=workflow_path,
-                caller=root,
-            )
-            root.add_callee(workflow_node)
+                return NodeForest([])

             # Find matching lines.
             for line in lines:
                 match = re.match(pattern, line)
                 if not match:
                     continue
+                nodes.append(analyse_bash_script(match[1], workflow_path, repo_path))

-                try:
-                    parsed_bash_script = bashparser.parse(match.group(1), macaron_path=macaron_path)
-                except ParseError as error:
-                    logger.debug(error)
-                    continue
-
-                # TODO: Similar to GitHub Actions, we should enable support for recursive calls to bash scripts
-                # within Jenkinsfiles. While the implementation should be relatively straightforward, it’s
-                # recommended to first refactor the bashparser to make it agnostic to GitHub Actions.
-                bash_node = bashparser.BashNode(
-                    "jenkins_inline_cmd",
-                    bashparser.BashScriptType.INLINE,
-                    workflow_path,
-                    parsed_step_obj=None,
-                    parsed_bash_obj=parsed_bash_script,
-                    node_id=None,
-                    caller=workflow_node,
-                )
-                workflow_node.add_callee(bash_node)
-
-        return call_graph
-
-    def get_build_tool_commands(self, callgraph: CallGraph, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]:
-        """
-        Traverse the callgraph and find all the reachable build tool commands.
-
-        Parameters
-        ----------
-        callgraph: CallGraph
-            The callgraph reachable from the CI workflows.
-        build_tool: BaseBuildTool
-            The corresponding build tool for which shell commands need to be detected.
-
-        Yields
-        ------
-        BuildToolCommand
-            The object that contains the build command as well as useful contextual information.
-
-        Raises
-        ------
-        CallGraphError
-            Error raised when an error occurs while traversing the callgraph.
-        """
-        yield from sorted(
-            self._get_build_tool_commands(callgraph=callgraph, build_tool=build_tool),
-            key=str,
-        )
-
-    def _get_build_tool_commands(self, callgraph: CallGraph, build_tool: BaseBuildTool) -> Iterable[BuildToolCommand]:
-        """Traverse the callgraph and find all the reachable build tool commands."""
-        for node in callgraph.bfs():
-            # We are just interested in nodes that have bash commands.
-            if isinstance(node, bashparser.BashNode):
-                # The Jenkins configuration that triggers the path in the callgraph.
-                workflow_node = node.caller
-
-                # Find the bash commands that call the build tool.
-                for cmd in node.parsed_bash_obj.get("commands", []):
-                    if build_tool.is_build_command(cmd):
-                        yield BuildToolCommand(
-                            ci_path=workflow_node.source_path if workflow_node else "",
-                            command=cmd,
-                            step_node=None,
-                            language=build_tool.language,
-                            language_versions=None,
-                            language_distributions=None,
-                            language_url=None,
-                            reachable_secrets=[],
-                            events=None,
-                        )
+        return NodeForest(nodes)

     def has_latest_run_passed(
         self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
@@ -214,41 +132,3 @@ def has_latest_run_passed(
             The feed back of the check, or empty if no passing workflow is found.
         """
         return ""
-
-
-class JenkinsNodeType(str, Enum):
-    """This class represents Jenkins node type."""
-
-    INTERNAL = "internal"  # Configurations declared in one file.
-
-
-class JenkinsNode(BaseNode):
-    """This class represents a callgraph node for Jenkinsfile configuration."""
-
-    def __init__(
-        self,
-        name: str,
-        node_type: JenkinsNodeType,
-        source_path: str,
-        **kwargs: Any,
-    ) -> None:
-        """Initialize instance.
-
-        Parameters
-        ----------
-        name : str
-            Name of the workflow.
-        node_type : JenkinsNodeType
-            The type of node.
-        source_path : str
-            The path of the workflow.
-        caller: BaseNode | None
-            The caller node.
-        """
-        super().__init__(**kwargs)
-        self.name = name
-        self.node_type: JenkinsNodeType = node_type
-        self.source_path = source_path
-
-    def __str__(self) -> str:
-        return f"JenkinsNode({self.name},{self.node_type})"
diff --git a/src/macaron/slsa_analyzer/ci_service/travis.py b/src/macaron/slsa_analyzer/ci_service/travis.py
index 8b34d27e8..a50936860 100644
--- a/src/macaron/slsa_analyzer/ci_service/travis.py
+++ b/src/macaron/slsa_analyzer/ci_service/travis.py
@@ -1,9 +1,11 @@
-# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes Travis CI.""" -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from __future__ import annotations + +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.config.defaults import defaults from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService @@ -41,7 +43,7 @@ def load_defaults(self) -> None: def set_api_client(self) -> None: """Set the API client using the personal access token.""" - def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: + def build_call_graph(self, repo_path: str, macaron_path: str = "") -> NodeForest: """Build the call Graph for this CI service. Parameters @@ -53,10 +55,10 @@ def build_call_graph(self, repo_path: str, macaron_path: str = "") -> CallGraph: Returns ------- - CallGraph : CallGraph + NodeForest The call graph built for the CI. """ - return CallGraph(BaseNode(), "") + return NodeForest([]) def has_latest_run_passed( self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str diff --git a/src/macaron/slsa_analyzer/specs/ci_spec.py b/src/macaron/slsa_analyzer/specs/ci_spec.py index 0f00e5bdb..ad928b792 100644 --- a/src/macaron/slsa_analyzer/specs/ci_spec.py +++ b/src/macaron/slsa_analyzer/specs/ci_spec.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the BuildSpec class.""" @@ -6,7 +6,7 @@ from collections.abc import Sequence from typing import TypedDict -from macaron.code_analyzer.call_graph import CallGraph +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.slsa_analyzer.asset import AssetLocator from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload @@ -19,7 +19,7 @@ class CIInfo(TypedDict): service: BaseCIService """The CI service data.""" - callgraph: CallGraph + callgraph: NodeForest """The call graph for this CI service.""" provenance_assets: list[AssetLocator] diff --git a/tests/conftest.py b/tests/conftest.py index 77223948f..cb2cee8c0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,14 +10,13 @@ import pytest from pytest_httpserver import HTTPServer -import macaron from macaron.build_spec_generator.cli_command_parser.gradle_cli_parser import GradleCLICommandParser from macaron.build_spec_generator.cli_command_parser.maven_cli_parser import MavenCLICommandParser -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.config.defaults import create_defaults, defaults, load_defaults from macaron.database.table_definitions import Analysis, Component, RepoFinderMetadata, Repository -from macaron.parsers.bashparser import BashScriptType, create_bash_node -from macaron.parsers.github_workflow_model import Identified, Job, NormalJob, RunStep, Workflow +from macaron.parsers.github_workflow_model import NormalJob, RunStep, Workflow from macaron.slsa_analyzer.analyze_context import AnalyzeContext from 
macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool from macaron.slsa_analyzer.build_tool.conda import Conda @@ -33,11 +32,6 @@ from macaron.slsa_analyzer.build_tool.yarn import Yarn from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService from macaron.slsa_analyzer.ci_service.circleci import CircleCI -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import ( - GitHubJobNode, - GitHubWorkflowNode, - GitHubWorkflowType, -) from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI from macaron.slsa_analyzer.ci_service.jenkins import Jenkins @@ -489,7 +483,7 @@ def __init__( super().__init__(component, *args, **kwargs) -def build_github_actions_call_graph_for_commands(commands: list[str]) -> CallGraph: +def build_github_actions_call_graph_for_commands(commands: list[str]) -> NodeForest: """ Create a dummy callgraph that calls a list of bash commands for testing. @@ -498,37 +492,10 @@ def build_github_actions_call_graph_for_commands(commands: list[str]) -> CallGra commands: list[str] The list of bash commands. """ - root: BaseNode = BaseNode() - gh_cg = CallGraph(root, "") run_step: RunStep = {"run": ";".join(commands)} job_obj: NormalJob = {"runs-on": "", "steps": [run_step]} workflow_obj: Workflow = {"on": "release", "jobs": {"release": job_obj}} - workflow_node = GitHubWorkflowNode( - name="", - node_type=GitHubWorkflowType.INTERNAL, - source_path="", - parsed_obj=workflow_obj, - caller=root, - ) - root.add_callee(workflow_node) - job_obj_with_id: Identified[Job] = Identified("release", job_obj) - job_node = GitHubJobNode(name="", source_path="", parsed_obj=job_obj_with_id, caller=workflow_node) - workflow_node.add_callee(job_node) - - job_node.add_callee( - create_bash_node( - name="run", - node_id=None, - node_type=BashScriptType.INLINE, - source_path="", - ci_step_ast=run_step, - repo_path="", - caller=job_node, - recursion_depth=0, - macaron_path=macaron.MACARON_PATH, - ) - ) - + gh_cg = NodeForest([analyse_github_workflow(workflow_obj, "test.yaml", None)]) return gh_cg diff --git a/tests/parsers/bashparser/test_bashparser.py b/tests/parsers/bashparser/test_bashparser.py index 3f8ff5331..97c431034 100644 --- a/tests/parsers/bashparser/test_bashparser.py +++ b/tests/parsers/bashparser/test_bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -12,9 +12,8 @@ import pytest from macaron import MACARON_PATH -from macaron.code_analyzer.call_graph import BaseNode -from macaron.errors import CallGraphError, ParseError -from macaron.parsers.bashparser import BashScriptType, create_bash_node, parse, parse_file +from macaron.errors import ParseError +from macaron.parsers.bashparser import parse, parse_file @pytest.mark.parametrize( @@ -47,36 +46,3 @@ def test_bashparser_parse_invalid() -> None: # Parse the bash script file. 
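# Editorial sketch (not part of the patch): the surviving bashparser API after
# this change. `parse` returns a parsed object whose "commands" entry other
# call sites iterate over (shape assumed from usage elsewhere in this patch):
#
#     parsed = parse("mvn -B package", macaron_path=MACARON_PATH)
#     commands = parsed.get("commands", [])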
with pytest.raises(ParseError): parse_file(file_path=file_path, macaron_path=MACARON_PATH) - - -def test_create_bash_node_recursively() -> None: - """Test creating bash nodes from recursive script.""" - resources_dir = Path(__file__).parent.joinpath("resources", "bash_files") - with pytest.raises(CallGraphError, match="The analysis has reached maximum recursion depth .*"): - create_bash_node( - name="run", - node_id=None, - node_type=BashScriptType.FILE, - source_path=os.path.join(resources_dir, "recursive.sh"), - ci_step_ast=None, - repo_path=str(resources_dir), - caller=BaseNode(), - recursion_depth=0, - macaron_path=MACARON_PATH, - ) - - -def test_create_bash_node_path_traversal_attack() -> None: - """Test creating bash nodes from a script that is vulnerable to path traversal attacks.""" - resources_dir = Path(__file__).parent.joinpath("resources", "bash_files") - assert not create_bash_node( - name="run", - node_id=None, - node_type=BashScriptType.FILE, - source_path=os.path.join(resources_dir, "path_traversal.sh"), - ci_step_ast=None, - repo_path=str(resources_dir), - caller=BaseNode(), - recursion_depth=0, - macaron_path=MACARON_PATH, - ).callee diff --git a/tests/provenance/test_provenance_finder.py b/tests/provenance/test_provenance_finder.py index 5a1148364..774d2ff9e 100644 --- a/tests/provenance/test_provenance_finder.py +++ b/tests/provenance/test_provenance_finder.py @@ -13,7 +13,7 @@ from packageurl import PackageURL from pydriller import Git -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.provenance.provenance_finder import ( find_gav_provenance, find_npm_provenance, @@ -165,7 +165,7 @@ def test_provenance_on_unsupported_ci(macaron_path: Path, service: BaseCIService ci_info = CIInfo( service=service, - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], @@ -190,7 +190,7 @@ def test_provenance_on_supported_ci(macaron_path: Path, test_dir: Path) -> None: ci_info = CIInfo( service=github_actions, - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], diff --git a/tests/slsa_analyzer/build_tool/test_conda.py b/tests/slsa_analyzer/build_tool/test_conda.py index 896abad13..5adec7688 100644 --- a/tests/slsa_analyzer/build_tool/test_conda.py +++ b/tests/slsa_analyzer/build_tool/test_conda.py @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.conda import Conda from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -120,7 +119,7 @@ def test_is_conda_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -220,7 +219,7 @@ def test_is_conda_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_docker.py b/tests/slsa_analyzer/build_tool/test_docker.py index 17e8e0114..4f256e5c9 100644 --- a/tests/slsa_analyzer/build_tool/test_docker.py +++ b/tests/slsa_analyzer/build_tool/test_docker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. 
All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Docker build functions.""" @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.docker import Docker from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -126,7 +125,7 @@ def test_is_docker_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -215,7 +214,7 @@ def test_is_docker_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_flit.py b/tests/slsa_analyzer/build_tool/test_flit.py index 9a3757c78..6ffbed7f0 100644 --- a/tests/slsa_analyzer/build_tool/test_flit.py +++ b/tests/slsa_analyzer/build_tool/test_flit.py @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.flit import Flit from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -120,7 +119,7 @@ def test_is_flit_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -220,7 +219,7 @@ def test_is_flit_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_go.py b/tests/slsa_analyzer/build_tool/test_go.py index 7f0cb431f..3f2796326 100644 --- a/tests/slsa_analyzer/build_tool/test_go.py +++ b/tests/slsa_analyzer/build_tool/test_go.py @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.go import Go from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -126,7 +125,7 @@ def test_is_go_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -215,7 +214,7 @@ def test_is_go_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_gradle.py b/tests/slsa_analyzer/build_tool/test_gradle.py index 4298e7fb8..6896159df 100644 --- a/tests/slsa_analyzer/build_tool/test_gradle.py +++ b/tests/slsa_analyzer/build_tool/test_gradle.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
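(Editorial note: the test diffs in this series all make the same mechanical change: `step_node` is no longer a `BaseNode()` sentinel but `None`. A minimal sketch of the resulting fixture shape, with illustrative values not taken from the tests:)

    from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand
    from macaron.slsa_analyzer.build_tool.language import BuildLanguage

    cmd = BuildToolCommand(
        ci_path=".github/workflows/release.yaml",  # illustrative path
        command=["gradle", "publish"],             # illustrative command
        step_node=None,                            # previously BaseNode()
        language=BuildLanguage.JAVA,
        language_versions=None,
        language_distributions=None,
        language_url=None,
        reachable_secrets=[],
        events=["release"],
    )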
"""This module tests the Gradle build functions.""" @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.gradle import Gradle from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -177,7 +176,7 @@ def test_is_gradle_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -332,7 +331,7 @@ def test_is_gradle_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_hatch.py b/tests/slsa_analyzer/build_tool/test_hatch.py index 40e8d0f30..3fd687476 100644 --- a/tests/slsa_analyzer/build_tool/test_hatch.py +++ b/tests/slsa_analyzer/build_tool/test_hatch.py @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.hatch import Hatch from macaron.slsa_analyzer.build_tool.language import BuildLanguage @@ -120,7 +119,7 @@ def test_is_hatch_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -220,7 +219,7 @@ def test_is_hatch_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_maven.py b/tests/slsa_analyzer/build_tool/test_maven.py index 19cb9573f..c67f99298 100644 --- a/tests/slsa_analyzer/build_tool/test_maven.py +++ b/tests/slsa_analyzer/build_tool/test_maven.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Maven build functions.""" @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.language import BuildLanguage from macaron.slsa_analyzer.build_tool.maven import Maven @@ -177,7 +176,7 @@ def test_is_maven_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -332,7 +331,7 @@ def test_is_maven_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_npm.py b/tests/slsa_analyzer/build_tool/test_npm.py index 423e02199..f27b623f0 100644 --- a/tests/slsa_analyzer/build_tool/test_npm.py +++ b/tests/slsa_analyzer/build_tool/test_npm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the NPM build functions.""" @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.language import BuildLanguage from macaron.slsa_analyzer.build_tool.npm import NPM @@ -124,7 +123,7 @@ def test_is_npm_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -213,7 +212,7 @@ def test_is_npm_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_pip.py b/tests/slsa_analyzer/build_tool/test_pip.py index 1a069f31a..fa767bcce 100644 --- a/tests/slsa_analyzer/build_tool/test_pip.py +++ b/tests/slsa_analyzer/build_tool/test_pip.py @@ -5,7 +5,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.language import BuildLanguage from macaron.slsa_analyzer.build_tool.pip import Pip @@ -91,7 +90,7 @@ def test_is_pip_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -202,7 +201,7 @@ def test_is_pip_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_poetry.py b/tests/slsa_analyzer/build_tool/test_poetry.py index 4923d23ef..ae42669af 100644 --- a/tests/slsa_analyzer/build_tool/test_poetry.py +++ b/tests/slsa_analyzer/build_tool/test_poetry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Poetry build functions.""" @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.language import BuildLanguage from macaron.slsa_analyzer.build_tool.poetry import Poetry @@ -122,7 +121,7 @@ def test_is_poetry_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -222,7 +221,7 @@ def test_is_poetry_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/build_tool/test_yarn.py b/tests/slsa_analyzer/build_tool/test_yarn.py index 06f645028..48f49977c 100644 --- a/tests/slsa_analyzer/build_tool/test_yarn.py +++ b/tests/slsa_analyzer/build_tool/test_yarn.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. 
All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the Yarn build functions.""" @@ -7,7 +7,6 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode from macaron.slsa_analyzer.build_tool.base_build_tool import BuildToolCommand from macaron.slsa_analyzer.build_tool.language import BuildLanguage from macaron.slsa_analyzer.build_tool.yarn import Yarn @@ -124,7 +123,7 @@ def test_is_yarn_deploy_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), @@ -213,7 +212,7 @@ def test_is_yarn_package_command( language_distributions=language_distributions, language_url=None, ci_path=ci_path, - step_node=BaseNode(), + step_node=None, reachable_secrets=reachable_secrets, events=events, ), diff --git a/tests/slsa_analyzer/checks/test_build_as_code_check.py b/tests/slsa_analyzer/checks/test_build_as_code_check.py index d34ae64e2..0092e0f86 100644 --- a/tests/slsa_analyzer/checks/test_build_as_code_check.py +++ b/tests/slsa_analyzer/checks/test_build_as_code_check.py @@ -9,19 +9,14 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode, CallGraph -from macaron.parsers.actionparser import parse as parse_action +from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool from macaron.slsa_analyzer.build_tool.gradle import Gradle from macaron.slsa_analyzer.build_tool.pip import Pip from macaron.slsa_analyzer.checks.build_as_code_check import BuildAsCodeCheck, BuildAsCodeFacts from macaron.slsa_analyzer.checks.check_result import CheckResultType from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import ( - GitHubWorkflowNode, - GitHubWorkflowType, - build_call_graph_from_node, -) from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions from macaron.slsa_analyzer.ci_service.jenkins import Jenkins from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload @@ -54,7 +49,7 @@ def test_build_as_code_check_no_callgraph( """Test the Build As Code Check when no callgraph is built for the CI service.""" ci_info = CIInfo( service=ci_services[ci_name], - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], @@ -146,7 +141,7 @@ def test_gha_workflow_deployment( check = BuildAsCodeCheck() ci_info = CIInfo( service=github_actions_service, - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], @@ -160,20 +155,8 @@ def test_gha_workflow_deployment( gha_deploy.dynamic_data["build_spec"]["tools"] = [pip_tool] gha_deploy.dynamic_data["ci_services"] = [ci_info] - root: BaseNode = BaseNode() - gh_cg = CallGraph(root, "") workflow_path = os.path.join(workflows_dir, workflow_name) - parsed_obj = parse_action(workflow_path) - callee = GitHubWorkflowNode( - name=os.path.basename(workflow_path), - node_type=GitHubWorkflowType.INTERNAL, - source_path=workflow_path, - parsed_obj=parsed_obj, - caller=root, - ) - root.add_callee(callee) - build_call_graph_from_node(callee, 
repo_path="") - ci_info["callgraph"] = gh_cg + ci_info["callgraph"] = NodeForest([analyse_github_workflow_file(workflow_path, None)]) assert check.run_check(gha_deploy).result_type == expected_result @@ -192,7 +175,7 @@ def test_travis_ci_deploy( ci_info = CIInfo( service=travis_service, - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], diff --git a/tests/slsa_analyzer/checks/test_build_service_check.py b/tests/slsa_analyzer/checks/test_build_service_check.py index 4a5496c39..21ab9c1fe 100644 --- a/tests/slsa_analyzer/checks/test_build_service_check.py +++ b/tests/slsa_analyzer/checks/test_build_service_check.py @@ -8,7 +8,7 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool from macaron.slsa_analyzer.checks.build_service_check import BuildServiceCheck, BuildServiceFacts from macaron.slsa_analyzer.checks.check_result import CheckResultType @@ -44,7 +44,7 @@ def test_build_service_check_no_callgraph( """Test the Build Service Check when no callgraph is built for the CI service.""" ci_info = CIInfo( service=ci_services[ci_name], - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], diff --git a/tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py b/tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py index 883dfcc09..a58ceaf2b 100644 --- a/tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py +++ b/tests/slsa_analyzer/checks/test_github_actions_vulnerability_check.py @@ -11,12 +11,12 @@ import pytest from pytest_httpserver import HTTPServer -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.config.defaults import load_defaults from macaron.slsa_analyzer.checks.check_result import CheckResultType from macaron.slsa_analyzer.checks.github_actions_vulnerability_check import GitHubActionsVulnsCheck from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import build_call_graph_from_path from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload from macaron.slsa_analyzer.specs.ci_spec import CIInfo from macaron.slsa_analyzer.specs.inferred_provenance import InferredProvenance @@ -29,17 +29,14 @@ def get_ci_info(ci_services: dict[str, BaseCIService], ci_name: str, workflow_pa """Get CIInfo instance.""" ci_info = CIInfo( service=ci_services[ci_name], - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], build_info_results=InTotoV01Payload(statement=InferredProvenance().payload), ) if ci_name == "github_actions": - root_node: BaseNode = BaseNode() - workflow_node = build_call_graph_from_path(root_node, workflow_path=workflow_path, repo_path="") - root_node.add_callee(workflow_node) - ci_info["callgraph"] = CallGraph(root_node, "") + ci_info["callgraph"] = NodeForest([analyse_github_workflow_file(workflow_path, None)]) return ci_info diff --git a/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py b/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py index 8584e5f35..4abf8df64 100644 --- 
a/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py +++ b/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py @@ -5,7 +5,7 @@ import os -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.slsa_analyzer.asset import VirtualReleaseAsset from macaron.slsa_analyzer.checks.check_result import CheckResultType from macaron.slsa_analyzer.checks.provenance_l3_content_check import ProvenanceL3ContentCheck @@ -82,7 +82,7 @@ def test_expectation_check(self) -> None: # Test GitHub Actions. ci_info = CIInfo( service=github_actions, - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], diff --git a/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py b/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py index c36eba0d5..6f72ab739 100644 --- a/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py +++ b/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py @@ -8,15 +8,10 @@ import pytest -from macaron.code_analyzer.call_graph import BaseNode, CallGraph -from macaron.parsers.actionparser import parse as parse_action +from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.slsa_analyzer.checks.check_result import CheckResultType from macaron.slsa_analyzer.checks.trusted_builder_l3_check import TrustedBuilderL3Check -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import ( - GitHubWorkflowNode, - GitHubWorkflowType, - build_call_graph_from_node, -) from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload from macaron.slsa_analyzer.specs.ci_spec import CIInfo @@ -47,7 +42,7 @@ def test_trusted_builder_l3_check( workflows_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources", "github", "workflow_files") ci_info = CIInfo( service=github_actions_service, - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[], @@ -57,18 +52,6 @@ def test_trusted_builder_l3_check( ctx = MockAnalyzeContext(macaron_path=macaron_path, output_dir="") ctx.dynamic_data["ci_services"] = [ci_info] - root: BaseNode = BaseNode() - gh_cg = CallGraph(root, "") workflow_path = os.path.join(workflows_dir, workflow_name) - parsed_obj = parse_action(workflow_path) - callee = GitHubWorkflowNode( - name=workflow_name, - node_type=GitHubWorkflowType.INTERNAL, - source_path=workflow_path, - parsed_obj=parsed_obj, - caller=root, - ) - build_call_graph_from_node(callee, repo_path="") - root.add_callee(callee) - ci_info["callgraph"] = gh_cg + ci_info["callgraph"] = NodeForest([analyse_github_workflow_file(workflow_path, None)]) assert check.run_check(ctx).result_type == expected_result diff --git a/tests/slsa_analyzer/ci_service/test_github_actions.py b/tests/slsa_analyzer/ci_service/test_github_actions.py index 1995c3705..4da4f7d2a 100644 --- a/tests/slsa_analyzer/ci_service/test_github_actions.py +++ b/tests/slsa_analyzer/ci_service/test_github_actions.py @@ -1,21 +1,13 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module tests GitHub Actions CI service.""" -import os from datetime import datetime, timedelta from pathlib import Path import pytest -from macaron.code_analyzer.call_graph import BaseNode, CallGraph -from macaron.parsers.actionparser import parse as parse_action -from macaron.slsa_analyzer.ci_service.github_actions.analyzer import ( - GitHubWorkflowNode, - GitHubWorkflowType, - build_call_graph_from_node, -) from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions mock_repos = Path(__file__).parent.joinpath("mock_repos") @@ -30,59 +22,6 @@ def github_actions_() -> GitHubActions: return GitHubActions() -@pytest.mark.parametrize( - ( - "workflow_name", - "expect", - ), - [ - ( - "valid1.yaml", - [ - "GitHubWorkflowNode(valid1.yaml,GitHubWorkflowType.INTERNAL)", - "GitHubJobNode(build)", - "GitHubWorkflowNode(apache/maven-gh-actions-shared/.github/workflows/maven-verify.yml@v2,GitHubWorkflowType.REUSABLE)", - ], - ), - ( - "valid2.yaml", - [ - "GitHubWorkflowNode(valid2.yaml,GitHubWorkflowType.INTERNAL)", - "GitHubJobNode(build)", - "GitHubWorkflowNode(actions/checkout@v3,GitHubWorkflowType.EXTERNAL)", - "GitHubWorkflowNode(actions/cache@v3,GitHubWorkflowType.EXTERNAL)", - "GitHubWorkflowNode(actions/setup-java@v3,GitHubWorkflowType.EXTERNAL)", - "BashNode(Publish to Sonatype Snapshots,BashScriptType.INLINE)", - ], - ), - ], - ids=[ - "Internal and reusable workflows", - "Internal and external workflows", - ], -) -def test_build_call_graph(workflow_name: str, expect: list[str]) -> None: - """Test building call graphs for GitHub Actions workflows.""" - resources_dir = Path(__file__).parent.joinpath("resources", "github") - - # Parse GitHub Actions workflows. - root: BaseNode = BaseNode() - gh_cg = CallGraph(root, "") - workflow_path = os.path.join(resources_dir, workflow_name) - parsed_obj = parse_action(workflow_path) - - callee = GitHubWorkflowNode( - name=os.path.basename(workflow_path), - node_type=GitHubWorkflowType.INTERNAL, - source_path=workflow_path, - parsed_obj=parsed_obj, - caller=root, - ) - root.add_callee(callee) - build_call_graph_from_node(callee, repo_path="") - assert [str(node) for node in gh_cg.bfs()] == expect - - def test_is_detected(github_actions: GitHubActions) -> None: """Test detecting GitHub Action config files.""" assert github_actions.is_detected(str(ga_has_build_kws)) diff --git a/tests/slsa_analyzer/test_analyze_context.py b/tests/slsa_analyzer/test_analyze_context.py index 40a4ad881..4b1b1e776 100644 --- a/tests/slsa_analyzer/test_analyze_context.py +++ b/tests/slsa_analyzer/test_analyze_context.py @@ -6,7 +6,7 @@ from unittest import TestCase from unittest.mock import MagicMock -from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.code_analyzer.dataflow_analysis.core import NodeForest from macaron.json_tools import JsonType from macaron.slsa_analyzer.asset import VirtualReleaseAsset from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions @@ -93,7 +93,7 @@ def test_provenances(self) -> None: gh_actions_ci_info = CIInfo( service=gh_actions, - callgraph=CallGraph(BaseNode(), ""), + callgraph=NodeForest([]), provenance_assets=[], release={}, provenances=[ From ad103cff5dcdf1a377e95816374140d687128dc2 Mon Sep 17 00:00:00 2001 From: Parth Govale Date: Mon, 15 Dec 2025 13:02:38 +0530 Subject: [PATCH 03/20] feat: prepare Macaron GitHub Action to publish on GitHub Marketplace (#1259) Prepare Macaron for publishing on GitHub Action Marketplace, and add the 
documentation. Signed-off-by: Demolus13 --- .github/workflows/test_macaron_action.yaml | 2 +- README.md | 18 +- action.yaml | 21 +-- docs/source/index.rst | 1 + docs/source/pages/macaron_action.rst | 175 ++++++++++++++++++ scripts/actions/run_macaron_analysis.sh | 2 +- .../run_macaron_policy_verification.sh | 2 +- scripts/actions/setup_macaron.sh | 72 +++++-- .../datalog/malware-detection.dl.template | 1 - 9 files changed, 255 insertions(+), 39 deletions(-) create mode 100644 docs/source/pages/macaron_action.rst diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 5f3753a98..2621313c6 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -184,7 +184,7 @@ jobs: package_url: pkg:maven/io.github.behnazh-w.demo/example-maven-app@2.0?type=jar repo_path: https://github.com/behnazh-w/example-maven-app output_dir: macaron_output/detect_malicious_java_dep - sbom_path: ./resources/detect_malicious_java_dep/example-sbom.json + sbom_path: ./tests/tutorial_resources/detect_malicious_java_dep/example-sbom.json deps_depth: '1' - name: Run Macaron (verify policy - detect-malicious-upload) diff --git a/README.md b/README.md index d77aca1d5..925f6e127 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,27 @@ ![Macaron](./docs/source/assets/macaron.svg) -[Full Documentation](https://oracle.github.io/macaron/index.html) | [Tutorials](https://oracle.github.io/macaron/pages/tutorials/index.html) | [Videos](https://www.youtube.com/watch?v=ebo0kGKP6bw) | [Papers](#publications) | [Presentations](#presentations) +[Full Documentation](https://oracle.github.io/macaron/index.html) | [Tutorials](https://oracle.github.io/macaron/pages/tutorials/index.html) | [Videos](https://www.youtube.com/watch?v=ebo0kGKP6bw) | [Papers](#publications) | [Presentations](#presentations) | [Macaron GitHub Action](https://oracle.github.io/macaron/pages/macaron_action.html) **Macaron** is a software supply chain security analysis tool from Oracle Labs focused on verifying the **build integrity** of artifacts and their dependencies. It helps developers, security teams, and researchers ensure that packages are built as expected and have not been tampered with. +## Use Macaron as a GitHub Action + +To use the Macaron GitHub Action, add the following step to your workflow (adjust the version as needed). In this example, we use an example policy. + +```yaml +- uses: oracle/macaron@v0.21.0 + with: + repo_path: 'https://github.com/example/project' + policy_file: check-github-actions + policy_purl: 'pkg:github.com/example/project' + output_dir: 'macaron-output' + upload_attestation: true +``` + +For detailed instructions and a comprehensive list of available options, please refer to the [Macaron GitHub Action documentation](https://oracle.github.io/macaron/pages/macaron_action.html).
+ ## Key Capabilities Macaron supports: diff --git a/action.yaml b/action.yaml index 0e77b216d..f28f9d2e9 100644 --- a/action.yaml +++ b/action.yaml @@ -58,28 +58,13 @@ outputs: runs: using: composite steps: - - name: Setup Python - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 - with: - python-version: 3.11.14 - - - name: Setup Go - uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0 - with: - go-version: '1.23' - cache: false - - - name: Setup JDK - uses: actions/setup-java@b36c23c0d998641eff861008f374ee103c25ac73 # v4.4.0 - with: - java-version: '17' - distribution: oracle - - name: Setup Macaron - # Create or reuse a Python virtualenv with the macaron CLI and export the `MACARON` binary path via `$GITHUB_ENV` so later steps can use it. + # Create or reuse run_macaron.sh script run: | bash "$GITHUB_ACTION_PATH/scripts/actions/setup_macaron.sh" shell: bash + env: + ACTION_REF: ${{ github.action_ref }} - name: Run Macaron Analysis id: run-macaron-analysis diff --git a/docs/source/index.rst b/docs/source/index.rst index 43fe2af7f..bc9ab5a0d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -121,6 +121,7 @@ intermediate representations as abstractions. Using such abstractions, Macaron i pages/installation pages/using pages/cli_usage/index + pages/macaron_action pages/tutorials/index pages/output_files pages/checks/slsa_builds diff --git a/docs/source/pages/macaron_action.rst b/docs/source/pages/macaron_action.rst new file mode 100644 index 000000000..6c7db9407 --- /dev/null +++ b/docs/source/pages/macaron_action.rst @@ -0,0 +1,175 @@ +Macaron GitHub Action +===================== + +Overview +-------- + +This document describes the composite GitHub Action defined in ``action.yaml`` at the repository root. The action uses the Macaron CLI to run supply-chain security analysis and policy verification from a GitHub Actions workflow. + +Quick usage +----------- + +When using this action you can reference the action in your workflow. Example: + +.. code-block:: yaml + + jobs: + analyze: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Run Macaron Security Analysis + uses: oracle/macaron@v0.21.0 + with: + repo_path: 'https://github.com/example/project' + policy_file: check-github-actions + policy_purl: 'pkg:github.com/example/project' + output_dir: 'macaron-output' + upload_attestation: true + +Example: policy verification only +---------------------------------- + +To run only the policy verification step (when you already have an output +database), call the action with ``policy_file`` and set ``output_dir`` to the +directory containing ``macaron.db``: + +.. code-block:: yaml + + - name: Verify policy + uses: oracle/macaron@v0.21.0 + with: + policy_file: policy.dl + output_dir: macaron-output + upload_attestation: true + +Inputs +------ +The action exposes a number of inputs which map directly to Macaron CLI +options. Key inputs are listed below (see ``action.yaml`` for the full list): + +.. list-table:: + :header-rows: 1 + :widths: 20 60 20 + + * - Input + - Description + - Default + * - ``repo_path`` + - The path or URL of the repository to analyze. + - + * - ``package_url`` + - A PURL identifying a package to analyze instead of a repository. + - + * - ``sbom_path`` + - Path to an SBOM file to analyze. + - + * - ``python_venv`` + - Path to an existing Python virtualenv (used when analyzing Python + packages). 
+ - + * - ``defaults_path`` + - Path to a Macaron defaults configuration file. + - + * - ``policy_file`` + - Path to a Datalog policy file for policy verification. + - + * - ``policy_purl`` + - PURL for a pre-defined policy to use with verification. + - + * - ``branch`` / ``digest`` + - Checkout options when analyzing a repository (branch name or commit + digest). + - + * - ``provenance_expectation`` + - The path to provenance expectation file or directory. + - + * - ``provenance_file`` + - The path to the provenance file in in-toto format. + - + * - ``deps_depth`` + - Dependency resolution depth (how many levels of transitive dependencies + to resolve). + - ``0`` + * - ``show_prelude`` + - Shows the Datalog prelude for the database. + - + * - ``github_token`` + - Token used by Macaron to access GitHub (for cloning, API access, + etc.). + - ``${{ github.token }}`` + * - ``output_dir`` + - Directory where Macaron writes results (database, reports, artifacts). + - ``output`` + * - ``upload_attestation`` + - When ``true``, the action will attempt to upload a generated + verification attestation (VSA) after policy verification. + - ``false`` + * - ``subject_path`` + - Path to the artifact serving as the subject of the attestation. + - ``${{ github.workspace }}`` + +Outputs +------- + +The composite action exposes the following outputs (set by the +``run_macaron_policy_verification.sh`` script when applicable): + +.. list-table:: + :header-rows: 1 + :widths: 20 70 + + * - Output + - Description + * - ``policy_report`` + - Path to the generated policy report JSON file produced by + ``macaron verify-policy``. This file contains the policy evaluation + results. + * - ``vsa_report`` + - Path to the generated VSA (Verification Summary Attestation) in + `in-toto `_ JSONL format. If no VSA was produced + during verification, the action emits the string ``"VSA Not Generated."`` + instead of a path. + +Default Policies +---------------- + +Macaron provides policy templates to run pre-defined policies: + +.. list-table:: + :header-rows: 1 + :widths: 20 60 20 + + * - Policy name + - Description + - Template + * - ``check-github-actions`` + - Detects whether a component was built using GitHub Actions that + are known to be vulnerable or otherwise unsafe. The policy + evaluates a check named `mcn_githubactions_vulnerabilities_1` and + reports a passed/failed result for the component when applied. + - `check-github-actions template `_ + * - ``malware-detection`` + - Checks a component for indicators of malicious or suspicious content. + The policy evaluates a check named mcn_detect_malicious_metadata_1 + and reports a passed/failed result for the component when applied. + - `malware-detection template `_ + * - ``malware-detection-dependencies`` + - Checks the component and its transitive dependencies for indicators + of malicious or suspicious content. The policy ensures the component + and each dependency pass the `mcn_detect_malicious_metadata_1` check. + - `malware-detection-dependencies template `_ + +How the action works +-------------------- + +1. ``Setup Macaron``: downloads ``run_macaron.sh`` script to install and run macaron in the action. + +2. ``Run Macaron Analysis``: calls ``scripts/actions/run_macaron_analysis.sh`` + which assembles the ``macaron analyze`` command from the inputs and runs + it. Results are written into ``output_dir``. + +3. 
``Run Macaron Policy Verification``: if a policy file or PURL is supplied, + the corresponding script runs ``macaron verify-policy`` against the + analysis database and writes ``policy_report`` and ``vsa_report`` to + ``$GITHUB_OUTPUT`` when produced. diff --git a/scripts/actions/run_macaron_analysis.sh b/scripts/actions/run_macaron_analysis.sh index fc97fd916..34305479c 100644 --- a/scripts/actions/run_macaron_analysis.sh +++ b/scripts/actions/run_macaron_analysis.sh @@ -19,7 +19,7 @@ else fi OUTPUT_DIR=${OUTPUT_DIR:-output} -CMD="$CMD --output-dir ${OUTPUT_DIR} -lr . analyze" +CMD="$CMD --output ${OUTPUT_DIR} -lr . analyze" if [ -n "${REPO_PATH:-}" ]; then CMD="$CMD -rp ${REPO_PATH}" diff --git a/scripts/actions/run_macaron_policy_verification.sh b/scripts/actions/run_macaron_policy_verification.sh index fb6218e36..46eb9bee0 100644 --- a/scripts/actions/run_macaron_policy_verification.sh +++ b/scripts/actions/run_macaron_policy_verification.sh @@ -25,7 +25,7 @@ if [ -n "$DEFAULTS_PATH" ]; then else CMD="$MACARON" fi -CMD="$CMD --output-dir ${OUTPUT_DIR} verify-policy --database ${OUTPUT_DIR}/macaron.db" +CMD="$CMD --output ${OUTPUT_DIR} verify-policy --database ${OUTPUT_DIR}/macaron.db" if [ -n "$FILE" ] && [ -f "$FILE" ]; then CMD="$CMD --file $FILE" diff --git a/scripts/actions/setup_macaron.sh b/scripts/actions/setup_macaron.sh index fe2bd9b20..a002bb534 100644 --- a/scripts/actions/setup_macaron.sh +++ b/scripts/actions/setup_macaron.sh @@ -4,25 +4,65 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. set -euo pipefail -# Setup Macaron virtualenv and make available via GitHub Actions environment files. -# This script writes `MACARON=` to `$GITHUB_ENV` so later steps can invoke the macaron CLI, and appends the venv `bin` directory to `$GITHUB_PATH`. - MACARON_DIR="${RUNNER_TEMP:-/tmp}/macaron" -VENV_MACARON="$MACARON_DIR/.venv/bin/macaron" - mkdir -p "$MACARON_DIR" -if [ -x "$VENV_MACARON" ]; then - echo "Using macaron from existing venv: $VENV_MACARON" - echo "MACARON=$VENV_MACARON" >> "$GITHUB_ENV" - echo "$MACARON_DIR/.venv/bin" >> "$GITHUB_PATH" - exit 0 +ACTION_DIR="${RUNNER_TEMP:-/tmp}/macaron-action" +rm -rf "$ACTION_DIR" +mkdir -p "$ACTION_DIR" + +git clone --filter=blob:none --no-checkout https://github.com/oracle/macaron.git "$ACTION_DIR" + +TARGET_REF="${ACTION_REF:-main}" +MACARON_IMAGE_TAG="" +cd "$ACTION_DIR" +if [[ "$TARGET_REF" =~ ^[0-9a-f]{40}$ ]]; then + # Check for tags pointing directly at the SHA. + tags=$(git tag --points-at "$TARGET_REF") + if [[ -n "$tags" ]]; then + # Get the first tag (main or first one listed) + MACARON_IMAGE_TAG="$(echo "$tags" | head -n1)" + echo "SHA $TARGET_REF maps to exact tag: $MACARON_IMAGE_TAG" + else + # Search all tags that contain the commit (could be ancestor). + history_tags=$(git tag --contains "$TARGET_REF") + if [[ -n "$history_tags" ]]; then + MACARON_IMAGE_TAG="$(echo "$history_tags" | head -n1)" + echo "SHA $TARGET_REF is contained in tag: $MACARON_IMAGE_TAG" + else + echo "No tag found for SHA $TARGET_REF. Defaulting to 'latest'." + MACARON_IMAGE_TAG="latest" + fi + fi +elif [[ "$TARGET_REF" =~ ^v[0-9] ]]; then + MACARON_IMAGE_TAG="$TARGET_REF" + echo "Ref is a direct tag: $MACARON_IMAGE_TAG" +else + echo "Using 'latest' image." + MACARON_IMAGE_TAG="latest" fi cd "$MACARON_DIR" -git clone https://github.com/oracle/macaron.git . 
-make venv -export PATH="$MACARON_DIR/.venv/bin:$PATH" -make setup -echo "MACARON=$VENV_MACARON" >> "$GITHUB_ENV" -echo "$MACARON_DIR/.venv/bin" >> "$GITHUB_PATH" + +# Download image using macaron_image_tag else latest release +if [ "${MACARON_IMAGE_TAG}" != "latest" ]; then + echo "MACARON_IMAGE_TAG detected: ${MACARON_IMAGE_TAG}" + URL="https://raw.githubusercontent.com/oracle/macaron/refs/tags/${MACARON_IMAGE_TAG}/scripts/release_scripts/run_macaron.sh" + SCRIPT_NAME="run_macaron_${MACARON_IMAGE_TAG}.sh" +else + echo "Using default latest release." + URL="https://raw.githubusercontent.com/oracle/macaron/release/scripts/release_scripts/run_macaron.sh" + SCRIPT_NAME="run_macaron.sh" +fi + +# Get the run_macaron.sh script +if [ ! -f "$SCRIPT_NAME" ]; then + echo "Downloading $SCRIPT_NAME from: $URL" + curl -fSL -o "$SCRIPT_NAME" "$URL" +else + echo "$SCRIPT_NAME already exists, skipping download." +fi + +chmod +x "$SCRIPT_NAME" +echo "MACARON=$MACARON_DIR/$SCRIPT_NAME" >> "$GITHUB_ENV" +echo "MACARON_IMAGE_TAG=${MACARON_IMAGE_TAG}" >> "$GITHUB_ENV" diff --git a/src/macaron/resources/policies/datalog/malware-detection.dl.template b/src/macaron/resources/policies/datalog/malware-detection.dl.template index 77eedc5cf..4429cfec6 100644 --- a/src/macaron/resources/policies/datalog/malware-detection.dl.template +++ b/src/macaron/resources/policies/datalog/malware-detection.dl.template @@ -3,7 +3,6 @@ Policy("check-component", component_id, "Check component artifacts.") :- check_passed(component_id, "mcn_detect_malicious_metadata_1"). - apply_policy_to("check-component", component_id) :- is_component(component_id, purl), match("", purl). From 1665a69d745ffa3d5888545d40b9878b12390abc Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Thu, 18 Dec 2025 10:34:55 +1000 Subject: [PATCH 04/20] feat: infer chronologically likeliest setuptools version (#1260) Signed-off-by: Abhinav Pradeep --- .../common_spec/base_spec.py | 2 +- .../common_spec/pypi_spec.py | 16 +++ .../metadata/similar_projects.py | 3 - src/macaron/repo_finder/repo_finder_pypi.py | 2 +- .../package_registry/pypi_registry.py | 103 ++++++++++++++++-- .../pypi/test_wheel_absence.py | 2 +- 6 files changed, 115 insertions(+), 13 deletions(-) diff --git a/src/macaron/build_spec_generator/common_spec/base_spec.py b/src/macaron/build_spec_generator/common_spec/base_spec.py index b410729fe..c567609f7 100644 --- a/src/macaron/build_spec_generator/common_spec/base_spec.py +++ b/src/macaron/build_spec_generator/common_spec/base_spec.py @@ -62,7 +62,7 @@ class BaseBuildSpecDict(TypedDict, total=False): build_commands: NotRequired[list[list[str]]] #: List of shell commands to test the project. - test_commands: NotRequired[list[str]] + test_commands: NotRequired[list[list[str]]] #: Environment variables required during build or test. 
environment: NotRequired[dict[str, str]] diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index bb90ba6a1..999afbb19 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -120,6 +120,8 @@ def resolve_fields(self, purl: PackageURL) -> None: python_version_set: set[str] = set() wheel_name_python_version_list: list[str] = [] wheel_name_platforms: set[str] = set() + # Precautionary fallback to default version + chronologically_likeliest_version: str = defaults.get("heuristic.pypi", "default_setuptools") if pypi_package_json is not None: if pypi_package_json.package_json or pypi_package_json.download(dest=""): @@ -150,6 +152,9 @@ def resolve_fields(self, purl: PackageURL) -> None: parsed_build_requires["setuptools"] = "==" + defaults.get( "heuristic.pypi", "setuptools_version_emitting_platform_unknown" ) + chronologically_likeliest_version = ( + pypi_package_json.get_chronologically_suitable_setuptools_version() + ) except SourceCodeError: logger.debug("Could not find pure wheel matching this PURL") @@ -165,6 +170,10 @@ def resolve_fields(self, purl: PackageURL) -> None: requires = json_extract(content, ["build-system", "requires"], list) if requires: build_requires_set.update(elem.replace(" ", "") for elem in requires) + # If we cannot find `requires` in `[build-system]`, we lean on the fact that setuptools + # was the de-facto build tool, and infer a setuptools version to include. + else: + build_requires_set.add(f"setuptools=={chronologically_likeliest_version}") backend = json_extract(content, ["build-system", "build-backend"], str) if backend: build_backends_set.add(backend.replace(" ", "")) @@ -177,6 +186,10 @@ def resolve_fields(self, purl: PackageURL) -> None: build_requires_set, build_backends_set, ) + # Here we have successfully analyzed the pyproject.toml file. Now, if we have a setup.py/cfg, + # we also need to infer a setuptools version to include. + if pypi_package_json.file_exists("setup.py") or pypi_package_json.file_exists("setup.cfg"): + build_requires_set.add(f"setuptools=={chronologically_likeliest_version}") except TypeError as error: logger.debug( "Found a type error while reading the pyproject.toml file from the sdist: %s", error @@ -185,6 +198,9 @@ def resolve_fields(self, purl: PackageURL) -> None: logger.debug("Failed to read the pyproject.toml file from the sdist: %s", error) except SourceCodeError as error: logger.debug("No pyproject.toml found: %s", error) + # Here we do not have a pyproject.toml file. Instead, we lean on the fact that setuptools + # was the de-facto build tool, and infer a setuptools version to include.
+ build_requires_set.add(f"setuptools=={chronologically_likeliest_version}") except SourceCodeError as error: logger.debug("No source distribution found: %s", error) diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/similar_projects.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/similar_projects.py index 872c1143d..b98686c99 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/similar_projects.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/similar_projects.py @@ -74,9 +74,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes False, pypi_package_json.pypi_registry, {}, - "", - "", - "", PyPIInspectorAsset("", [], {}), ) if not adjacent_pypi_json.download(""): diff --git a/src/macaron/repo_finder/repo_finder_pypi.py b/src/macaron/repo_finder/repo_finder_pypi.py index 8aa05d7ce..2941c71cf 100644 --- a/src/macaron/repo_finder/repo_finder_pypi.py +++ b/src/macaron/repo_finder/repo_finder_pypi.py @@ -59,7 +59,7 @@ def find_repo( if not pypi_registry: return "", RepoFinderInfo.PYPI_NO_REGISTRY pypi_asset = PyPIPackageJsonAsset( - purl.name, purl.version, False, pypi_registry, {}, "", "", "", PyPIInspectorAsset("", [], {}) + purl.name, purl.version, False, pypi_registry, {}, PyPIInspectorAsset("", [], {}) ) if not pypi_asset: diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index a4306d905..ce8630d37 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -4,6 +4,7 @@ """The module provides abstractions for the pypi package registry.""" from __future__ import annotations +import bisect import hashlib import logging import os @@ -15,7 +16,7 @@ import zipfile from collections.abc import Callable, Generator, Iterator from contextlib import contextmanager -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime from typing import TYPE_CHECKING @@ -502,6 +503,42 @@ def get_maintainer_join_date(self, username: str) -> datetime | None: return res.replace(tzinfo=None) if res else None + def get_matching_setuptools_version(self, package_release_datetime: datetime) -> str: + """Find the setuptools version that would be "latest" for the input datetime. + + Parameters + ---------- + package_release_datetime: datetime + Release datetime of a package we wish to rebuild + + Returns + ------- + str: Matching version of setuptools + """ + setuptools_endpoint = urllib.parse.urljoin(self.registry_url, "pypi/setuptools/json") + setuptools_json = self.download_package_json(setuptools_endpoint) + releases = json_extract(setuptools_json, ["releases"], dict) + if releases: + release_tuples = [ + (version, release_info[0].get("upload_time")) + for version, release_info in releases.items() + if release_info + ] + # Cannot assume this is sorted, as releases is just a dict + release_tuples.sort(key=lambda x: x[1]) + # bisect_left gives position to insert package_release_datetime to maintain order, hence we do -1 + index = ( + bisect.bisect_left( + release_tuples, package_release_datetime, key=lambda x: datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S") + ) - 1 + ) + return str(release_tuples[index][0]) + # This realistically cannot happen: it would mean we somehow are trying to rebuild + # for a package and version with no releases. + # Return default just in case.
+ return defaults.get("heuristic.pypi", "default_setuptools") + @staticmethod def extract_attestation(attestation_data: dict) -> dict | None: """Extract the first attestation file from a PyPI attestation response. @@ -618,13 +655,16 @@ class PyPIPackageJsonAsset: package_json: dict #: The source code temporary location name. - package_sourcecode_path: str + package_sourcecode_path: str = field(init=False) #: The wheel temporary location name. - wheel_path: str + wheel_path: str = field(init=False) #: Name of the wheel file. - wheel_filename: str + wheel_filename: str = field(init=False) + + #: The datetime that the wheel was uploaded. + package_upload_time: datetime | None = field(default=None, init=False) #: The pypi inspector information about this package inspector_asset: PyPIInspectorAsset @@ -769,6 +809,11 @@ def get_wheel_url(self, tag: str = "none-any") -> str | None: if not urls: return None for distribution in urls: + # In this way we have a package_upload_time even if we cannot find the wheel. + try: + self.package_upload_time = datetime.strptime(distribution.get("upload_time") or "", "%Y-%m-%dT%H:%M:%S") + except ValueError: + logging.debug("Could not parse the uploaded datetime: %s", distribution.get("upload_time") or "") # Only examine wheels if distribution.get("packagetype") != "bdist_wheel": continue @@ -779,6 +824,12 @@ def get_wheel_url(self, tag: str = "none-any") -> str | None: # Continue to getting url wheel_url: str = distribution.get("url") or "" if wheel_url: + try: + self.package_upload_time = datetime.strptime( + distribution.get("upload_time") or "", "%Y-%m-%dT%H:%M:%S" + ) + except ValueError: + logging.debug("Could not parse the uploaded datetime: %s", distribution.get("upload_time") or "") try: parsed_url = urllib.parse.urlparse(wheel_url) except ValueError: @@ -919,6 +970,33 @@ def get_sourcecode_file_contents(self, path: str) -> bytes: logger.debug(error_msg) raise SourceCodeError(error_msg) from read_error + def file_exists(self, path: str) -> bool: + """Check if a file exists in the downloaded source code. + + The path can be relative to the package_sourcecode_path attribute, or an absolute path. + + Parameters + ---------- + path: str + The absolute or relative to package_sourcecode_path file path to check for. + + Returns + ------- + bool: Whether or not a file at path absolute or relative to package_sourcecode_path exists. + """ + if not self.package_sourcecode_path: + # No source code files were downloaded + return False + + if not os.path.isabs(path): + path = os.path.join(self.package_sourcecode_path, path) + + if not os.path.exists(path): + # Could not find a file at that path + return False + + return True + def iter_sourcecode(self) -> Iterator[tuple[str, bytes]]: """ Iterate through all source code files. @@ -1054,6 +1132,19 @@ def get_inspector_src_preview_links(self) -> bool: # If all distributions were invalid and went along a 'continue' path. return bool(self.inspector_asset) + def get_chronologically_suitable_setuptools_version(self) -> str: + """Find version of setuptools that would be "latest" for this package. 
+ + Returns + ------- + str + Chronologically likeliest setuptools version + """ + if self.package_upload_time: + return self.pypi_registry.get_matching_setuptools_version(self.package_upload_time) + # If we cannot infer upload time for the package, return the default + return defaults.get("heuristic.pypi", "default_setuptools") + def find_or_create_pypi_asset( asset_name: str, asset_version: str | None, pypi_registry_info: PackageRegistryInfo @@ -1091,8 +1182,6 @@ def find_or_create_pypi_asset( logger.debug("Failed to create PyPIPackageJson asset.") return None - asset = PyPIPackageJsonAsset( - asset_name, asset_version, False, package_registry, {}, "", "", "", PyPIInspectorAsset("", [], {}) - ) + asset = PyPIPackageJsonAsset(asset_name, asset_version, False, package_registry, {}, PyPIInspectorAsset("", [], {})) pypi_registry_info.metadata.append(asset) return asset diff --git a/tests/malware_analyzer/pypi/test_wheel_absence.py b/tests/malware_analyzer/pypi/test_wheel_absence.py index 7f6e4e593..c00feb2cb 100644 --- a/tests/malware_analyzer/pypi/test_wheel_absence.py +++ b/tests/malware_analyzer/pypi/test_wheel_absence.py @@ -125,7 +125,7 @@ def test_get_inspector_src_preview_links(mock_send_head_http_raw: MagicMock) -> mock_send_head_http_raw.return_value = MagicMock() # Assume valid URL for testing purposes. pypi_package_json = PyPIPackageJsonAsset( - package_name, version, False, pypi_registry, package_json, "", "", "", PyPIInspectorAsset("", [], {}) + package_name, version, False, pypi_registry, package_json, PyPIInspectorAsset("", [], {}) ) assert pypi_package_json.get_inspector_src_preview_links() is True From d190f653c1dc39c50c151972cd8e68aa60317dc7 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Fri, 19 Dec 2025 14:16:23 +1000 Subject: [PATCH 05/20] fix!: use --output option for Macaron Python Package (#1266) Fixes inconsistent output path handling between Docker CLI and Python package CLI in Macaron by introducing a --output option and HOST_OUTPUT env var, ensuring reports/logs use container-external paths when set. Signed-off-by: behnazh-w --- .gitignore | 1 + docs/source/pages/cli_usage/index.rst | 2 +- scripts/release_scripts/run_macaron.sh | 6 ++ src/macaron/__main__.py | 46 ++++++++++----- .../build_spec_generator.py | 7 ++- src/macaron/config/defaults.py | 11 ++-- src/macaron/config/global_config.py | 3 + src/macaron/dependency_analyzer/cyclonedx.py | 5 +- src/macaron/output_reporter/__init__.py | 59 ++++++++++++++++++- src/macaron/output_reporter/reporter.py | 13 ++-- src/macaron/provenance/provenance_verifier.py | 5 +- src/macaron/repo_finder/repo_utils.py | 7 ++- .../ci_service/base_ci_service.py | 5 +- tests/config/test_defaults.py | 7 +-- tests/conftest.py | 2 +- .../cases/apache_maven_sbom/test.yaml | 3 +- tests/integration/run.py | 19 +++--- tests/macaron_testcase.py | 2 +- tests/test_main.py | 2 +- 19 files changed, 146 insertions(+), 59 deletions(-) diff --git a/.gitignore b/.gitignore index 758a3d0cb..fe358ea54 100644 --- a/.gitignore +++ b/.gitignore @@ -167,6 +167,7 @@ gradlew.bat .macaron reports output +output_dir cdx_debug.json sbom_debug.json golang/internal/filewriter/mock_dir/result.json diff --git a/docs/source/pages/cli_usage/index.rst b/docs/source/pages/cli_usage/index.rst index dc169c3a2..668794ec9 100644 --- a/docs/source/pages/cli_usage/index.rst +++ b/docs/source/pages/cli_usage/index.rst @@ -42,7 +42,7 @@ Common Options Disable Rich UI output. This will turn off any rich formatting (e.g., colored output, tables, etc.) 
used in the terminal UI. -.. option:: -o OUTPUT_DIR, --output-dir OUTPUT_DIR +.. option:: -o OUTPUT, --output OUTPUT_DIR The output destination path for Macaron. This is where Macaron will store the results of the analysis. diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index 65dd08954..306ae1bf7 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -56,6 +56,9 @@ IMAGE="ghcr.io/oracle/macaron" # Workspace directory inside of the container. MACARON_WORKSPACE="/home/macaron" +# Host output path outside the container. +HOST_OUTPUT="" + # The entrypoint to run Macaron or the Policy Engine. # It it set by default to macaron. # We use an array here to preserve the arguments as provided by the user. @@ -388,8 +391,10 @@ fi if [[ -n "${arg_output:-}" ]]; then output="${arg_output}" argv_main+=("--output" "${MACARON_WORKSPACE}/output/") + HOST_OUTPUT="${arg_output}" else output=$(pwd)/output + HOST_OUTPUT="output" echo "Setting default output directory to ${output}." fi @@ -659,6 +664,7 @@ docker run \ --rm -i "${tty[@]}" \ -e "USER_UID=${USER_UID}" \ -e "USER_GID=${USER_GID}" \ + -e "HOST_OUTPUT=${HOST_OUTPUT}" \ "${proxy_vars[@]}" \ "${prod_vars[@]}" \ "${mounts[@]}" \ diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index e23844e5e..dd103eec6 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -22,6 +22,7 @@ from macaron.config.global_config import global_config from macaron.console import RichConsoleHandler, access_handler from macaron.errors import ConfigurationError +from macaron.output_reporter import find_report_output_path from macaron.output_reporter.reporter import HTMLReporter, JSONReporter, PolicyReporter from macaron.policy_engine.policy_engine import run_policy_engine, show_prelude from macaron.repo_finder import repo_finder @@ -280,14 +281,14 @@ def verify_policy(verify_policy_args: argparse.Namespace) -> int: rich_handler = access_handler.get_handler() if vsa is not None: vsa_filepath = os.path.join(global_config.output_path, "vsa.intoto.jsonl") - rich_handler.update_vsa(os.path.relpath(vsa_filepath, os.getcwd())) + rich_handler.update_vsa(find_report_output_path(vsa_filepath)) logger.info( "Generating the Verification Summary Attestation (VSA) to %s.", - os.path.relpath(vsa_filepath, os.getcwd()), + find_report_output_path(vsa_filepath), ) logger.info( "To decode and inspect the payload, run `cat %s | jq -r '.payload' | base64 -d | jq`.", - os.path.relpath(vsa_filepath, os.getcwd()), + find_report_output_path(vsa_filepath), ) try: with open(vsa_filepath, mode="w", encoding="utf-8") as file: @@ -295,7 +296,7 @@ def verify_policy(verify_policy_args: argparse.Namespace) -> int: except OSError as err: logger.error( "Could not generate the VSA to %s. Error: %s", - os.path.relpath(vsa_filepath, os.getcwd()), + find_report_output_path(vsa_filepath), err, ) else: @@ -372,7 +373,7 @@ def perform_action(action_args: argparse.Namespace) -> None: if not action_args.disable_rich_output: rich_handler.start("dump-defaults") # Create the defaults.ini file in the output dir and exit. 
- create_defaults(action_args.output_dir, os.getcwd()) + create_defaults(action_args.output) sys.exit(os.EX_OK) case "verify-policy": @@ -466,6 +467,9 @@ def main(argv: list[str] | None = None) -> None: global_config.gl_token = _get_token_from_dict_or_env("MCN_GITLAB_TOKEN", token_dict) global_config.gl_self_host_token = _get_token_from_dict_or_env("MCN_SELF_HOSTED_GITLAB_TOKEN", token_dict) + # Set the host output path, which would be set if Macaron is running inside a container. + global_config.host_output_path = _get_host_output_path_env() + main_parser = argparse.ArgumentParser(prog="macaron") main_parser.add_argument( @@ -492,7 +496,7 @@ def main(argv: list[str] | None = None) -> None: main_parser.add_argument( "-o", - "--output-dir", + "--output", default=os.path.join(os.getcwd(), "output"), help="The output destination path for Macaron", ) @@ -724,29 +728,29 @@ def main(argv: list[str] | None = None) -> None: try: # Set the output directory. - if not args.output_dir: + if not args.output: logger.error("The output path cannot be empty. Exiting ...") sys.exit(os.EX_USAGE) - if os.path.isfile(args.output_dir): + if os.path.isfile(args.output): logger.error("The output directory already exists. Exiting ...") sys.exit(os.EX_USAGE) - if os.path.isdir(args.output_dir): + if os.path.isdir(args.output): logger.info( "Setting the output directory to %s", - os.path.relpath(args.output_dir, os.getcwd()), + find_report_output_path(args.output), ) else: logger.info( "No directory at %s. Creating one ...", - os.path.relpath(args.output_dir, os.getcwd()), + find_report_output_path(args.output), ) - os.makedirs(args.output_dir) + os.makedirs(args.output) # Add file handler to the root logger. Remove stream handler from the # root logger to prevent dependencies printing logs to stdout. - debug_log_path = os.path.join(args.output_dir, "debug.log") + debug_log_path = os.path.join(args.output, "debug.log") log_file_handler = logging.FileHandler(debug_log_path, "w") log_file_handler.setFormatter(logging.Formatter(log_format)) if args.disable_rich_output: @@ -769,8 +773,8 @@ def main(argv: list[str] | None = None) -> None: # set through analyze sub-command. global_config.load( macaron_path=macaron.MACARON_PATH, - output_path=args.output_dir, - build_log_path=os.path.join(args.output_dir, "build_log"), + output_path=args.output, + build_log_path=os.path.join(args.output, "build_log"), debug_level=log_level, local_repos_path=args.local_repos_path, resources_path=os.path.join(macaron.MACARON_PATH, "resources"), @@ -800,5 +804,17 @@ def _get_token_from_dict_or_env(token: str, token_dict: dict[str, str]) -> str: return token_dict[token] if token in token_dict else os.environ.get(token) or "" +def _get_host_output_path_env() -> str: + """ + Get the host output path from the HOST_OUTPUT environment variable. + + Returns + ------- + str + The HOST_OUTPUT environment variable or an empty string. 
+ """ + return os.environ.get("HOST_OUTPUT") or "" + + if __name__ == "__main__": main() diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py index c23fc3c80..9d7fd94ca 100644 --- a/src/macaron/build_spec_generator/build_spec_generator.py +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -17,6 +17,7 @@ from macaron.build_spec_generator.reproducible_central.reproducible_central import gen_reproducible_central_build_spec from macaron.console import access_handler from macaron.errors import GenerateBuildSpecError +from macaron.output_reporter import find_report_output_path from macaron.path_utils.purl_based_path import get_purl_based_dir logger: logging.Logger = logging.getLogger(__name__) @@ -120,17 +121,17 @@ def gen_build_spec_for_purl( logger.info( "Generating the %s format build spec to %s", build_spec_format.value, - os.path.relpath(build_spec_file_path, os.getcwd()), + find_report_output_path(build_spec_file_path), ) rich_handler = access_handler.get_handler() - rich_handler.update_gen_build_spec("Build Spec Path:", os.path.relpath(build_spec_file_path, os.getcwd())) + rich_handler.update_gen_build_spec("Build Spec Path:", find_report_output_path(build_spec_file_path)) try: with open(build_spec_file_path, mode="w", encoding="utf-8") as file: file.write(build_spec_content) except OSError as error: logger.error( "Could not create the build spec at %s. Error: %s", - os.path.relpath(build_spec_file_path, os.getcwd()), + find_report_output_path(build_spec_file_path), error, ) return os.EX_OSERR diff --git a/src/macaron/config/defaults.py b/src/macaron/config/defaults.py index a5b487c0b..d0f355092 100644 --- a/src/macaron/config/defaults.py +++ b/src/macaron/config/defaults.py @@ -10,6 +10,7 @@ import shutil from macaron.console import access_handler +from macaron.output_reporter import find_report_output_path logger: logging.Logger = logging.getLogger(__name__) @@ -138,15 +139,13 @@ def load_defaults(user_config_path: str) -> bool: return False -def create_defaults(output_path: str, cwd_path: str) -> bool: +def create_defaults(output_path: str) -> bool: """Create the ``defaults.ini`` file at the Macaron's root dir for end users. Parameters ---------- output_path : str The path where the ``defaults.ini`` will be created. - cwd_path : str - The path to the current working directory. Returns ------- @@ -169,12 +168,12 @@ def create_defaults(output_path: str, cwd_path: str) -> bool: shutil.copy2(src_path, dest_path) logger.info( "Dumped the default values in %s.", - os.path.relpath(os.path.join(output_path, "defaults.ini"), cwd_path), + find_report_output_path(os.path.join(output_path, "defaults.ini")), ) - rich_handler.update_dump_defaults(os.path.relpath(dest_path, cwd_path)) + rich_handler.update_dump_defaults(find_report_output_path(dest_path)) return True # We catch OSError to support errors on different platforms. except OSError as error: - logger.error("Failed to create %s: %s.", os.path.relpath(dest_path, cwd_path), error) + logger.error("Failed to create %s: %s.", find_report_output_path(dest_path), error) rich_handler.update_dump_defaults("[bold red]Failed[/]") return False diff --git a/src/macaron/config/global_config.py b/src/macaron/config/global_config.py index 4e2befa6f..78bedc34b 100644 --- a/src/macaron/config/global_config.py +++ b/src/macaron/config/global_config.py @@ -49,6 +49,9 @@ class GlobalConfig: #: The path to the local .m2 Maven repository. 
This attribute is None if there is no available .m2 directory. local_maven_repo: str | None = None + #: The host output path, if Macaron is executed as a container. + host_output_path: str = "" + def load( self, macaron_path: str, diff --git a/src/macaron/dependency_analyzer/cyclonedx.py b/src/macaron/dependency_analyzer/cyclonedx.py index 9fec0536f..c46a8a773 100644 --- a/src/macaron/dependency_analyzer/cyclonedx.py +++ b/src/macaron/dependency_analyzer/cyclonedx.py @@ -26,6 +26,7 @@ from macaron.config.target_config import Configuration from macaron.database.table_definitions import Component from macaron.errors import CycloneDXParserError, DependencyAnalyzerError +from macaron.output_reporter import find_report_output_path from macaron.output_reporter.scm import SCMStatus from macaron.repo_finder.repo_finder import find_repo from macaron.repo_finder.repo_finder_enums import RepoFinderInfo @@ -359,7 +360,7 @@ def resolve_dependencies(main_ctx: Any, sbom_path: str, recursive: bool = False) "Running %s version %s dependency analyzer on %s", dep_analyzer.tool_name, dep_analyzer.tool_version, - os.path.relpath(main_ctx.component.repository.fs_path, os.getcwd()), + find_report_output_path(main_ctx.component.repository.fs_path), ) log_path = os.path.join( @@ -397,7 +398,7 @@ def resolve_dependencies(main_ctx: Any, sbom_path: str, recursive: bool = False) logger.info( "Stored dependency resolver log for %s to %s.", dep_analyzer.tool_name, - os.path.relpath(log_path, os.getcwd()), + find_report_output_path(log_path), ) # Use repo finder to find more repositories to analyze. diff --git a/src/macaron/output_reporter/__init__.py b/src/macaron/output_reporter/__init__.py index f29d8ac6c..bbcaa8ea3 100644 --- a/src/macaron/output_reporter/__init__.py +++ b/src/macaron/output_reporter/__init__.py @@ -1,2 +1,59 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains helper functions for reporting.""" + +import logging +import os +from pathlib import Path + +from macaron.config.global_config import global_config + +logger: logging.Logger = logging.getLogger(__name__) + + +def find_report_output_path(file_name: str, host_output_path: str | None = None) -> str: + """ + Determine the output path for a report file. + + If ``host_output_path`` is empty or None, returns the file path relative + to the current working directory. Otherwise, prefixes the path (stripping + the first directory component) with the provided container host output path. + Returns empty string if path has no parts to strip. + + Parameters + ---------- + file_name : str + Path to the input file (absolute or relative). + host_output_path : str | None + Base output directory path. + + Returns + ------- + str + Output path as string. 
+ + Examples + -------- + >>> find_report_output_path("output/reports/maven/foo/bar", host_output_path=None) + 'output/reports/maven/foo/bar' + >>> find_report_output_path("output/reports/maven/foo/bar", host_output_path="output_dir") + 'output_dir/reports/maven/foo/bar' + >>> find_report_output_path("foo", host_output_path="output") + 'output/' + >>> find_report_output_path("", host_output_path="output") + '' + """ + if not file_name: + return "" + if host_output_path is None: + host_output_path = global_config.host_output_path + try: + file_path = Path(os.path.relpath(file_name, os.getcwd())) + except (ValueError, OSError) as error: + logger.debug("Failed to create path for %s: %s", file_name, error) + return "" + if not host_output_path: + return str(file_path) + + return os.path.join(host_output_path, file_path.relative_to(file_path.parts[0])).rstrip(".") diff --git a/src/macaron/output_reporter/reporter.py b/src/macaron/output_reporter/reporter.py index 45589836e..418c79160 100644 --- a/src/macaron/output_reporter/reporter.py +++ b/src/macaron/output_reporter/reporter.py @@ -20,6 +20,7 @@ import macaron.output_reporter.jinja2_extensions as jinja2_extensions # pylint: disable=consider-using-from-import from macaron.console import access_handler +from macaron.output_reporter import find_report_output_path from macaron.output_reporter.results import Report from macaron.output_reporter.scm import SCMStatus @@ -62,13 +63,13 @@ def write_file(self, file_path: str, data: str) -> bool: """ try: with open(file_path, mode=self.mode, encoding=self.encoding) as file: - logger.info("Writing to file %s", os.path.relpath(file_path, os.getcwd())) + logger.info("Writing to file %s", find_report_output_path(file_path)) file.write(data) return True except OSError as error: logger.error( "Cannot write to %s. 
Error: %s", - os.path.relpath(file_path, os.getcwd()), + find_report_output_path(file_path), error, ) return False @@ -128,7 +129,7 @@ def generate(self, target_dir: str, report: Report | dict) -> None: dep_file_name = os.path.join(target_dir, "dependencies.json") serialized_configs = list(report.get_serialized_configs()) self.write_file(dep_file_name, json.dumps(serialized_configs, indent=self.indent)) - self.rich_handler.update_report_table("Dependencies Report", os.path.relpath(dep_file_name, os.getcwd())) + self.rich_handler.update_report_table("Dependencies Report", find_report_output_path(dep_file_name)) for record in report.get_records(): if record.context and record.status == SCMStatus.AVAILABLE: @@ -136,7 +137,7 @@ def generate(self, target_dir: str, report: Report | dict) -> None: json_data = json.dumps(record.get_dict(), indent=self.indent) self.write_file(file_name, json_data) self.rich_handler.update_report_table( - "JSON Report", os.path.relpath(file_name, os.getcwd()), record.record_id + "JSON Report", find_report_output_path(file_name), record.record_id ) except TypeError as error: logger.critical("Cannot serialize output report to JSON: %s", error) @@ -231,7 +232,7 @@ def generate(self, target_dir: str, report: Report | dict) -> None: html = self.template.render(deepcopy(record.get_dict())) self.write_file(file_name, html) self.rich_handler.update_report_table( - "HTML Report", os.path.relpath(file_name, os.getcwd()), record.record_id + "HTML Report", find_report_output_path(file_name), record.record_id ) except TemplateSyntaxError as error: location = f"line {error.lineno}" @@ -285,7 +286,7 @@ def generate(self, target_dir: str, report: Report | dict) -> None: json.dumps(report, indent=self.indent), ) self.rich_handler.update_policy_report( - os.path.relpath(os.path.join(target_dir, "policy_report.json"), os.getcwd()) + find_report_output_path(os.path.join(target_dir, "policy_report.json")) ) except (TypeError, ValueError, OSError) as error: logger.critical("Cannot serialize the policy report to JSON: %s", error) diff --git a/src/macaron/provenance/provenance_verifier.py b/src/macaron/provenance/provenance_verifier.py index 0be9920fa..06356eff6 100644 --- a/src/macaron/provenance/provenance_verifier.py +++ b/src/macaron/provenance/provenance_verifier.py @@ -16,6 +16,7 @@ from macaron.config.defaults import defaults from macaron.config.global_config import global_config +from macaron.output_reporter import find_report_output_path from macaron.provenance.provenance_extractor import ProvenancePredicate, SLSAGithubGenericBuildDefinitionV01 from macaron.provenance.provenance_finder import ProvenanceAsset from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type @@ -336,7 +337,7 @@ def _verify_slsa( verified = "PASSED: SLSA verification passed" in output log_path = os.path.join(global_config.build_log_path, f"{os.path.basename(source_path)}.slsa_verifier.log") with open(log_path, mode="a", encoding="utf-8") as log_file: - logger.info("Storing SLSA verifier output for %s to %s", asset_name, os.path.relpath(log_path, os.getcwd())) + logger.info("Storing SLSA verifier output for %s to %s", asset_name, find_report_output_path(log_path)) log_file.writelines( [f"SLSA verifier output for cmd: {' '.join(cmd)}\n", output, "--------------------------------\n"] ) @@ -359,7 +360,7 @@ def _verify_slsa( ) with open(error_log_path, mode="a", encoding="utf-8") as log_file: logger.info( - "Storing SLSA verifier log for%s to %s", asset_name, 
os.path.relpath(error_log_path, os.getcwd()) + "Storing SLSA verifier log for%s to %s", asset_name, find_report_output_path(error_log_path) ) log_file.write(f"SLSA verifier output for cmd: {' '.join(cmd)}\n") log_file.writelines(errors) diff --git a/src/macaron/repo_finder/repo_utils.py b/src/macaron/repo_finder/repo_utils.py index d99280e0f..56d48b42a 100644 --- a/src/macaron/repo_finder/repo_utils.py +++ b/src/macaron/repo_finder/repo_utils.py @@ -14,6 +14,7 @@ from macaron.config.global_config import global_config from macaron.console import access_handler +from macaron.output_reporter import find_report_output_path from macaron.slsa_analyzer.git_service import GIT_SERVICES, BaseGitService from macaron.slsa_analyzer.git_service.base_git_service import NoneGitService from macaron.slsa_analyzer.git_url import GIT_REPOS_DIR, decode_git_tags, parse_git_tags @@ -78,7 +79,7 @@ def generate_report(purl: str, commit: str, repo: str, target_dir: str) -> bool: fullpath = f"{target_dir}/{filename}" os.makedirs(os.path.dirname(fullpath), exist_ok=True) - logger.info("Writing report to: %s", os.path.relpath(fullpath, os.getcwd())) + logger.info("Writing report to: %s", find_report_output_path(fullpath)) try: with open(fullpath, "w", encoding="utf-8") as file: @@ -87,10 +88,10 @@ def generate_report(purl: str, commit: str, repo: str, target_dir: str) -> bool: logger.debug("Failed to write report to file: %s", error) return False - logger.info("Report written to: %s", os.path.relpath(fullpath, os.getcwd())) + logger.info("Report written to: %s", find_report_output_path(fullpath)) rich_handler = access_handler.get_handler() - rich_handler.update_find_source_table("JSON Report:", os.path.relpath(fullpath, os.getcwd())) + rich_handler.update_find_source_table("JSON Report:", find_report_output_path(fullpath)) return True diff --git a/src/macaron/slsa_analyzer/ci_service/base_ci_service.py b/src/macaron/slsa_analyzer/ci_service/base_ci_service.py index 9df7e8e70..56979e055 100644 --- a/src/macaron/slsa_analyzer/ci_service/base_ci_service.py +++ b/src/macaron/slsa_analyzer/ci_service/base_ci_service.py @@ -11,6 +11,7 @@ from datetime import datetime from macaron.code_analyzer.dataflow_analysis.core import NodeForest +from macaron.output_reporter import find_report_output_path from macaron.slsa_analyzer.git_service.api_client import BaseAPIClient from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService @@ -146,9 +147,7 @@ def has_kws_in_config(self, kws: list, build_tool_name: str, repo_path: str) -> line.strip(), ) return keyword, config - logger.info( - "No build command found for %s in %s", build_tool_name, os.path.relpath(file_path, os.getcwd()) - ) + logger.info("No build command found for %s in %s", build_tool_name, find_report_output_path(file_path)) return "", "" except FileNotFoundError as error: logger.debug(error) diff --git a/tests/config/test_defaults.py b/tests/config/test_defaults.py index 45d138590..d0b09c5ee 100644 --- a/tests/config/test_defaults.py +++ b/tests/config/test_defaults.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
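All of the logging call sites above now route report paths through find_report_output_path. A minimal sketch of the resulting behaviour, using the helper added in this patch; the paths and host directory below are illustrative, and the input path is assumed to be relative to the current working directory:

from macaron.config.global_config import global_config
from macaron.output_reporter import find_report_output_path

# Outside a container: host_output_path stays empty, so the helper behaves
# like the old os.path.relpath(file_path, os.getcwd()) calls it replaces.
global_config.host_output_path = ""
print(find_report_output_path("output/reports/maven/foo/bar.json"))
# -> output/reports/maven/foo/bar.json

# Inside a container: the first path component ("output") is swapped for the
# host-side output directory, so logged paths remain valid on the host.
global_config.host_output_path = "/home/user/macaron_output"
print(find_report_output_path("output/reports/maven/foo/bar.json"))
# -> /home/user/macaron_output/reports/maven/foo/bar.json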
"""This module tests the defaults module.""" @@ -9,7 +9,6 @@ import pytest from macaron.config.defaults import create_defaults, defaults, load_defaults -from macaron.config.global_config import global_config def test_load_defaults() -> None: @@ -29,7 +28,7 @@ def test_load_defaults() -> None: def test_create_defaults() -> None: """Test dumping the default values.""" output_dir = os.path.dirname(os.path.abspath(__file__)) - assert create_defaults(output_dir, global_config.macaron_path) is True + assert create_defaults(output_dir) is True @pytest.mark.xfail( @@ -38,7 +37,7 @@ def test_create_defaults() -> None: ) def test_create_defaults_without_permission() -> None: """Test dumping default config in cases where the user does not have write permission to the output location.""" - assert create_defaults(output_path="/", cwd_path="/") is False + assert create_defaults(output_path="/") is False @pytest.mark.parametrize( diff --git a/tests/conftest.py b/tests/conftest.py index cb2cee8c0..413de3498 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -82,7 +82,7 @@ def setup_test(test_dir: Path, macaron_path: Path) -> NoReturn: # type: ignore """ # Load values from defaults.ini. if not test_dir.joinpath("defaults.ini").exists(): - create_defaults(str(test_dir), str(macaron_path)) + create_defaults(str(test_dir)) load_defaults(str(macaron_path)) yield diff --git a/tests/integration/cases/apache_maven_sbom/test.yaml b/tests/integration/cases/apache_maven_sbom/test.yaml index 2e2e47a34..bd7a861a3 100644 --- a/tests/integration/cases/apache_maven_sbom/test.yaml +++ b/tests/integration/cases/apache_maven_sbom/test.yaml @@ -13,6 +13,7 @@ steps: kind: analyze options: ini: config.ini + output: output_dir command_args: - -purl - pkg:maven/org.apache.maven/maven@4.0.0-alpha-1-SNAPSHOT?type=pom @@ -28,5 +29,5 @@ steps: kind: compare options: kind: deps_report - result: output/reports/maven/org_apache_maven/maven/dependencies.json + result: output_dir/reports/maven/org_apache_maven/maven/dependencies.json expected: dependencies.json diff --git a/tests/integration/run.py b/tests/integration/run.py index e1cec81cf..e78cf57a6 100644 --- a/tests/integration/run.py +++ b/tests/integration/run.py @@ -390,6 +390,7 @@ class AnalyzeStepOptions(TypedDict): expectation: str | None provenance: str | None sbom: str | None + output: str | None @dataclass @@ -404,15 +405,7 @@ def options_schema(cwd: str) -> cfgv.Map: None, *[ cfgv.NoAdditionalKeys( - [ - "main_args", - "command_args", - "env", - "ini", - "expectation", - "provenance", - "sbom", - ], + ["main_args", "command_args", "env", "ini", "expectation", "provenance", "sbom", "output"], ), cfgv.Optional( key="main_args", @@ -444,6 +437,11 @@ def options_schema(cwd: str) -> cfgv.Map: check_fn=check_required_file(cwd), default=None, ), + cfgv.Optional( + key="output", + check_fn=cfgv.check_string, + default=None, + ), ], ) @@ -454,6 +452,9 @@ def cmd(self, macaron_cmd: str) -> list[str]: ini_file = self.options.get("ini", None) if ini_file is not None: args.extend(["--defaults-path", ini_file]) + output = self.options.get("output", None) + if output is not None: + args.extend(["--output", output]) args.append("analyze") expectation_file = self.options.get("expectation", None) if expectation_file is not None: diff --git a/tests/macaron_testcase.py b/tests/macaron_testcase.py index a799d8f6e..9f94b4f39 100644 --- a/tests/macaron_testcase.py +++ b/tests/macaron_testcase.py @@ -26,7 +26,7 @@ def setUpClass(cls) -> None: """Set up the necessary values for the tests.""" 
# Load values from defaults.ini. if not cls.macaron_test_dir.joinpath("defaults.ini").exists(): - create_defaults(str(cls.macaron_test_dir), str(cls.macaron_path)) + create_defaults(str(cls.macaron_test_dir)) load_defaults(os.path.join(str(cls.macaron_test_dir), "defaults.ini")) diff --git a/tests/test_main.py b/tests/test_main.py index ce7f9d7c4..50f57b7d9 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Simple tests for the main method.""" From bd6880c5e899da1086dd24fa27d29a2950523747 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Thu, 8 Jan 2026 16:22:13 +1000 Subject: [PATCH 06/20] test: update log4j package version to the latest (#1276) Update the log4j package version in integration tests to the latest version, for which GitHub workflow runs are not deleted, and update the documentations. Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 4 ++-- .../pages/tutorials/detect_malicious_java_dep.rst | 12 ++++++------ .../tutorials/detect_vulnerable_github_actions.rst | 2 +- .../cases/org_apache_logging_log4j/policy_purl.dl | 4 ++-- .../cases/org_apache_logging_log4j/test.yaml | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index 2621313c6..930863d30 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
name: Test Macaron Action (tutorials) @@ -111,7 +111,7 @@ jobs: - name: Run Macaron (analyze purl - log4j-core example) uses: ./ with: - package_url: pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3 + package_url: pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3 output_dir: macaron_output/detect_vulnerable_github_actions - name: Run Macaron (verify policy - github_actions_vulns for purl) diff --git a/docs/source/pages/tutorials/detect_malicious_java_dep.rst b/docs/source/pages/tutorials/detect_malicious_java_dep.rst index 27bac87ba..02a2bc6df 100644 --- a/docs/source/pages/tutorials/detect_malicious_java_dep.rst +++ b/docs/source/pages/tutorials/detect_malicious_java_dep.rst @@ -25,7 +25,7 @@ dependencies: * - Artifact name - `Package URL (PURL) `_ * - `log4j-core `_ - - ``pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3?type=jar`` + - ``pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3?type=jar`` * - `jackson-databind `_ - ``pkg:maven/io.github.behnazh-w.demo/jackson-databind@1.0?type=jar`` @@ -110,20 +110,20 @@ As you scroll down in the HTML report, you will see a section for the dependenci | Macaron has found the two dependencies as expected: * ``io.github.behnazh-w.demo:jackson-databind:1.0`` -* ``org.apache.logging.log4j:log4j-core:3.0.0-beta3`` +* ``org.apache.logging.log4j:log4j-core:2.25.3`` -When we open the reports for each dependency, we see that ``mcn_find_artifact_pipeline_1`` is passed for ``org.apache.logging.log4j:log4j-core:3.0.0-beta3`` -and a GitHub Actions workflow run is found for publishing version ``3.0.0-beta3``. However, this check is failing for ``io.github.behnazh-w.demo:jackson-databind:1.0``. +When we open the reports for each dependency, we see that ``mcn_find_artifact_pipeline_1`` is passed for ``org.apache.logging.log4j:log4j-core:2.25.3`` +and a GitHub Actions workflow run is found for publishing version ``2.25.3``. However, this check is failing for ``io.github.behnazh-w.demo:jackson-databind:1.0``. This means that ``io.github.behnazh-w.demo:jackson-databind:1.0`` could have been built and published manually to Maven Central and could potentially be malicious. .. _fig_find_artifact_pipeline_log4j: .. figure:: ../../_static/images/tutorial_log4j_find_pipeline.png - :alt: mcn_find_artifact_pipeline_1 for org.apache.logging.log4j:log4j-core:3.0.0-beta3 + :alt: mcn_find_artifact_pipeline_1 for org.apache.logging.log4j:log4j-core:2.25.3 :align: center - ``org.apache.logging.log4j:log4j-core:3.0.0-beta3`` + ``org.apache.logging.log4j:log4j-core:2.25.3`` .. _fig_infer_artifact_pipeline_bh_jackson_databind: diff --git a/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst index f6f1747cf..953523a82 100644 --- a/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst +++ b/docs/source/pages/tutorials/detect_vulnerable_github_actions.rst @@ -107,7 +107,7 @@ Alternatively, run the ``analyze`` command with the PURL of a package: .. code-block:: shell - ./run_macaron.sh analyze -purl pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3 + ./run_macaron.sh analyze -purl pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3 Then, ensure that the ``mcn_githubactions_vulnerabilities_1`` check passes for the component. 
You can create a similar policy to the one shown earlier and store it in a file (e.g., ``check_github_actions_vuln.dl``): diff --git a/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl index f81ac7b07..49df9eba0 100644 --- a/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl +++ b/tests/integration/cases/org_apache_logging_log4j/policy_purl.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" @@ -20,4 +20,4 @@ Policy("test_policy", component_id, "") :- is_repo_url(component_id, "https://github.com/apache/logging-log4j2"). apply_policy_to("test_policy", component_id) :- - is_component(component_id, "pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3"). + is_component(component_id, "pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3"). diff --git a/tests/integration/cases/org_apache_logging_log4j/test.yaml b/tests/integration/cases/org_apache_logging_log4j/test.yaml index 8da5f01b6..7871c7a5a 100644 --- a/tests/integration/cases/org_apache_logging_log4j/test.yaml +++ b/tests/integration/cases/org_apache_logging_log4j/test.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | @@ -15,7 +15,7 @@ steps: options: command_args: - -purl - - pkg:maven/org.apache.logging.log4j/log4j-core@3.0.0-beta3 + - pkg:maven/org.apache.logging.log4j/log4j-core@2.25.3 - name: Run macaron verify-policy to verify passed/failed checks kind: verify options: From ed3a80f2a768542cc9598769ca4a0ec9f6a91d56 Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Thu, 15 Jan 2026 21:00:21 +1000 Subject: [PATCH 07/20] chore: infer interpreter version via Python tags in distribution names (#1265) Signed-off-by: Abhinav Pradeep --- .../build_spec_generator.py | 4 +- .../build_spec_generator/common_spec/core.py | 2 +- .../common_spec/pypi_spec.py | 36 +++++++---- .../dockerfile/pypi_dockerfile_output.py | 64 +++++++++++++++++-- 4 files changed, 84 insertions(+), 22 deletions(-) diff --git a/src/macaron/build_spec_generator/build_spec_generator.py b/src/macaron/build_spec_generator/build_spec_generator.py index 9d7fd94ca..e66be4ac2 100644 --- a/src/macaron/build_spec_generator/build_spec_generator.py +++ b/src/macaron/build_spec_generator/build_spec_generator.py @@ -98,8 +98,8 @@ def gen_build_spec_for_purl( case BuildSpecFormat.DOCKERFILE: try: build_spec_content = gen_dockerfile(build_spec) - except ValueError as error: - logger.error("Error while serializing the build spec: %s.", error) + except GenerateBuildSpecError as error: + logger.error("Error while generating the build spec: %s.", error) return os.EX_DATAERR build_spec_file_path = os.path.join(build_spec_dir_path, "dockerfile.buildspec") diff --git a/src/macaron/build_spec_generator/common_spec/core.py b/src/macaron/build_spec_generator/common_spec/core.py index 26b2f329f..4c2cf1ecd 100644 --- a/src/macaron/build_spec_generator/common_spec/core.py +++ b/src/macaron/build_spec_generator/common_spec/core.py @@ -378,7 +378,7 @@ def gen_generic_build_spec( "purl": 
str(purl), "language": target_language, "build_tools": build_tool_names, - "build_commands": [selected_build_command], + "build_commands": [selected_build_command] if selected_build_command else [], } ) ECOSYSTEMS[purl.type.upper()].value(base_build_spec_dict).resolve_fields(purl) diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index 999afbb19..d9bfd4b82 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -155,6 +155,16 @@ def resolve_fields(self, purl: PackageURL) -> None: chronologically_likeliest_version = ( pypi_package_json.get_chronologically_suitable_setuptools_version() ) + try: + # Get information from the wheel file name. + logger.debug(pypi_package_json.wheel_filename) + _, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename) + for tag in tags: + wheel_name_python_version_list.append(tag.interpreter) + wheel_name_platforms.add(tag.platform) + logger.debug(python_version_set) + except InvalidWheelFilename: + logger.debug("Could not parse wheel file name to extract version") except SourceCodeError: logger.debug("Could not find pure wheel matching this PURL") @@ -214,17 +224,6 @@ def resolve_fields(self, purl: PackageURL) -> None: except (InvalidRequirement, InvalidSpecifier) as error: logger.debug("Malformed requirement encountered %s : %s", requirement, error) - try: - # Get information from the wheel file name. - logger.debug(pypi_package_json.wheel_filename) - _, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename) - for tag in tags: - wheel_name_python_version_list.append(tag.interpreter) - wheel_name_platforms.add(tag.platform) - logger.debug(python_version_set) - except InvalidWheelFilename: - logger.debug("Could not parse wheel file name to extract version") - self.data["language_version"] = list(python_version_set) or wheel_name_python_version_list # Use the default build command for pure Python packages. @@ -243,9 +242,18 @@ def resolve_fields(self, purl: PackageURL) -> None: if not patched_build_commands: # Resolve and patch build commands. - selected_build_commands = self.data["build_commands"] or self.get_default_build_commands( - self.data["build_tools"] - ) + + # To ensure that selected_build_commands is never empty, we seed with the fallback + # command of python -m build --wheel -n + if self.data["build_commands"]: + selected_build_commands = self.data["build_commands"] + else: + self.data["build_commands"] = ["python -m build --wheel -n".split()] + selected_build_commands = ( + self.get_default_build_commands(self.data["build_tools"]) or self.data["build_commands"] + ) + + logger.debug(selected_build_commands) patched_build_commands = ( patch_commands( diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index ef2360a5c..457cfe15c 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -1,9 +1,10 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
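The relocated block above relies on packaging's wheel-name parser to pull interpreter and platform tags out of the distribution name. A self-contained sketch with made-up filenames:

from packaging.utils import InvalidWheelFilename, parse_wheel_filename

for filename in (
    "cachetools-6.2.1-py3-none-any.whl",
    "tree_sitter-0.25.2-cp310-abi3-manylinux_2_17_x86_64.whl",
):
    try:
        _name, _version, _build, tags = parse_wheel_filename(filename)
        for tag in tags:
            # tag.interpreter feeds the Python-version inference; tag.platform
            # is later checked against "any" to detect pure wheels.
            print(filename, "->", tag.interpreter, tag.abi, tag.platform)
    except InvalidWheelFilename:
        print(filename, "-> not a valid wheel filename")
# cachetools-6.2.1-py3-none-any.whl -> py3 none any
# tree_sitter-0.25.2-cp310-abi3-manylinux_2_17_x86_64.whl -> cp310 abi3 manylinux_2_17_x86_64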
"""This module implements the logic to generate a dockerfile from a Python buildspec.""" import logging +import re from textwrap import dedent from packaging.specifiers import InvalidSpecifier, SpecifierSet @@ -35,8 +36,7 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: """ language_version: str | None = pick_specific_version(buildspec) if language_version is None: - logger.debug("Could not derive a specific interpreter version.") - raise GenerateBuildSpecError("Could not derive specific interpreter version.") + raise GenerateBuildSpecError("Could not derive specific interpreter version") backend_install_commands: str = " && ".join(build_backend_commands(buildspec)) build_tool_install: str = "" if ( @@ -124,8 +124,18 @@ def pick_specific_version(buildspec: BaseBuildSpecDict) -> str | None: try: version_set &= SpecifierSet(version) except InvalidSpecifier as error: - logger.debug("Malformed interpreter version encountered: %s (%s)", version, error) - return None + logger.debug("Non-standard interpreter version encountered: %s (%s)", version, error) + # Whilst the Python tags specify interpreter implementation + # as well as version, with no standard way to parse out the + # implementation, we can attempt to heuristically: + try_parse_version = infer_interpreter_version(version) + if try_parse_version: + try: + version_set &= SpecifierSet(f">={try_parse_version}") + except InvalidSpecifier as error_for_retry: + logger.debug("Could not parse interpreter version from: %s (%s)", version, error_for_retry) + + logger.debug(version_set) # Now to get the latest acceptable one, we can step through all interpreter # versions. For the most accurate result, we can query python.org for a @@ -141,6 +151,50 @@ def pick_specific_version(buildspec: BaseBuildSpecDict) -> str | None: return None +def infer_interpreter_version(specifier: str) -> str | None: + """Infer interpreter version from Python-tag. + + Note: This function is called on version specifiers + that we cannot trivially parse. In the case that + it is a Python-tag, which is obtained from the + wheel name, we attempt to infer the corresponding + interpreter version. + + Parameters + ---------- + specifier: str + specifier string that could not be trivially parsed. + + Returns + ------- + str | None + The interpreter version inferred from the specifier, or + None if we cannot parse the specifier as a Python-tag. + + Examples + -------- + >>> infer_interpreter_version("py3") + '3' + >>> infer_interpreter_version("cp314") + '3.14' + >>> infer_interpreter_version("pypy311") + '3.11' + >>> infer_interpreter_version("malformed123") + """ + # The primary alternative interpreter implementations are documented here: + # https://www.python.org/download/alternatives/ + # We parse tags for these implementations using below regular expression: + pattern = re.compile(r"^(py|cp|ip|pp|pypy|jy|graalpy)(\d{1,3})$") + parsed_tag = pattern.match(specifier) + if parsed_tag: + digits = parsed_tag.group(2) + # As match succeeded len(digits) \in {1,2,3} + if len(digits) == 1: + return parsed_tag.group(2) + return f"{digits[0]}.{digits[1:]}" + return None + + def build_backend_commands(buildspec: BaseBuildSpecDict) -> list[str]: """Generate the installation commands for each inferred build backend. 
From d995156f23e54cb7c879fc4ffdcc787505849f8b Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Mon, 19 Jan 2026 18:54:59 +1000 Subject: [PATCH 08/20] chore: address Value field issue for empty strings in dataflow analysis (#1281) This patch addresses an issue in the dataflow analysis related to parsing single-quoted strings in bash scripts. Previously, for empty single-quoted strings the code incorrectly assumed that a Value field with an empty string would always be present. Signed-off-by: behnazh-w --- src/macaron/code_analyzer/dataflow_analysis/bash.py | 8 +++++--- src/macaron/parsers/bashparser_model.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/macaron/code_analyzer/dataflow_analysis/bash.py b/src/macaron/code_analyzer/dataflow_analysis/bash.py index f350448a5..4a4903c86 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/bash.py +++ b/src/macaron/code_analyzer/dataflow_analysis/bash.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Dataflow analysis implementation for analysing Bash shell scripts.""" @@ -1811,7 +1811,7 @@ def convert_shell_word_to_value( if dbl_quoted_parts is not None: return convert_shell_value_sequence_to_fact_value(dbl_quoted_parts, context), True - sgl_quoted_str = parse_sql_quoted_string(word) + sgl_quoted_str = parse_sgl_quoted_string(word) if sgl_quoted_str is not None: return facts.StringLiteral(sgl_quoted_str), True @@ -1842,7 +1842,7 @@ def parse_dbl_quoted_string(word: bashparser_model.Word) -> list[LiteralOrEnvVar return None -def parse_sql_quoted_string(word: bashparser_model.Word) -> str | None: +def parse_sgl_quoted_string(word: bashparser_model.Word) -> str | None: """Parse single quoted string. If the given word is a single quoted string, return the string @@ -1851,6 +1851,8 @@ def parse_sql_quoted_string(word: bashparser_model.Word) -> str | None: if len(word["Parts"]) == 1: part = word["Parts"][0] if bashparser_model.is_sgl_quoted(part): + if "Value" not in part: + return "" return part["Value"] return None diff --git a/src/macaron/parsers/bashparser_model.py b/src/macaron/parsers/bashparser_model.py index 09ca83813..edd2a6063 100644 --- a/src/macaron/parsers/bashparser_model.py +++ b/src/macaron/parsers/bashparser_model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Type definitions for Bash AST as produced (and json-serialised) by the "mvdan.cc/sh/v3/syntax" bash parser.""" @@ -159,7 +159,7 @@ class SglQuoted(TypedDict): Left: Pos Right: Pos Dollar: NotRequired[bool] - Value: str + Value: NotRequired[str] def is_sgl_quoted(part: WordPart) -> TypeGuard[SglQuoted]: From 65ecc4ca5cfca9674cc315cb840a28630e3a1a50 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Tue, 20 Jan 2026 17:51:52 +1000 Subject: [PATCH 09/20] fix: add the missing provenance asset links to the reports (#1271) Fixes missing provenance asset links in reports, by adding explicit links to provenance files. 
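For example, callers can now surface the attestation link alongside the payload. A sketch with placeholder values; in the real code the payload is an InTotoPayload produced by validate_intoto_payload:

from macaron.provenance import ProvenanceAsset

asset = ProvenanceAsset(
    payload=None,  # placeholder: the real code passes a validated InTotoPayload
    name="1a2b3c...",  # placeholder artifact hash
    url="https://api.github.com/repos/owner/repo/attestations/sha256:1a2b3c...",
)
print(asset.url)  # the explicit provenance link that now reaches the reports
# The dataclass is frozen, so asset.url = "..." would raise FrozenInstanceError.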
Signed-off-by: behnazh-w --- src/macaron/provenance/__init__.py | 13 ++++++++++++ src/macaron/provenance/provenance_finder.py | 12 +---------- src/macaron/slsa_analyzer/analyzer.py | 3 ++- .../slsa_analyzer/git_service/api_client.py | 12 ++++++----- .../slsa_analyzer/git_service/github.py | 21 ++++++++++++------- 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/macaron/provenance/__init__.py b/src/macaron/provenance/__init__.py index a99afa31c..7e3c5a63b 100644 --- a/src/macaron/provenance/__init__.py +++ b/src/macaron/provenance/__init__.py @@ -2,3 +2,16 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This package contains the provenance tools for software components.""" + +from dataclasses import dataclass + +from macaron.slsa_analyzer.provenance.intoto import InTotoPayload + + +@dataclass(frozen=True) +class ProvenanceAsset: + """This class exists to hold a provenance payload with the original asset's name and URL.""" + + payload: InTotoPayload + name: str + url: str diff --git a/src/macaron/provenance/provenance_finder.py b/src/macaron/provenance/provenance_finder.py index 4935ca62d..0c1385d0f 100644 --- a/src/macaron/provenance/provenance_finder.py +++ b/src/macaron/provenance/provenance_finder.py @@ -6,7 +6,6 @@ import logging import os import tempfile -from dataclasses import dataclass from functools import partial from packageurl import PackageURL @@ -14,6 +13,7 @@ from macaron.artifact.local_artifact import get_local_artifact_hash from macaron.config.defaults import defaults +from macaron.provenance import ProvenanceAsset from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder from macaron.repo_finder.repo_utils import get_repo_tags @@ -30,7 +30,6 @@ ) from macaron.slsa_analyzer.package_registry.npm_registry import NPMAttestationAsset from macaron.slsa_analyzer.package_registry.pypi_registry import find_or_create_pypi_asset -from macaron.slsa_analyzer.provenance.intoto import InTotoPayload from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError from macaron.slsa_analyzer.provenance.loader import load_provenance_payload from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData @@ -41,15 +40,6 @@ logger: logging.Logger = logging.getLogger(__name__) -@dataclass(frozen=True) -class ProvenanceAsset: - """This class exists to hold a provenance payload with the original asset's name and URL.""" - - payload: InTotoPayload - name: str - url: str - - class ProvenanceFinder: """This class is used to find and retrieve provenance files from supported registries.""" diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 31b4f0937..a76e45e1b 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -527,9 +527,10 @@ def run_single( # Try to discover GitHub attestation for the target software component. 
artifact_hash = get_artifact_hash(parsed_purl, local_artifact_dirs, package_registries_info) if artifact_hash: - provenance_payload = git_service.get_attestation_payload( + provenance_asset = git_service.get_attestation( analyze_ctx.component.repository.full_name, artifact_hash ) + provenance_payload = provenance_asset.payload if provenance_asset else None if provenance_payload: try: provenance_repo_url, provenance_commit_digest = extract_repo_and_commit_from_provenance( diff --git a/src/macaron/slsa_analyzer/git_service/api_client.py b/src/macaron/slsa_analyzer/git_service/api_client.py index 98012fae8..9921c2dc9 100644 --- a/src/macaron/slsa_analyzer/git_service/api_client.py +++ b/src/macaron/slsa_analyzer/git_service/api_client.py @@ -648,8 +648,8 @@ def download_asset(self, url: str, download_path: str) -> bool: return download_file_with_size_limit(url, headers, download_path, timeout, size_limit) - def get_attestation(self, full_name: str, artifact_hash: str) -> dict: - """Download and return the attestation associated with the passed artifact hash, if any. + def get_attestation(self, full_name: str, artifact_hash: str) -> tuple[str | None, dict]: + """Download and return the attestation url and content associated with the passed artifact hash, if any. Parameters ---------- @@ -660,12 +660,14 @@ def get_attestation(self, full_name: str, artifact_hash: str) -> dict: Returns ------- - dict - The attestation data, or an empty dict if not found. + tuple[str|None,dict] + The attestation url and data, or None and an empty dict if not found. """ url = f"{GhAPIClient._REPO_END_POINT}/{full_name}/attestations/sha256:{artifact_hash}" response_data = send_get_http(url, self.headers) - return response_data or {} + if not response_data: + return (None, {}) + return (url, response_data) def get_default_gh_client(access_token: str) -> GhAPIClient: diff --git a/src/macaron/slsa_analyzer/git_service/github.py b/src/macaron/slsa_analyzer/git_service/github.py index ff7ecc593..d5e1c8548 100644 --- a/src/macaron/slsa_analyzer/git_service/github.py +++ b/src/macaron/slsa_analyzer/git_service/github.py @@ -9,10 +9,11 @@ from macaron.config.global_config import global_config from macaron.errors import ConfigurationError, RepoCheckOutError from macaron.json_tools import json_extract +from macaron.provenance import ProvenanceAsset from macaron.slsa_analyzer import git_url from macaron.slsa_analyzer.git_service.api_client import GhAPIClient, get_default_gh_client from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService -from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, ValidateInTotoPayloadError, validate_intoto_payload +from macaron.slsa_analyzer.provenance.intoto import ValidateInTotoPayloadError, validate_intoto_payload from macaron.slsa_analyzer.provenance.loader import decode_provenance logger: logging.Logger = logging.getLogger(__name__) @@ -96,7 +97,7 @@ def check_out_repo(self, git_obj: Git, branch: str, digest: str, offline_mode: b return git_obj - def get_attestation_payload(self, repository_name: str, artifact_hash: str) -> InTotoPayload | None: + def get_attestation(self, repository_name: str, artifact_hash: str) -> ProvenanceAsset | None: """Get the GitHub attestation associated with the given PURL, or None if it cannot be found. 
The schema of GitHub attestation can be found on the API page: @@ -111,12 +112,12 @@ def get_attestation_payload(self, repository_name: str, artifact_hash: str) -> I Returns ------- - InTotoPayload | None - The attestation payload, if found. + ProvenanceAsset | None + The provenance asset, if found. """ - git_attestation_dict = self.api_client.get_attestation(repository_name, artifact_hash) + attestation_url, git_attestation_dict = self.api_client.get_attestation(repository_name, artifact_hash) - if not git_attestation_dict: + if not attestation_url or not git_attestation_dict: return None git_attestation_list = json_extract(git_attestation_dict, ["attestations"], list) @@ -124,9 +125,13 @@ def get_attestation_payload(self, repository_name: str, artifact_hash: str) -> I return None payload = decode_provenance(git_attestation_list[0]) - + validated_payload = None try: - return validate_intoto_payload(payload) + validated_payload = validate_intoto_payload(payload) except ValidateInTotoPayloadError as error: logger.debug("Invalid attestation payload: %s", error) return None + if not validated_payload: + return None + + return ProvenanceAsset(validated_payload, artifact_hash, attestation_url) From 2d54593de3098777db2ac5df0d941885561a1fd9 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Wed, 28 Jan 2026 14:24:13 +1000 Subject: [PATCH 10/20] chore(deps): update semgrep to v1.149.0 and ignore GHSA-7gcm-g887-7qv7 temporarily (#1290) This PR updates semgrep to v1.149.0 and temporarily ignores GHSA-7gcm-g887-7qv7 until the fix is available. Signed-off-by: behnazh-w --- Makefile | 5 +++-- pyproject.toml | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 173298790..83b0d2d5c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # Use bash as the shell when executing a rule's recipe. For more details: @@ -274,12 +274,13 @@ requirements.txt: pyproject.toml # editable mode (like the one in development here) because they may not have # a PyPI entry; also print out CVE description and potential fixes if audit # found an issue. +# Ignoring GHSA-7gcm-g887-7qv7: remove the exception when a fix is available. .PHONY: audit audit: if ! $$(python -c "import pip_audit" &> /dev/null); then \ echo "No package pip_audit installed, upgrade your environment!" && exit 1; \ fi; - python -m pip_audit --skip-editable --desc on --fix --dry-run + python -m pip_audit --skip-editable --desc on --fix --dry-run --ignore-vuln GHSA-7gcm-g887-7qv7 # Run some or all checks over the package code base. .PHONY: check check-code check-bandit check-flake8 check-lint check-mypy check-go check-actionlint diff --git a/pyproject.toml b/pyproject.toml index 65fd534dc..0c0f16641 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
# https://flit.pypa.io/en/latest/pyproject_toml.html @@ -37,7 +37,7 @@ dependencies = [ "beautifulsoup4 >= 4.12.0,<5.0.0", "problog >= 2.2.6,<3.0.0", "cryptography >=44.0.0,<45.0.0", - "semgrep == 1.113.0", + "semgrep == 1.149.0", "email-validator >=2.2.0,<3.0.0", "rich >=13.5.3,<15.0.0", "lark >= 1.3.0,<2.0.0", From aebdf78e7eab15e5c7ac510f741fd9e7dcb0943f Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Wed, 4 Feb 2026 15:25:50 +0530 Subject: [PATCH 11/20] feat: include has_binaries flag in build spec (#1278) Signed-off-by: Abhinav Pradeep --- .../common_spec/base_spec.py | 5 +- .../common_spec/pypi_spec.py | 111 ++++++++++-------- .../dockerfile/pypi_dockerfile_output.py | 2 + src/macaron/errors.py | 6 +- .../package_registry/pypi_registry.py | 62 +++++++++- .../dockerfile/test_pypi_dockerfile_output.py | 3 +- .../expected_default.buildspec | 3 +- .../expected_default.buildspec | 3 +- .../pypi_toga/expected_default.buildspec | 1 + .../expected_default.buildspec | 26 ++++ .../cases/pypi_tree-sitter/test.yaml | 38 ++++++ 11 files changed, 199 insertions(+), 61 deletions(-) create mode 100644 tests/integration/cases/pypi_tree-sitter/expected_default.buildspec create mode 100644 tests/integration/cases/pypi_tree-sitter/test.yaml diff --git a/src/macaron/build_spec_generator/common_spec/base_spec.py b/src/macaron/build_spec_generator/common_spec/base_spec.py index c567609f7..698a0b948 100644 --- a/src/macaron/build_spec_generator/common_spec/base_spec.py +++ b/src/macaron/build_spec_generator/common_spec/base_spec.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module includes base build specification and helper classes.""" @@ -81,6 +81,9 @@ class BaseBuildSpecDict(TypedDict, total=False): #: be a list of these that were used in building the wheel alongside their version. build_backends: NotRequired[list[str]] + #: Flag to indicate if the artifact includes binaries. + has_binaries: NotRequired[bool] + class BaseBuildSpec(ABC): """Abstract base class for build specification behavior and field resolution.""" diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index d9bfd4b82..0471afd72 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module includes build specification and helper classes for PyPI packages.""" @@ -14,10 +14,9 @@ from packaging.specifiers import InvalidSpecifier from packaging.utils import InvalidWheelFilename, parse_wheel_filename -from macaron.build_spec_generator.build_command_patcher import CLI_COMMAND_PATCHES, patch_commands from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict from macaron.config.defaults import defaults -from macaron.errors import GenerateBuildSpecError, SourceCodeError +from macaron.errors import GenerateBuildSpecError, SourceCodeError, WheelTagError from macaron.json_tools import json_extract from macaron.slsa_analyzer.package_registry import pypi_registry from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo @@ -114,9 +113,9 @@ def resolve_fields(self, purl: PackageURL) -> None: pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info) patched_build_commands: list[list[str]] = [] - build_requires_set: set[str] = set() build_backends_set: set[str] = set() parsed_build_requires: dict[str, str] = {} + sdist_build_requires: dict[str, str] = {} python_version_set: set[str] = set() wheel_name_python_version_list: list[str] = [] wheel_name_platforms: set[str] = set() @@ -134,8 +133,16 @@ def resolve_fields(self, purl: PackageURL) -> None: if py_version := json_extract(release, ["requires_python"], str): python_version_set.add(py_version.replace(" ", "")) + self.data["has_binaries"] = not pypi_package_json.has_pure_wheel() + + if self.data["has_binaries"]: + logger.debug("Can not find a pure wheel") + else: + logger.debug("Found pure wheel matching this PURL") + try: - with pypi_package_json.wheel(): + # The wheel function handles downloading binaries in the case that we cannot find a pure wheel. + with pypi_package_json.wheel(download_binaries=self.data["has_binaries"]): logger.debug("Wheel at %s", pypi_package_json.wheel_path) # Should only have .dist-info directory. logger.debug("It has directories %s", ",".join(os.listdir(pypi_package_json.wheel_path))) @@ -165,8 +172,10 @@ def resolve_fields(self, purl: PackageURL) -> None: logger.debug(python_version_set) except InvalidWheelFilename: logger.debug("Could not parse wheel file name to extract version") + except WheelTagError: + logger.debug("Can not analyze non-pure wheels") except SourceCodeError: - logger.debug("Could not find pure wheel matching this PURL") + logger.debug("Could not download wheel matching this PURL") logger.debug("From .dist_info:") logger.debug(parsed_build_requires) @@ -179,27 +188,32 @@ def resolve_fields(self, purl: PackageURL) -> None: content = tomli.loads(pyproject_content.decode("utf-8")) requires = json_extract(content, ["build-system", "requires"], list) if requires: - build_requires_set.update(elem.replace(" ", "") for elem in requires) + for requirement in requires: + self.add_parsed_requirement(sdist_build_requires, requirement) # If we cannot find `requires` in `[build-system]`, we lean on the fact that setuptools # was the de-facto build tool, and infer a setuptools version to include. 
else: - build_requires_set.add(f"setuptools=={chronologically_likeliest_version}") + self.add_parsed_requirement( + sdist_build_requires, f"setuptools=={chronologically_likeliest_version}" + ) backend = json_extract(content, ["build-system", "build-backend"], str) if backend: build_backends_set.add(backend.replace(" ", "")) python_version_constraint = json_extract(content, ["project", "requires-python"], str) if python_version_constraint: python_version_set.add(python_version_constraint.replace(" ", "")) - self.apply_tool_specific_inferences(build_requires_set, python_version_set, content) + self.apply_tool_specific_inferences(sdist_build_requires, python_version_set, content) logger.debug( "After analyzing pyproject.toml from the sdist: build-requires: %s, build_backend: %s", - build_requires_set, + sdist_build_requires, build_backends_set, ) # Here we have successfully analyzed the pyproject.toml file. Now, if we have a setup.py/cfg, # we also need to infer a setuptools version to infer. if pypi_package_json.file_exists("setup.py") or pypi_package_json.file_exists("setup.cfg"): - build_requires_set.add(f"setuptools=={chronologically_likeliest_version}") + self.add_parsed_requirement( + sdist_build_requires, f"setuptools=={chronologically_likeliest_version}" + ) except TypeError as error: logger.debug( "Found a type error while reading the pyproject.toml file from the sdist: %s", error @@ -210,26 +224,23 @@ def resolve_fields(self, purl: PackageURL) -> None: logger.debug("No pyproject.toml found: %s", error) # Here we do not have a pyproject.toml file. Instead, we lean on the fact that setuptools # was the de-facto build tool, and infer a setuptools version to include. - build_requires_set.add(f"setuptools=={chronologically_likeliest_version}") + self.add_parsed_requirement( + sdist_build_requires, f"setuptools=={chronologically_likeliest_version}" + ) except SourceCodeError as error: logger.debug("No source distribution found: %s", error) + logger.debug("After complete analysis of the sdist:") + logger.debug(sdist_build_requires) + # Merge in pyproject.toml information only when the wheel dist_info does not contain the same. # Hatch is an interesting example of this merge being required. - for requirement in build_requires_set: - try: - parsed_requirement = Requirement(requirement) - if parsed_requirement.name not in parsed_build_requires: - parsed_build_requires[parsed_requirement.name] = str(parsed_requirement.specifier) - except (InvalidRequirement, InvalidSpecifier) as error: - logger.debug("Malformed requirement encountered %s : %s", requirement, error) + for requirement_name, specifier in sdist_build_requires.items(): + if requirement_name not in parsed_build_requires: + parsed_build_requires[requirement_name] = specifier self.data["language_version"] = list(python_version_set) or wheel_name_python_version_list - # Use the default build command for pure Python packages. - if "any" in wheel_name_platforms: - patched_build_commands = self.get_default_build_commands(self.data["build_tools"]) - # If we were not able to find any build and backends, use the default setuptools. 
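The merge above is deliberately first-wins. A toy illustration with made-up requirement names, showing that entries parsed from the wheel's .dist-info take precedence over sdist-derived ones:

parsed_build_requires = {"hatchling": ">=1.24"}        # from the wheel's .dist-info
sdist_build_requires = {"hatchling": "", "wheel": ""}  # from pyproject.toml in the sdist

for requirement_name, specifier in sdist_build_requires.items():
    if requirement_name not in parsed_build_requires:
        parsed_build_requires[requirement_name] = specifier

print(parsed_build_requires)  # {'hatchling': '>=1.24', 'wheel': ''}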
if not parsed_build_requires: parsed_build_requires["setuptools"] = "==" + defaults.get("heuristic.pypi", "default_setuptools") @@ -239,44 +250,39 @@ def resolve_fields(self, purl: PackageURL) -> None: logger.debug("Combined build-requires: %s", parsed_build_requires) self.data["build_requires"] = parsed_build_requires self.data["build_backends"] = list(build_backends_set) + # We do not generate a build command for non-pure packages + if not self.data["has_binaries"]: + patched_build_commands = self.get_default_build_commands(self.data["build_tools"]) + self.data["build_commands"] = patched_build_commands - if not patched_build_commands: - # Resolve and patch build commands. - - # To ensure that selected_build_commands is never empty, we seed with the fallback - # command of python -m build --wheel -n - if self.data["build_commands"]: - selected_build_commands = self.data["build_commands"] - else: - self.data["build_commands"] = ["python -m build --wheel -n".split()] - selected_build_commands = ( - self.get_default_build_commands(self.data["build_tools"]) or self.data["build_commands"] - ) - - logger.debug(selected_build_commands) - - patched_build_commands = ( - patch_commands( - cmds_sequence=selected_build_commands, - patches=CLI_COMMAND_PATCHES, - ) - or [] - ) - if not patched_build_commands: - raise GenerateBuildSpecError(f"Failed to patch command sequences {selected_build_commands}.") + def add_parsed_requirement(self, build_requirements: dict[str, str], requirement: str) -> None: + """ + Parse a requirement string and add it to build_requirements, doing appropriate error handling. - self.data["build_commands"] = patched_build_commands + Parameters + ---------- + build_requirements: dict[str,str] + Dictionary of build requirements to populate. + requirement: str + Requirement string to parse. + """ + try: + parsed_requirement = Requirement(requirement) + if parsed_requirement.name not in build_requirements: + build_requirements[parsed_requirement.name] = str(parsed_requirement.specifier) + except (InvalidRequirement, InvalidSpecifier) as error: + logger.debug("Malformed requirement encountered %s : %s", requirement, error) def apply_tool_specific_inferences( - self, build_requires_set: set[str], python_version_set: set[str], pyproject_contents: dict[str, Any] + self, build_requirements: dict[str, str], python_version_set: set[str], pyproject_contents: dict[str, Any] ) -> None: """ Based on build tools inferred, look into the pyproject.toml for related additional dependencies. Parameters ---------- - build_requires_set: set[str] - Set of build requirements to populate. + build_requirements: dict[str,str] + Dictionary of build requirements to populate. python_version_set: set[str] Set of compatible interpreter versions to populate. pyproject_contents: dict[str, Any] @@ -291,7 +297,8 @@ def apply_tool_specific_inferences( for _, section in hatch_build_hooks.items(): dependencies = section.get("dependencies") if dependencies: - build_requires_set.update(elem.replace(" ", "") for elem in dependencies) + for requirement in dependencies: + self.add_parsed_requirement(build_requirements, requirement) # If we have flit as a build_tool, we will check if the legacy header [tool.flit.metadata] exists, # and if so, check to see if we can use its "requires-python". 
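add_parsed_requirement is a thin wrapper over packaging's Requirement parser. A stand-alone sketch with illustrative requirement strings (the real method also catches and logs InvalidSpecifier):

from packaging.requirements import InvalidRequirement, Requirement

build_requirements: dict[str, str] = {}
for raw in ("setuptools >= 43", "flit_core <4,>=3.4", "not a requirement !!"):
    try:
        parsed = Requirement(raw)
        if parsed.name not in build_requirements:
            build_requirements[parsed.name] = str(parsed.specifier)
    except InvalidRequirement:
        pass  # malformed requirements are only logged in the real code

print(build_requirements)  # {'setuptools': '>=43', 'flit_core': '<4,>=3.4'}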
if "flit" in self.data["build_tools"]: diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index 457cfe15c..adb956346 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -34,6 +34,8 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: GenerateBuildSpecError Raised if dockerfile cannot be generated. """ + if buildspec["has_binaries"]: + raise GenerateBuildSpecError("We currently do not support generating a dockerfile for non-pure Python packages") language_version: str | None = pick_specific_version(buildspec) if language_version is None: raise GenerateBuildSpecError("Could not derive specific interpreter version") diff --git a/src/macaron/errors.py b/src/macaron/errors.py index d088914de..569ec1817 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains error classes for Macaron.""" @@ -129,3 +129,7 @@ class QueryMacaronDatabaseError(Exception): class GenerateBuildSpecError(Exception): """Happens when there is an unexpected error while generating the build spec file.""" + + +class WheelTagError(MacaronError): + """Happens when a Python wheel with unsupported tags is requested for analysis.""" diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index ce8630d37..e11c8260a 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The module provides abstractions for the pypi package registry.""" @@ -22,9 +22,10 @@ import requests from bs4 import BeautifulSoup, Tag +from packaging.utils import InvalidWheelFilename, parse_wheel_filename from macaron.config.defaults import defaults -from macaron.errors import ConfigurationError, InvalidHTTPResponseError, SourceCodeError +from macaron.errors import ConfigurationError, InvalidHTTPResponseError, SourceCodeError, WheelTagError from macaron.json_tools import json_extract from macaron.malware_analyzer.datetime_parser import parse_datetime from macaron.slsa_analyzer.package_registry.package_registry import PackageRegistry @@ -863,8 +864,29 @@ def get_latest_release_upload_time(self) -> str | None: return None @contextmanager - def wheel(self) -> Generator[None]: - """Download and cleanup wheel of the package with a context manager.""" + def wheel(self, download_binaries: bool) -> Generator[None]: + """Download and cleanup wheel of the package with a context manager. + + Parameters + ---------- + download_binaries: bool + Whether or not to download a wheel with binaries. 
+ + Returns + ------- + Generator[None] + Generator that yields None and takes care of resource cleanup on + exiting the context in which it was called + + Raises + ------ + WheelTagError + If download_binaries is True + SourceCodeError + If we are unable to download the requested wheel + """ + if download_binaries: + raise WheelTagError("Macaron does not currently support analysis of non-pure Python wheels.") if not self.download_wheel(): raise SourceCodeError("Unable to download requested wheel.") yield @@ -889,6 +911,38 @@ def download_wheel(self) -> bool: logger.debug(error) return False + def has_pure_wheel(self) -> bool: + """Check whether the PURL has a pure wheel from its package json. + + Returns + ------- + bool + Whether the PURL has a pure wheel or not. + """ + if self.component_version: + urls = json_extract(self.package_json, ["releases", self.component_version], list) + else: + # Get the latest version. + urls = json_extract(self.package_json, ["urls"], list) + if not urls: + return False + for distribution in urls: + file_name: str = distribution.get("filename") or "" + # Parse out and check none and any + # Catch exceptions + try: + _, _, _, tags = parse_wheel_filename(file_name) + # Check if none and any are in the tags (i.e. the wheel is pure) + # Technically a wheel can have multiple tag sets. Our condition for + # a pure wheel is that it has only one tag set with abi "none" and + # platform "any" + if len(tags) == 1 and all(tag.abi == "none" and tag.platform == "any" for tag in tags): + return True + except InvalidWheelFilename: + logger.debug("Could not parse wheel name.") + return False + return False + @contextmanager def sourcecode(self) -> Generator[None]: """Download and cleanup source code of the package with a context manager.""" diff --git a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py index b62ea049a..c8d4d8882 100644 --- a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py +++ b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
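has_pure_wheel ultimately reduces to a single predicate over the parsed wheel tags. A self-contained sketch of that check, with illustrative filenames:

from packaging.utils import InvalidWheelFilename, parse_wheel_filename

def looks_pure(filename: str) -> bool:
    try:
        _, _, _, tags = parse_wheel_filename(filename)
    except InvalidWheelFilename:
        return False
    # A pure wheel carries exactly one tag set, with abi "none" and platform "any".
    return len(tags) == 1 and all(tag.abi == "none" and tag.platform == "any" for tag in tags)

print(looks_pure("cachetools-6.2.1-py3-none-any.whl"))                        # True
print(looks_pure("tree_sitter-0.25.2-cp310-abi3-manylinux_2_17_x86_64.whl"))  # False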
""" @@ -27,6 +27,7 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "ecosystem": "pypi", "purl": "pkg:pypi/cachetools@6.2.1", "language": "python", + "has_binaries": False, "build_tools": ["pip"], "build_commands": [["python", "-m", "build"]], "build_requires": {"setuptools": "==80.9.0", "wheel": ""}, diff --git a/tests/integration/cases/pypi_cachetools/expected_default.buildspec b/tests/integration/cases/pypi_cachetools/expected_default.buildspec index 0b5d8acfa..2a05c0e95 100644 --- a/tests/integration/cases/pypi_cachetools/expected_default.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": "0.18.0", + "macaron_version": "0.20.0", "group_id": null, "artifact_id": "cachetools", "version": "6.2.1", @@ -24,6 +24,7 @@ "-n" ] ], + "has_binaries": false, "build_requires": { "setuptools": "==80.9.0", "wheel": "" diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec index e7842d046..e610ee866 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec @@ -1,5 +1,5 @@ { - "macaron_version": "0.18.0", + "macaron_version": "0.20.0", "group_id": null, "artifact_id": "markdown-it-py", "version": "4.0.0", @@ -21,6 +21,7 @@ "build" ] ], + "has_binaries": false, "build_requires": { "flit": "==3.12.0", "flit_core": "<4,>=3.4" diff --git a/tests/integration/cases/pypi_toga/expected_default.buildspec b/tests/integration/cases/pypi_toga/expected_default.buildspec index 819113207..875523655 100644 --- a/tests/integration/cases/pypi_toga/expected_default.buildspec +++ b/tests/integration/cases/pypi_toga/expected_default.buildspec @@ -24,6 +24,7 @@ "-n" ] ], + "has_binaries": false, "build_requires": { "setuptools": "==80.3.1", "setuptools_dynamic_dependencies": "==1.0.0", diff --git a/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec b/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec new file mode 100644 index 000000000..5eccc6d34 --- /dev/null +++ b/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec @@ -0,0 +1,26 @@ +{ + "macaron_version": "0.20.0", + "group_id": null, + "artifact_id": "tree-sitter", + "version": "0.25.2", + "git_repo": "https://github.com/tree-sitter/py-tree-sitter", + "git_tag": "e2a5b21449c30c6a4fb49a55567a4699c3271f10", + "newline": "lf", + "language_version": [ + ">=3.10" + ], + "ecosystem": "pypi", + "purl": "pkg:pypi/tree-sitter@0.25.2", + "language": "python", + "build_tools": [ + "pip" + ], + "build_commands": [], + "has_binaries": true, + "build_requires": { + "setuptools": ">=43" + }, + "build_backends": [ + "setuptools.build_meta" + ] +} diff --git a/tests/integration/cases/pypi_tree-sitter/test.yaml b/tests/integration/cases/pypi_tree-sitter/test.yaml new file mode 100644 index 000000000..13cf9d7d7 --- /dev/null +++ b/tests/integration/cases/pypi_tree-sitter/test.yaml @@ -0,0 +1,38 @@ +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Test buildspec generation for a non-pure wheel. 
+ +tags: +- macaron-python-package +- tutorial + +steps: +- name: Run macaron analyze + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/tree-sitter@0.25.2 +- name: Generate the buildspec + kind: gen-build-spec + options: + command_args: + - -purl + - pkg:pypi/tree-sitter@0.25.2 +- name: Compare Buildspec. + kind: compare + options: + kind: default_build_spec + result: output/buildspec/pypi/tree-sitter/macaron.buildspec + expected: expected_default.buildspec +- name: Generate the buildspec + kind: gen-build-spec + expect_fail: true + options: + command_args: + - -purl + - pkg:pypi/tree-sitter@0.25.2 + - --output-format + - dockerfile From f1d9ac405621b3430d73df6db912ff935210c739 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Fri, 6 Feb 2026 14:16:57 +1000 Subject: [PATCH 12/20] refactor!: improve Macaron wheel name and add a new install script (#1291) This PR introduces breaking changes by improving the Macaron wheel naming for clearer platform and architecture identification, and by adding a new installation script. It also makes the slsa-verifier installation optional and adjusts dependency handling for security advisories. Signed-off-by: behnazh-w --- CONTRIBUTING.md | 6 + Makefile | 95 +++++++++----- docker/Dockerfile.final | 9 ++ docs/source/pages/installation.rst | 53 +++++++- .../release_scripts/install_macaron_python.sh | 123 ++++++++++++++++++ src/macaron/provenance/provenance_verifier.py | 35 ++++- .../micronaut-test.dl | 3 +- .../test.yaml | 6 +- .../cases/ossf_scorecard/test.yaml | 3 +- .../cases/urllib3_expectation_dir/test.yaml | 6 +- .../cases/urllib3_expectation_file/test.yaml | 3 +- .../urllib3_invalid_expectation/test.yaml | 3 +- 12 files changed, 290 insertions(+), 55 deletions(-) create mode 100755 scripts/release_scripts/install_macaron_python.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7e003927e..78e293085 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -116,6 +116,12 @@ make setup **Note**: Running the above command will prompt you for sudo access to install [Soufflé Datalog engine](https://github.com/souffle-lang/souffle). You can install Soufflé on your system before running `make setup` to avoid getting prompted. +**Note**: The [slsa-verifier](https://github.com/slsa-framework/slsa-verifier) dependency needs to be installed separately using the following command. This dependency is only used to verify some provenances, so you might not always need it for development. + +```bash +make install-slsa-verifier +``` + With that in place, you’re ready to build and contribute to Macaron! ### Updating dependent packages diff --git a/Makefile b/Makefile index 83b0d2d5c..1209a07b3 100644 --- a/Makefile +++ b/Makefile @@ -5,12 +5,35 @@ # https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html SHELL := bash -# Set the package's name, version, and path for use throughout the Makefile. +# Set the package's name and version for use throughout the Makefile. PACKAGE_NAME := macaron PACKAGE_VERSION := $(shell python -c $$'try: import $(PACKAGE_NAME); print($(PACKAGE_NAME).__version__);\nexcept: print("unknown");') + +# Determine the OS, architecture, and number of cores.
+OS := $(shell uname -s) +ifeq ($(OS),Darwin) + PLATFORM_NAME := macosx + OS_DISTRO := "Darwin" +else + ifeq ($(OS),Linux) + PLATFORM_NAME := linux + OS_DISTRO := "$(shell grep '^NAME=' /etc/os-release | sed 's/^NAME=//' | sed 's/"//g')" + OS_MAJOR_VERSION := "$(shell grep '^VERSION=' /etc/os-release | sed -r 's/^[^0-9]+([0-9]+)\..*/\1/')" + endif +endif +ARCH := $(shell uname -m) +NPROC := $(shell nproc) + +# Construct short package identifier. +PACKAGE_SDIST_NAME := $(PACKAGE_NAME)-$(PACKAGE_VERSION) + +# Construct full package identifier. +PACKAGE_WHEEL_DIST_NAME := $(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-$(PLATFORM_NAME)_$(ARCH) + +# Set the Python version, package, and repo paths. +PYTHON ?= python3.11 PACKAGE_PATH := $(shell pwd)/src/$(PACKAGE_NAME) REPO_PATH := $(shell pwd) -PYTHON ?= python3.11 # This variable contains the first goal that matches any of the listed goals # here, else it contains an empty string. The net effect is to filter out @@ -93,26 +116,28 @@ setup: force-upgrade setup-go setup-binaries setup-schemastore go install github.com/CycloneDX/cyclonedx-gomod/cmd/cyclonedx-gomod@v1.3.0 setup-go: go build -o $(PACKAGE_PATH)/bin/ $(REPO_PATH)/golang/cmd/... -setup-binaries: $(PACKAGE_PATH)/bin/slsa-verifier souffle gnu-sed +setup-binaries: souffle gnu-sed -# Install SLSA Verifier. +# Install SLSA Verifier if not already installed. +# Get the checksum from https://github.com/slsa-framework/slsa-verifier/blob/main/SHA256SUM.md. SLSA_VERIFIER_TAG := v2.7.1 SLSA_VERIFIER_BIN := slsa-verifier-linux-amd64 -SLSA_VERIFIER_BIN_PATH := $(PACKAGE_PATH)/bin/$(SLSA_VERIFIER_BIN) -SLSA_VERIFIER_PROVENANCE := $(SLSA_VERIFIER_BIN).intoto.jsonl -SLSA_VERIFIER_PROVENANCE_PATH := $(PACKAGE_PATH)/bin/$(SLSA_VERIFIER_PROVENANCE) - -$(PACKAGE_PATH)/bin/slsa-verifier: - mkdir -p $(PACKAGE_PATH)/bin \ - && wget -O $(PACKAGE_PATH)/bin/slsa-verifier https://github.com/slsa-framework/slsa-verifier/releases/download/$(SLSA_VERIFIER_TAG)/$(SLSA_VERIFIER_BIN) \ - && wget -O $(SLSA_VERIFIER_PROVENANCE_PATH) https://github.com/slsa-framework/slsa-verifier/releases/download/$(SLSA_VERIFIER_TAG)/$(SLSA_VERIFIER_PROVENANCE) \ - && chmod +x $(PACKAGE_PATH)/bin/slsa-verifier \ - && EXPECTED_HASH=$$(jq -r '.payload' $(SLSA_VERIFIER_PROVENANCE_PATH) | base64 -d | jq -r '.subject[] | select(.name == "$(SLSA_VERIFIER_BIN)") | .digest.sha256') \ - && ACTUAL_HASH=$$(sha256sum $(PACKAGE_PATH)/bin/slsa-verifier | awk '{print $$1}'); \ - if [ "$$EXPECTED_HASH" != "$$ACTUAL_HASH" ]; then \ - echo "Hash mismatch: expected $$EXPECTED_HASH, got $$ACTUAL_HASH"; \ - exit 1; \ - fi +SLSA_VERIFIER_BIN_PATH := $(HOME)/.local/bin +SLSA_VERIFIER_CHECKSUM := 946dbec729094195e88ef78e1734324a27869f03e2c6bd2f61cbc06bd5350339 +.PHONY: install-slsa-verifier +install-slsa-verifier: + if ! command -v slsa-verifier >/dev/null 2>&1; then \ + mkdir -p $(SLSA_VERIFIER_BIN_PATH) \ + && curl --fail -L -o $(SLSA_VERIFIER_BIN_PATH)/slsa-verifier https://github.com/slsa-framework/slsa-verifier/releases/download/$(SLSA_VERIFIER_TAG)/$(SLSA_VERIFIER_BIN) \ + && SLSA_VERIFIER_COMPUTED_HASH=$$(sha256sum $(SLSA_VERIFIER_BIN_PATH)/slsa-verifier | cut -d' ' -f1) \ + && if [ $$SLSA_VERIFIER_COMPUTED_HASH != $(SLSA_VERIFIER_CHECKSUM) ]; then \ + echo "slsa-verifier checksum could not be verified. Removing slsa-verifier binary and exiting." 
>&2 \ + && rm -f ${SLSA_VERIFIER_BIN_PATH}/slsa-verifier \ + && exit 1; \ + fi; \ + chmod +x $(SLSA_VERIFIER_BIN_PATH)/slsa-verifier \ + && command -v $(SLSA_VERIFIER_BIN_PATH)/slsa-verifier; \ + fi; # Set up schemastore for GitHub Actions specs. setup-schemastore: $(PACKAGE_PATH)/resources/schemastore/github-workflow.json $(PACKAGE_PATH)/resources/schemastore/LICENSE $(PACKAGE_PATH)/resources/schemastore/NOTICE @@ -238,8 +263,8 @@ setup-integration-test-utility-for-docker: # Generate a Software Bill of Materials (SBOM). .PHONY: sbom sbom: requirements - cyclonedx-py requirements --output-format json --output-file dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-sbom.json - $$HOME/go/bin/cyclonedx-gomod mod -json -output dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-sbom-go.json $(REPO_PATH) + cyclonedx-py requirements --output-format json --output-file dist/$(PACKAGE_WHEEL_DIST_NAME)-sbom.json + $$HOME/go/bin/cyclonedx-gomod mod -json -output dist/$(PACKAGE_WHEEL_DIST_NAME)-sbom-go.json $(REPO_PATH) # Generate a requirements.txt file containing version and integrity hashes for all # packages currently installed in the virtual environment. There's no easy way to @@ -261,26 +286,25 @@ requirements.txt: pyproject.toml [[ $$pkg =~ (.*)==(.*) ]] && curl -s https://pypi.org/pypi/$${BASH_REMATCH[1]}/$${BASH_REMATCH[2]}/json | python -c "import json, sys; print(''.join(f''' \\\\\n --hash=sha256:{pkg['digests']['sha256']}''' for pkg in json.load(sys.stdin)['urls']));" >> requirements.txt; \ done echo -e -n "$(PACKAGE_NAME)==$(PACKAGE_VERSION)" >> requirements.txt - if [ -f dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION).tar.gz ]; then \ - echo -e -n " \\\\\n $$(python -m pip hash --algorithm sha256 dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION).tar.gz | grep '^\-\-hash')" >> requirements.txt; \ + if [ -f dist/$(PACKAGE_SDIST_NAME).tar.gz ]; then \ + echo -e -n " \\\\\n $$(python -m pip hash --algorithm sha256 dist/$(PACKAGE_SDIST_NAME).tar.gz | grep '^\-\-hash')" >> requirements.txt; \ fi - if [ -f dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl ]; then \ - echo -e -n " \\\\\n $$(python -m pip hash --algorithm sha256 dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl | grep '^\-\-hash')" >> requirements.txt; \ + if [ -f dist/$(PACKAGE_WHEEL_DIST_NAME).whl ]; then \ + echo -e -n " \\\\\n $$(python -m pip hash --algorithm sha256 dist/$(PACKAGE_WHEEL_DIST_NAME).whl | grep '^\-\-hash')" >> requirements.txt; \ fi echo "" >> requirements.txt - cp requirements.txt dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-requirements.txt + cp requirements.txt dist/$(PACKAGE_WHEEL_DIST_NAME)-requirements.txt # Audit the currently installed packages. Skip packages that are installed in # editable mode (like the one in development here) because they may not have # a PyPI entry; also print out CVE description and potential fixes if audit # found an issue. -# Ignoring GHSA-7gcm-g887-7qv7: remove the exception when a fix is available. .PHONY: audit audit: if ! $$(python -c "import pip_audit" &> /dev/null); then \ echo "No package pip_audit installed, upgrade your environment!" && exit 1; \ fi; - python -m pip_audit --skip-editable --desc on --fix --dry-run --ignore-vuln GHSA-7gcm-g887-7qv7 + python -m pip_audit --skip-editable --desc on --fix --dry-run # Run some or all checks over the package code base. 
.PHONY: check check-code check-bandit check-flake8 check-lint check-mypy check-go check-actionlint @@ -360,15 +384,16 @@ integration-test-update: # When building these artifacts, we need the environment variable SOURCE_DATE_EPOCH # set to the build date/epoch. For more details, see: https://flit.pypa.io/en/latest/reproducible.html .PHONY: dist -dist: dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION).tar.gz dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-build-epoch.txt -dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl: check test integration-test - flit build --setup-py --format wheel -dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION).tar.gz: check test integration-test - flit build --setup-py --format sdist +dist: dist/$(PACKAGE_WHEEL_DIST_NAME).whl dist/$(PACKAGE_SDIST_NAME).tar.gz dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip dist/$(PACKAGE_WHEEL_DIST_NAME)-build-epoch.txt +dist/$(PACKAGE_WHEEL_DIST_NAME).whl: check test integration-test + SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format wheel + mv dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-py3-none-any.whl dist/$(PACKAGE_WHEEL_DIST_NAME).whl +dist/$(PACKAGE_SDIST_NAME).tar.gz: check test integration-test + SOURCE_DATE_EPOCH=$(SOURCE_DATE_EPOCH) flit build --setup-py --format sdist dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip: docs python -m zipfile -c dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-docs-html.zip docs/_build/html -dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-build-epoch.txt: - echo $(SOURCE_DATE_EPOCH) > dist/$(PACKAGE_NAME)-$(PACKAGE_VERSION)-build-epoch.txt +dist/$(PACKAGE_WHEEL_DIST_NAME)-build-epoch.txt: + echo $(SOURCE_DATE_EPOCH) > dist/$(PACKAGE_WHEEL_DIST_NAME)-build-epoch.txt # Build the HTML documentation from the package's source. .PHONY: docs diff --git a/docker/Dockerfile.final b/docker/Dockerfile.final index 2fc470904..b8c0a8a24 100644 --- a/docker/Dockerfile.final +++ b/docker/Dockerfile.final @@ -43,6 +43,15 @@ RUN : \ && rm -rf $HOME/dist \ && deactivate +# Install slsa-verifier. +# Copy only the Makefile from the build context +COPY Makefile . +RUN : \ + && make install-slsa-verifier \ + # Test that slsa-verifier exists and is executable. + && ls -l $HOME/.local/bin/slsa-verifier \ + && test -x $HOME/.local/bin/slsa-verifier + COPY --chown=macaron:macaron docker/user.sh $HOME/user.sh # We enable the root user here so that the user.sh script can modify the diff --git a/docs/source/pages/installation.rst b/docs/source/pages/installation.rst index 6e10c7005..6c3fb7cb2 100644 --- a/docs/source/pages/installation.rst +++ b/docs/source/pages/installation.rst @@ -7,6 +7,8 @@ Installation Guide ================== +.. contents:: :local: + ------------- Prerequisites ------------- @@ -23,9 +25,11 @@ Prerequisites Download -------- -Macaron is currently distributed as a Docker image. We provide a bash script ``run_macaron.sh`` to easily download and run it. +Macaron is currently distributed as a Docker image and Python package. Note that the Python package is only published as a GitHub release asset. We provide bash scripts to easily download and run Macaron. -.. note:: When run, Macaron will create output files inside the current directory where ``run_macaron.sh`` is run. If you run Docker Desktop, please make sure that the current directory is bind mountable for Docker (see the `File Sharing settings `_). 
+ +''''''''''''''''''''''''''''''''' +Install Macaron as a Docker image +''''''''''''''''''''''''''''''''' Download the ``run_macaron.sh`` script and make it executable by running the commands (replace ``tag`` with the version you want or ``release`` for the latest version): @@ -34,9 +38,11 @@ Download the ``run_macaron.sh`` script and make it executable by running the com curl -O https://raw.githubusercontent.com/oracle/macaron/refs/tags/<tag>/scripts/release_scripts/run_macaron.sh chmod +x run_macaron.sh ----------------------------------------- -Verify that the installation is complete ---------------------------------------- +.. note:: When run, Macaron will create output files inside the current directory where ``run_macaron.sh`` is run. If you run Docker Desktop, please make sure that the current directory is bind mountable for Docker (see the `File Sharing settings `_). + +'''''''''''''''''''''' +Check the Docker image +'''''''''''''''''''''' To verify your setup, go to the directory containing the downloaded ``run_macaron.sh`` script and run this command in order to print out the help message for Macaron: @@ -51,6 +57,43 @@ To verify your setup, go to the directory containing the downloaded ``run_macaro .. note:: By default, the script will always check the docker registry to ensure the docker image is up-to-date. This can be overridden if necessary (e.g. if running offline with a pre-installed image) by assigning the environment variable ``DOCKER_PULL``. For example: ``DOCKER_PULL=never ./run_macaron.sh --help`` +''''''''''''''''''''''''''''''''''' +Install Macaron as a Python package +''''''''''''''''''''''''''''''''''' + +Download the ``install_macaron_python.sh`` script and make it executable by running the commands (replace ``tag`` with the version you want or ``release`` for the latest version): + +.. code-block:: shell + + curl -O https://raw.githubusercontent.com/oracle/macaron/refs/tags/<tag>/scripts/release_scripts/install_macaron_python.sh + chmod +x install_macaron_python.sh + +Install the package by providing a version. The installation will automatically create a virtual environment at ``./.venv`` if one does not already exist. The script uses your system's ``python3`` interpreter, and requires Python ``3.11.14`` or later to be available: + +.. code-block:: shell + + ./install_macaron_python.sh 0.20.0 + +Macaron might call `slsa-verifier <https://github.com/slsa-framework/slsa-verifier>`_ for the ``mcn_provenance_verified_1`` check if it is already installed on your machine. You can also pass the ``--install-slsa-verifier`` option to the script to install it for you. Note that if slsa-verifier is not installed, we only log an error but proceed with the rest of the analysis. For further information run: + +.. code-block:: shell + + ./install_macaron_python.sh --help + +If you run Macaron as a Python package and would like to run the :ref:`verify-policy ` or :ref:`gen-build-spec ` commands, you need to install the Datalog engine `souffle <https://github.com/souffle-lang/souffle>`_ separately. + +'''''''''''''''''''''''' +Check the Python package +'''''''''''''''''''''''' + +To verify your setup, activate the virtual environment and run this command in order to print out the help message for Macaron: + +.. code-block:: shell + + source .venv/bin/activate + macaron --help + + ..
_prepare-github-token: --------------------------- diff --git a/scripts/release_scripts/install_macaron_python.sh b/scripts/release_scripts/install_macaron_python.sh new file mode 100755 index 000000000..b6e97fc4c --- /dev/null +++ b/scripts/release_scripts/install_macaron_python.sh @@ -0,0 +1,123 @@ +#!/bin/bash + +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +set -euo pipefail + +print_help() { +cat << EOF +Usage: $0 <macaron-version> [--install-slsa-verifier] [-h|--help] + +Arguments: + <macaron-version> Version of Macaron to install. + --install-slsa-verifier (Optional) Install the SLSA Verifier binary. + -h, --help Show this help and exit. + +Examples: + $0 0.21.0 + $0 0.21.0 --install-slsa-verifier +EOF +} + +# SLSA Verifier Installer +# Get the checksum from https://github.com/slsa-framework/slsa-verifier/blob/main/SHA256SUM.md. +install_slsa_verifier() { + SLSA_VERIFIER_TAG="v2.7.1" + SLSA_VERIFIER_BIN="slsa-verifier-linux-amd64" + SLSA_VERIFIER_BIN_PATH="${HOME}/.local/bin" + SLSA_VERIFIER_CHECKSUM="946dbec729094195e88ef78e1734324a27869f03e2c6bd2f61cbc06bd5350339" + + if ! command -v slsa-verifier >/dev/null 2>&1; then + echo "[Info] Installing slsa-verifier..." + mkdir -p "$SLSA_VERIFIER_BIN_PATH" + curl --fail -L -o "${SLSA_VERIFIER_BIN_PATH}/slsa-verifier" "https://github.com/slsa-framework/slsa-verifier/releases/download/${SLSA_VERIFIER_TAG}/${SLSA_VERIFIER_BIN}" + SLSA_VERIFIER_COMPUTED_HASH=$(sha256sum "${SLSA_VERIFIER_BIN_PATH}/slsa-verifier" | cut -d' ' -f1) + if [ "$SLSA_VERIFIER_COMPUTED_HASH" != "$SLSA_VERIFIER_CHECKSUM" ]; then + echo "[Error] slsa-verifier checksum verification failed. Removing slsa-verifier binary and exiting." >&2 + rm -f "${SLSA_VERIFIER_BIN_PATH}/slsa-verifier" + exit 1 + fi + chmod +x "${SLSA_VERIFIER_BIN_PATH}/slsa-verifier" + echo "[Info] slsa-verifier installed at: ${SLSA_VERIFIER_BIN_PATH}/slsa-verifier" + else + echo "[Info] slsa-verifier already installed." + fi +} + +# Handle arguments. +INSTALL_SLSA=0 +MACARON_VERSION="" + +for arg in "$@"; do + case "$arg" in + -h|--help) + print_help + exit 0 + ;; + --install-slsa-verifier) + INSTALL_SLSA=1 + ;; + *) + if [[ -z "$MACARON_VERSION" ]]; then + MACARON_VERSION="$arg" + fi + ;; + esac +done + +if [[ -z "$MACARON_VERSION" ]]; then + echo "Error: Please provide the Macaron version as an argument." + print_help + exit 1 +fi + +if [[ "$INSTALL_SLSA" -eq 1 ]]; then + install_slsa_verifier +fi + +# Macaron Installer + +# Configuration. +PYTHON_VERSION="3" +MACARON_DISTRO="py3-none-linux_x86_64" +MACARON_WHEEL="macaron-${MACARON_VERSION}-${MACARON_DISTRO}.whl" +MACARON_REQUIREMENTS="macaron-${MACARON_VERSION}-${MACARON_DISTRO}-requirements.txt" +MACARON_REPO="https://github.com/oracle/macaron" +VENV_DIR=".venv" + +echo "Using Macaron version: $MACARON_VERSION" + +# Download Macaron release assets if not already downloaded. +echo "Checking for release files..." +if [[ ! -f "$MACARON_WHEEL" ]]; then + echo "Downloading wheel: $MACARON_WHEEL" + wget "${MACARON_REPO}/releases/download/v${MACARON_VERSION}/${MACARON_WHEEL}" +else + echo "Using existing wheel: $MACARON_WHEEL" +fi + +if [[ ! -f "$MACARON_REQUIREMENTS" ]]; then + echo "Downloading requirements: $MACARON_REQUIREMENTS" + wget "${MACARON_REPO}/releases/download/v${MACARON_VERSION}/${MACARON_REQUIREMENTS}" +else + echo "Using existing requirements: $MACARON_REQUIREMENTS" +fi + +# Set up Python virtual environment. +if [[ !
-d "$VENV_DIR" ]]; then + echo "Creating virtual environment with Python ${PYTHON_VERSION}..." + python${PYTHON_VERSION} -m venv "${VENV_DIR}" +fi + +# shellcheck disable=SC1091 +source "${VENV_DIR}/bin/activate" +export PATH="${VENV_DIR}/bin:$PATH" + +# Install Macaron package and dependencies. +echo "Installing Macaron..." +pip install --no-deps "${MACARON_WHEEL}" +pip install --no-deps -r "${MACARON_REQUIREMENTS}" + +# Check version. +echo "Macaron successfully installed:" +macaron --version diff --git a/src/macaron/provenance/provenance_verifier.py b/src/macaron/provenance/provenance_verifier.py index 06356eff6..72b457ca0 100644 --- a/src/macaron/provenance/provenance_verifier.py +++ b/src/macaron/provenance/provenance_verifier.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains methods for verifying provenance files.""" @@ -6,6 +6,7 @@ import hashlib import logging import os +import shutil import subprocess # nosec B404 import tarfile import zipfile @@ -202,7 +203,6 @@ def verify_ci_provenance(analyze_ctx: AnalyzeContext, ci_info: CIInfo, download_ return False sub_verified = _verify_slsa( - analyze_ctx.macaron_path, download_path, provenance.asset, sub_asset["name"], @@ -303,19 +303,42 @@ def _validate_path_traversal(path: str) -> bool: return False -def _verify_slsa( - macaron_path: str, download_path: str, prov_asset: AssetLocator, asset_name: str, repository_url: str -) -> bool: +def _is_slsa_verifier_installed() -> bool: + """Check if slsa-verifier is present on the execution path. + + Returns + ------- + bool + True if slsa-verifier is present on the execution path. + """ + if shutil.which("slsa-verifier") is None: + logger.debug("slsa-verifier is not on the execution path.") + return False + return True + + +def _verify_slsa(download_path: str, prov_asset: AssetLocator, asset_name: str, repository_url: str) -> bool: """Run SLSA verifier to verify the artifact.""" source_path = get_repo_dir_name(repository_url, sanitize=False) if not source_path: logger.error("Invalid repository source path to verify: %s.", repository_url) return False + if not _is_slsa_verifier_installed(): + os.environ["PATH"] = os.path.join(Path.home(), ".local", "bin") + os.pathsep + os.environ.get("PATH", "") + logger.debug("PATH: %s", os.environ["PATH"]) + # Try the ~/.local/bin path. + if not _is_slsa_verifier_installed(): + logger.error( + "slsa-verifier is not installed or is not present on the execution path." + " See https://github.com/slsa-framework/slsa-verifier for instructions." + ) + return False + errors: list[str] = [] verified = False cmd = [ - os.path.join(macaron_path, "bin/slsa-verifier"), + "slsa-verifier", "verify-artifact", os.path.join(download_path, asset_name), "--provenance-path", diff --git a/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl b/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl index e0f43e2ce..e307a28b8 100644 --- a/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl +++ b/tests/integration/cases/micronaut-projects_micronaut-test/micronaut-test.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved.
*/ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" @@ -9,6 +9,7 @@ Policy("test_policy", component_id, "") :- check_passed(component_id, "mcn_build_service_1"), check_passed(component_id, "mcn_version_control_system_1"), check_passed(component_id, "mcn_provenance_available_1"), + check_passed(component_id, "mcn_provenance_verified_1"), check_passed(component_id, "mcn_provenance_derived_repo_1"), check_passed(component_id, "mcn_build_tool_1"), build_tool_check(gradle_id, "gradle", "java"), diff --git a/tests/integration/cases/micronaut-projects_micronaut-test/test.yaml b/tests/integration/cases/micronaut-projects_micronaut-test/test.yaml index c7cda9fc2..4bf43d20e 100644 --- a/tests/integration/cases/micronaut-projects_micronaut-test/test.yaml +++ b/tests/integration/cases/micronaut-projects_micronaut-test/test.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | @@ -9,6 +9,10 @@ tags: - macaron-python-package steps: +- name: Install slsa-verifier + kind: shell + options: + cmd: make --file ../../../../Makefile install-slsa-verifier - name: Run macaron analyze micronaut-projects/micronaut-test kind: analyze options: diff --git a/tests/integration/cases/ossf_scorecard/test.yaml b/tests/integration/cases/ossf_scorecard/test.yaml index 653140505..c16124433 100644 --- a/tests/integration/cases/ossf_scorecard/test.yaml +++ b/tests/integration/cases/ossf_scorecard/test.yaml @@ -1,11 +1,10 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: > Test CUE provenance expectation for ossf/scorecard, policy verification, and VSA generation. tags: -- macaron-python-package - macaron-docker-image steps: diff --git a/tests/integration/cases/urllib3_expectation_dir/test.yaml b/tests/integration/cases/urllib3_expectation_dir/test.yaml index 8646c8edd..88d589ef4 100644 --- a/tests/integration/cases/urllib3_expectation_dir/test.yaml +++ b/tests/integration/cases/urllib3_expectation_dir/test.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | @@ -10,6 +10,10 @@ tags: - macaron-docker-image steps: +- name: Install slsa-verifier + kind: shell + options: + cmd: make --file ../../../../Makefile install-slsa-verifier - name: Run macaron analyze with expectation directory kind: analyze options: diff --git a/tests/integration/cases/urllib3_expectation_file/test.yaml b/tests/integration/cases/urllib3_expectation_file/test.yaml index 5b204387b..fc6593160 100644 --- a/tests/integration/cases/urllib3_expectation_file/test.yaml +++ b/tests/integration/cases/urllib3_expectation_file/test.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | @@ -6,7 +6,6 @@ description: | The CUE expectation file is provided as a single file path. tags: -- macaron-python-package - macaron-docker-image - tutorial diff --git a/tests/integration/cases/urllib3_invalid_expectation/test.yaml b/tests/integration/cases/urllib3_invalid_expectation/test.yaml index 960e10ebe..697a9a83e 100644 --- a/tests/integration/cases/urllib3_invalid_expectation/test.yaml +++ b/tests/integration/cases/urllib3_invalid_expectation/test.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | @@ -6,7 +6,6 @@ description: | The CUE expectation file is invalid. tags: -- macaron-python-package - macaron-docker-image steps: From 66a64bbfcf774cd332a383a0e3786f704ced4191 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Tue, 10 Feb 2026 08:49:08 +1000 Subject: [PATCH 13/20] chore(deps): update Go dependencies (#1295) This PR updates the Go version to 1.24 and also updates the Go dependencies. Signed-off-by: behnazh-w --- .pre-commit-config.yaml | 6 +-- CONTRIBUTING.md | 2 +- go.mod | 15 +++---- go.sum | 42 ++++++++++--------- golang/README.md | 2 +- golang/internal/bashparser/bashparser_test.go | 5 +-- golang/internal/filewriter/filewriter_test.go | 5 +-- 7 files changed, 39 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d9321295a..6166236b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # See https://pre-commit.com for more information @@ -224,7 +224,7 @@ repos: # A linter for Golang - repo: https://github.com/golangci/golangci-lint - rev: v2.3.0 + rev: v2.8.0 hooks: - id: golangci-lint @@ -236,7 +236,7 @@ repos: # Other staged files shouldn't trigger these hooks. # Documentation: https://github.com/TekWizely/pre-commit-golang/blob/v1.0.0-rc.1/README.md. 
- repo: https://github.com/tekwizely/pre-commit-golang - rev: v1.0.0-rc.1 + rev: v1.0.0-rc.4 hooks: - id: go-build-mod - id: go-build-repo-mod diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 78e293085..5b27488d0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ Please see the [README for the malware analyzer](./src/macaron/malware_analyzer/ ### Prerequisites - Python 3.11.14 -- Go 1.23 +- Go 1.24 - JDK 17 ### Prepare the environment diff --git a/go.mod b/go.mod index d724fc781..36f40d378 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,12 @@ module github.com/oracle/macaron -go 1.23.0 +go 1.24.0 -toolchain go1.23.2 +toolchain go1.24.13 require ( - cuelang.org/go v0.14.1 + cuelang.org/go v0.15.4 mvdan.cc/sh/v3 v3.12.0 ) @@ -18,8 +18,9 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/mitchellh/go-wordwrap v1.0.1 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect - github.com/protocolbuffers/txtpbfmt v0.0.0-20250627152318-f293424e46b5 // indirect - golang.org/x/net v0.42.0 // indirect - golang.org/x/text v0.27.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect + github.com/protocolbuffers/txtpbfmt v0.0.0-20251016062345-16587c79cd91 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/net v0.46.0 // indirect + golang.org/x/text v0.30.0 // indirect + google.golang.org/protobuf v1.33.0 // indirect ) diff --git a/go.sum b/go.sum index 53aa010f2..167c82781 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ -cuelabs.dev/go/oci/ociregistry v0.0.0-20250715075730-49cab49c8e9d h1:lX0EawyoAu4kgMJJfy7MmNkIHioBcdBGFRSKDZ+CWo0= -cuelabs.dev/go/oci/ociregistry v0.0.0-20250715075730-49cab49c8e9d/go.mod h1:4WWeZNxUO1vRoZWAHIG0KZOd6dA25ypyWuwD3ti0Tdc= -cuelang.org/go v0.14.1 h1:kxFAHr7bvrCikbtVps2chPIARazVdnRmlz65dAzKyWg= -cuelang.org/go v0.14.1/go.mod h1:aSP9UZUM5m2izHAHUvqtq0wTlWn5oLjuv2iBMQZBLLs= +cuelabs.dev/go/oci/ociregistry v0.0.0-20250722084951-074d06050084 h1:4k1yAtPvZJZQTu8DRY8muBo0LHv6TqtrE0AO5n6IPYs= +cuelabs.dev/go/oci/ociregistry v0.0.0-20250722084951-074d06050084/go.mod h1:4WWeZNxUO1vRoZWAHIG0KZOd6dA25ypyWuwD3ti0Tdc= +cuelang.org/go v0.15.4 h1:lrkTDhqy8dveHgX1ZLQ6WmgbhD8+rXa0fD25hxEKYhw= +cuelang.org/go v0.15.4/go.mod h1:NYw6n4akZcTjA7QQwJ1/gqWrrhsN4aZwhcAL0jv9rZE= github.com/cockroachdb/apd/v3 v3.2.1 h1:U+8j7t0axsIgvQUqthuNm82HIrYXodOV2iWLWtEaIwg= github.com/cockroachdb/apd/v3 v3.2.1/go.mod h1:klXJcjp+FffLTHlhIG69tezTDvdP065naDsHzKhYSqc= github.com/emicklei/proto v1.14.2 h1:wJPxPy2Xifja9cEMrcA/g08art5+7CGJNFNk35iXC1I= @@ -28,26 +28,28 @@ github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJw github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= -github.com/protocolbuffers/txtpbfmt v0.0.0-20250627152318-f293424e46b5 h1:WWs1ZFnGobK5ZXNu+N9If+8PDNVB9xAqrib/stUXsV4= -github.com/protocolbuffers/txtpbfmt v0.0.0-20250627152318-f293424e46b5/go.mod h1:BnHogPTyzYAReeQLZrOxyxzS739DaTNtTvohVdbENmA= +github.com/protocolbuffers/txtpbfmt v0.0.0-20251016062345-16587c79cd91 h1:s1LvMaU6mVwoFtbxv/rCZKE7/fwDmDY684FfUe4c1Io= +github.com/protocolbuffers/txtpbfmt v0.0.0-20251016062345-16587c79cd91/go.mod h1:JSbkp0BviKovYYt9XunS95M3mLPibE9bGg+Y95DsEEY= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod 
h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= -golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= -golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= -golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= -golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= -golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= -golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= -golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= -golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= -golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4= -golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU= -golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= -golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= +golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= +golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= mvdan.cc/sh/v3 v3.12.0 h1:ejKUR7ONP5bb+UGHGEG/k9V5+pRVIyD+LsZz7o8KHrI= mvdan.cc/sh/v3 v3.12.0/go.mod h1:Se6Cj17eYSn+sNooLZiEUnNNmNxg0imoYlTu4CyaGyg= diff --git a/golang/README.md b/golang/README.md index 4cefbe323..6ab4b1695 100644 --- a/golang/README.md +++ b/golang/README.md @@ -1,7 +1,7 @@ # Go module documentation ## Quick start Prerequisites -- Go (tested on `go 1.23.0 linux/amd64`). Installation instructions [here](https://go.dev/doc/install). +- Go (tested on `go 1.24 linux/amd64`). Installation instructions [here](https://go.dev/doc/install). 
- Prepare the required libraries by running this command from the root dir of this repository: ```bash diff --git a/golang/internal/bashparser/bashparser_test.go b/golang/internal/bashparser/bashparser_test.go index 4cf0a6813..3825f459b 100644 --- a/golang/internal/bashparser/bashparser_test.go +++ b/golang/internal/bashparser/bashparser_test.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ package bashparser @@ -29,7 +29,6 @@ func Test_parse_valid_bash_script(t *testing.T) { var result map[string]interface{} err := json.Unmarshal([]byte(json_content), &result) if err != nil { - t.Errorf(string(err.Error())) - t.Errorf("Cannot unmarshal the returned JSON content from parsing %s.", json_content) + t.Errorf("Cannot unmarshal the returned JSON content from parsing %s: %v.", json_content, err) } } diff --git a/golang/internal/filewriter/filewriter_test.go b/golang/internal/filewriter/filewriter_test.go index f8faa73cd..7f8e273a3 100644 --- a/golang/internal/filewriter/filewriter_test.go +++ b/golang/internal/filewriter/filewriter_test.go @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ package filewriter @@ -26,8 +26,7 @@ func Test_store_to_file(t *testing.T) { read_content, err := os.ReadFile(out_path) if err != nil { - t.Errorf("Error when trying to store to %s.", out_path) - t.Errorf(err.Error()) + t.Errorf("Error when trying to store to %s: %v.", out_path, err) } else { if string(read_content) != store_content { t.Errorf("The store content is not correct") From 20c11695020663da06ee4ae3585633e784ec046d Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Thu, 12 Feb 2026 10:13:11 +1000 Subject: [PATCH 14/20] chore(deps): update Python dependencies (#1299) Updates the Python dependencies. In particular, this PR updates cryptography to address CVE-2026-26007. Signed-off-by: behnazh-w --- pyproject.toml | 21 +++++++++---------- src/macaron/__main__.py | 4 ++-- .../repo_finder/repo_finder_deps_dev.py | 4 ++-- src/macaron/slsa_analyzer/git_url.py | 4 ++-- tests/integration/run.py | 4 ++-- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0c0f16641..a30882001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "requests >=2.32.3,<3.0.0", "pydriller >=2.0,<3.0.0", "yamale >=6.0.0,<7.0.0", - "packaging >=24.0,<25.0.0", + "packaging >=25.0,<27.0.0", "jinja2 >=3.1.2,<4.0.0", "SQLAlchemy >=2.0.0,<3.0.0", "defusedxml >=0.7.1,<1.0.0", @@ -33,11 +33,11 @@ dependencies = [ "ruamel.yaml >= 0.18.6,<1.0.0", "jsonschema >= 4.22.0,<5.0.0", "cyclonedx-bom >=7.0.0,<8.0.0", - "cyclonedx-python-lib[validation] >=8.0.0,<11.0.0", + "cyclonedx-python-lib[validation] >=9.0.0,<12.0.0", "beautifulsoup4 >= 4.12.0,<5.0.0", "problog >= 2.2.6,<3.0.0", - "cryptography >=44.0.0,<45.0.0", - "semgrep == 1.149.0", + "cryptography >=46.0.5,<47.0.0", + "semgrep == 1.151.0", "email-validator >=2.2.0,<3.0.0", "rich >=13.5.3,<15.0.0", "lark >= 1.3.0,<2.0.0", @@ -71,16 +71,16 @@ macaron = 'macaron.__main__:main' # installed. 
Make sure to keep the requirements in sync with the workflows! actions = [ "commitizen >=4.0.0,<5.0.0", - "twine >=5.0.0,<6.0.0", + "twine >=6.0.0,<7.0.0", ] dev = [ "flit >=3.2.0,<4.0.0", - "mypy >=1.0.0,<1.16", + "mypy >=1.19.1,<1.20", "types-pyyaml >=6.0.4,<7.0.0", "types-requests >=2.25.6,<3.0.0", "types-jsonschema >=4.22.0,<5.0.0", "pip-audit >=2.5.6,<3.0.0", - "pylint >=3.0.3,<4.0.0", + "pylint >=4.0.4,<5.0.0", "cyclonedx-bom >=7.0.0,<8.0.0", "types-beautifulsoup4 >= 4.12.0,<5.0.0", ] @@ -98,12 +98,12 @@ hooks = [ # Note that the `custom_exit_code` and `env` plugins may currently be unmaintained. test = [ "hypothesis >=6.100.1,<7.0.0", - "pytest >=8.2.2,<9.0.0", + "pytest >=9.0.2,<10.0.0", "pytest-custom_exit_code >=0.3.0,<1.0.0", - "pytest-cov >=6.0.0,<7.0.0", + "pytest-cov >=7.0.0,<8.0.0", "pytest-env >=1.0.0,<2.0.0", "pytest_httpserver >=1.0.10,<2.0.0", - "syrupy >=4.0.0,<5.0.0", + "syrupy >=5.1.0,<6.0.0", ] test-docker = [ @@ -217,7 +217,6 @@ ignore_missing_imports = true # https://pylint.pycqa.org/en/latest/user_guide/configuration/index.html [tool.pylint.MASTER] fail-under = 10.0 -suggestion-mode = true # Remove this setting when pylint v4 is released. load-plugins = [ "pylint.extensions.check_elif", "pylint.extensions.for_any_all", diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index dd103eec6..addb0f881 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This is the main entrypoint to run Macaron.""" @@ -92,7 +92,7 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None local_maven_repo = os.path.join(home_dir, ".m2") if not os.path.isdir(local_maven_repo): - logger.debug("The default local Maven repo at %s does not exist. Ignore ...") + logger.debug("The default local Maven repo at %s does not exist. Ignore ...", local_maven_repo) global_config.local_maven_repo = None global_config.local_maven_repo = local_maven_repo diff --git a/src/macaron/repo_finder/repo_finder_deps_dev.py b/src/macaron/repo_finder/repo_finder_deps_dev.py index 07b5e4f34..e3f92cc4c 100644 --- a/src/macaron/repo_finder/repo_finder_deps_dev.py +++ b/src/macaron/repo_finder/repo_finder_deps_dev.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the PythonRepoFinderDD class to be used for finding repositories using deps.dev.""" @@ -179,7 +179,7 @@ def get_attestation(purl: PackageURL) -> tuple[dict | None, str | None, bool]: and a flag for whether the attestation is verified. """ if purl.type != "pypi": - logger.debug("PURL type (%s) attestation not yet supported via deps.dev.") + logger.debug("PURL type (%s) attestation not yet supported via deps.dev.", purl.type) return None, None, False if not purl.version: diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index 62a40833f..6fa019991 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module provides methods to perform generic actions on Git URLS.""" @@ -338,7 +338,7 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None: ) return Repo(path=clone_dir) except (subprocess.CalledProcessError, OSError): - logger.debug("The clone dir %s is not empty. An attempt to update it failed.") + logger.debug("The clone dir %s is not empty. An attempt to update it failed.", clone_dir) return None # Ensure that the parent directory where the repo is cloned into exists. diff --git a/tests/integration/run.py b/tests/integration/run.py index e78cf57a6..45d7ed93a 100644 --- a/tests/integration/run.py +++ b/tests/integration/run.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Integration test utility.""" @@ -1220,7 +1220,7 @@ def main(argv: Sequence[str] | None = None) -> int: path = shutil.which(args.macaron) if path is None: - logger.error("'%s' is not a command.") + logger.error("'%s' is not a command.", args.macaron) return 1 macaron_cmd = os.path.abspath(path) From 750038bacb4ed4bad3114043db0abf4f82c25e1c Mon Sep 17 00:00:00 2001 From: Nicholas Allen Date: Tue, 17 Feb 2026 14:11:19 +1000 Subject: [PATCH 15/20] fix: handle GitHub Actions job needs field case-insensitively in analysis. (#1305) Signed-off-by: Nicholas Allen --- .../code_analyzer/dataflow_analysis/github.py | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/macaron/code_analyzer/dataflow_analysis/github.py b/src/macaron/code_analyzer/dataflow_analysis/github.py index 6da30e745..222f55fb1 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Dataflow analysis implementation for analysing GitHub Actions Workflow build pipelines.""" @@ -316,6 +316,15 @@ def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str] return result + @staticmethod + def _find_job_id_case_insensitive(jobs: dict[str, RawGitHubActionsJobNode], job_id: str) -> str | None: + if job_id in jobs: + return job_id + for actual_job_id in jobs: + if actual_job_id.lower() == job_id.lower(): + return actual_job_id + return None + @staticmethod def create( workflow: github_workflow_model.Workflow, context: core.NonOwningContextRef[GitHubActionsWorkflowContext] @@ -352,10 +361,16 @@ def create( needs = job_node.definition["needs"] if isinstance(needs, list): for need in needs: - # TODO invalid needs id? 
- edges.append(need) + actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, need) + if actual_need is None: + raise CallGraphError("needs refers to invalid job") + edges.append(actual_need) elif isinstance(needs, str): - edges.append(needs) + actual_need = GitHubActionsWorkflowNode._find_job_id_case_insensitive(jobs, needs) + if actual_need is None: + raise CallGraphError("needs refers to invalid job") + edges.append(actual_need) + dependency_graph[job_id] = edges ts = TopologicalSorter(dependency_graph) From bf788f3b548d0ed3821006368a311ff5a740e4e3 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Wed, 18 Feb 2026 09:50:52 +1000 Subject: [PATCH 16/20] fix(gen-build-spec): handle errors gracefully when build tool is not supported (#1303) This PR improves error handling in the build spec generation process for unsupported build tools. Signed-off-by: behnazh-w --- .../build_command_patcher.py | 17 ++-- .../common_spec/maven_spec.py | 16 +-- .../common_spec/pypi_spec.py | 13 +-- .../common_spec/test_core.py | 98 ++++++++++++++++++- .../test_build_command_patcher.py | 25 ++++- 5 files changed, 140 insertions(+), 29 deletions(-) diff --git a/src/macaron/build_spec_generator/build_command_patcher.py b/src/macaron/build_spec_generator/build_command_patcher.py index 4fe26f2ba..224ec5715 100644 --- a/src/macaron/build_spec_generator/build_command_patcher.py +++ b/src/macaron/build_spec_generator/build_command_patcher.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the implementation of the build command patching.""" @@ -83,23 +83,26 @@ def _patch_commands( which just holds the original command as a list of string, without any changes. """ result: list[CLICommand] = [] - for cmds in cmds_sequence: + for cmd in cmds_sequence: + # Checking if the command is a valid non-empty list. + if not cmd: + continue effective_cli_parser = None for cli_parser in cli_parsers: - if cli_parser.is_build_tool(cmds[0]): + if cli_parser.is_build_tool(cmd[0]): effective_cli_parser = cli_parser break if not effective_cli_parser: - result.append(UnparsedCLICommand(original_cmds=cmds)) + result.append(UnparsedCLICommand(original_cmds=cmd)) continue try: - cli_command = effective_cli_parser.parse(cmds) + cli_command = effective_cli_parser.parse(cmd) except CommandLineParseError as error: logger.error( "Failed to patch the cli command %s. Error %s.", - " ".join(cmds), + " ".join(cmd), error, ) return None @@ -117,7 +120,7 @@ def _patch_commands( except PatchBuildCommandError as error: logger.error( "Failed to patch the build command %s. Error %s.", - " ".join(cmds), + " ".join(cmd), error, ) return None diff --git a/src/macaron/build_spec_generator/common_spec/maven_spec.py b/src/macaron/build_spec_generator/common_spec/maven_spec.py index 1d0abf4f8..de0b4c5df 100644 --- a/src/macaron/build_spec_generator/common_spec/maven_spec.py +++ b/src/macaron/build_spec_generator/common_spec/maven_spec.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module includes build specification and helper classes for Maven packages.""" @@ -12,7 +12,6 @@ from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict from macaron.build_spec_generator.common_spec.jdk_finder import find_jdk_version_from_central_maven_repo from macaron.build_spec_generator.common_spec.jdk_version_normalizer import normalize_jdk_version -from macaron.errors import GenerateBuildSpecError logger: logging.Logger = logging.getLogger(__name__) @@ -46,11 +45,6 @@ def get_default_build_commands( ------- list[list[str]] The build command as a list[list[str]]. - - Raises - ------ - GenerateBuildSpecError - If there is no default build command available for the specified build tool. """ default_build_commands = [] @@ -65,11 +59,10 @@ def get_default_build_commands( pass if not default_build_commands: - logger.critical( + logger.debug( "There is no default build command available for the build tools %s.", build_tool_names, ) - raise GenerateBuildSpecError("Unable to find a default build command.") return default_build_commands @@ -118,12 +111,13 @@ def resolve_fields(self, purl: PackageURL) -> None: selected_build_commands = self.data["build_commands"] or self.get_default_build_commands( self.data["build_tools"] ) - patched_build_commands = patch_commands( cmds_sequence=selected_build_commands, patches=CLI_COMMAND_PATCHES, ) if not patched_build_commands: - raise GenerateBuildSpecError(f"Failed to patch command sequences {selected_build_commands}.") + logger.debug("Failed to patch build command sequences %s", selected_build_commands) + self.data["build_commands"] = [] + return self.data["build_commands"] = patched_build_commands diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index 0471afd72..097648214 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -16,7 +16,7 @@ from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict from macaron.config.defaults import defaults -from macaron.errors import GenerateBuildSpecError, SourceCodeError, WheelTagError +from macaron.errors import SourceCodeError, WheelTagError from macaron.json_tools import json_extract from macaron.slsa_analyzer.package_registry import pypi_registry from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo @@ -55,11 +55,6 @@ def get_default_build_commands( ------- list[list[str]] The build command as a list[list[str]]. - - Raises - ------ - GenerateBuildSpecError - If there is no default build command available for the specified build tool. """ default_build_commands = [] @@ -77,16 +72,16 @@ def get_default_build_commands( case "hatch": default_build_commands.append("hatch build".split()) case "conda": - default_build_commands.append('echo("Not supported")'.split()) + # TODO: update this if a build command can be used for conda. 
+ pass case _: pass if not default_build_commands: - logger.critical( + logger.debug( "There is no default build command available for the build tools %s.", build_tool_names, ) - raise GenerateBuildSpecError("Unable to find a default build command.") return default_build_commands diff --git a/tests/build_spec_generator/common_spec/test_core.py b/tests/build_spec_generator/common_spec/test_core.py index 7df8b1615..a0620c869 100644 --- a/tests/build_spec_generator/common_spec/test_core.py +++ b/tests/build_spec_generator/common_spec/test_core.py @@ -1,11 +1,15 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the tests for build spec generation""" import pytest +from packageurl import PackageURL +from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict from macaron.build_spec_generator.common_spec.core import ( + ECOSYSTEMS, + LANGUAGES, MacaronBuildToolName, compose_shell_commands, get_language_version, @@ -141,3 +145,95 @@ def test_get_language_version( ) -> None: """Test the get_language_version function.""" assert get_language_version(build_command_info) == expected + + +@pytest.mark.parametrize( + ("base_build_spec_dict"), + [ + pytest.param( + BaseBuildSpecDict( + { + "macaron_version": "0.20.0", + "group_id": "foo", + "artifact_id": "bar", + "version": "1.0.0", + "git_repo": "bla", + "git_tag": "bla", + "newline": "lf", + "language_version": [], + "ecosystem": "maven", + "purl": "pkg:maven/foo/bar@1.0.0", + "language": LANGUAGES.MAVEN.value, + "build_tools": [MacaronBuildToolName.MAVEN], + "build_commands": [], + } + ), + id="empty build command for maven", + ), + pytest.param( + BaseBuildSpecDict( + { + "macaron_version": "0.20.0", + "group_id": "foo", + "artifact_id": "bar", + "version": "1.0.0", + "git_repo": "bla", + "git_tag": "bla", + "newline": "lf", + "language_version": [], + "ecosystem": "maven", + "purl": "pkg:maven/foo/bar@1.0.0", + "language": LANGUAGES.MAVEN.value, + "build_tools": ["ant"], + "build_commands": [["ant", "dist"]], + } + ), + id="unsupported build tool for maven", + ), + pytest.param( + BaseBuildSpecDict( + { + "macaron_version": "0.20.0", + "group_id": None, + "artifact_id": "bar", + "version": "1.0.0", + "git_repo": "bla", + "git_tag": "bla", + "newline": "lf", + "language_version": [], + "ecosystem": "pypi", + "purl": "pkg:pypi/bar@1.0.0", + "language": LANGUAGES.PYPI.value, + "build_tools": [MacaronBuildToolName.FLIT], + "build_commands": [], + } + ), + id="empty build command for pypi", + ), + pytest.param( + BaseBuildSpecDict( + { + "macaron_version": "0.20.0", + "group_id": None, + "artifact_id": "bar", + "version": "1.0.0", + "git_repo": "bla", + "git_tag": "bla", + "newline": "lf", + "language_version": [], + "ecosystem": "pypi", + "purl": "pkg:pypi/bar@1.0.0", + "language": LANGUAGES.PYPI.value, + "build_tools": ["uv"], + "build_commands": [["python", "-m", "build"]], + } + ), + id="unsupported build tool for pypi", + ), + ], +) +def test_resolve_fields(base_build_spec_dict: BaseBuildSpecDict) -> None: + """Test the buildspec field resolution for each ecosystem.""" + ECOSYSTEMS[base_build_spec_dict["ecosystem"].upper()].value(base_build_spec_dict).resolve_fields( + PackageURL.from_string(base_build_spec_dict["purl"]) + ) diff --git 
a/tests/build_spec_generator/test_build_command_patcher.py b/tests/build_spec_generator/test_build_command_patcher.py index b83359698..dad1f04ee 100644 --- a/tests/build_spec_generator/test_build_command_patcher.py +++ b/tests/build_spec_generator/test_build_command_patcher.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the test for the build command patcher.""" @@ -559,3 +559,26 @@ def test_patching_multiple_commands_error( ) is None ) + + +@pytest.mark.parametrize( + ("original_cmd_sequence"), + [ + pytest.param( + [], + id="empty sequence", + ), + pytest.param( + [[]], + id="empty command", + ), + ], +) +def test_empty_command(maven_cli_parser: MavenCLICommandParser, original_cmd_sequence: list[list[str]]) -> None: + """Test the patch command for empty commands.""" + patch_cmds = _patch_commands( + cmds_sequence=original_cmd_sequence, + cli_parsers=[maven_cli_parser], + patches={PatchCommandBuildTool.MAVEN: {}}, + ) + assert patch_cmds == [] From bdd3448373d761eda42ef20c582143f69c8bfea8 Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Wed, 18 Feb 2026 11:59:03 +1000 Subject: [PATCH 17/20] feat: improve buildspec and dockerfile generation (#1279) Signed-off-by: Abhinav Pradeep --- .../common_spec/pypi_spec.py | 30 +++- .../dockerfile/pypi_dockerfile_output.py | 161 ++++++++++++++++-- .../package_registry/pypi_registry.py | 46 +++++ .../test_pypi_dockerfile_output.ambr | 29 +++- .../expected_dockerfile.buildspec | 29 +++- .../expected_default.buildspec | 3 +- .../expected_dockerfile.buildspec | 27 ++- .../pypi_toga/expected_default.buildspec | 3 +- .../pypi_toga/expected_dockerfile.buildspec | 29 +++- .../expected_default.buildspec | 2 +- 10 files changed, 313 insertions(+), 46 deletions(-) diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index 097648214..328481f45 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -112,8 +112,9 @@ def resolve_fields(self, purl: PackageURL) -> None: parsed_build_requires: dict[str, str] = {} sdist_build_requires: dict[str, str] = {} python_version_set: set[str] = set() - wheel_name_python_version_list: list[str] = [] + wheel_name_python_version_set: set[str] = set() wheel_name_platforms: set[str] = set() + dependency_python_version_set: set[str] = set() # Precautionary fallback to default version chronologically_likeliest_version: str = defaults.get("heuristic.pypi", "default_setuptools") @@ -128,6 +129,8 @@ def resolve_fields(self, purl: PackageURL) -> None: if py_version := json_extract(release, ["requires_python"], str): python_version_set.add(py_version.replace(" ", "")) + logger.debug("From package JSON inferred Python constraints: %s", python_version_set) + self.data["has_binaries"] = not pypi_package_json.has_pure_wheel() if self.data["has_binaries"]: @@ -162,9 +165,13 @@ def resolve_fields(self, purl: PackageURL) -> None: logger.debug(pypi_package_json.wheel_filename) _, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename) for tag in tags: - wheel_name_python_version_list.append(tag.interpreter) + wheel_name_python_version_set.add(tag.interpreter) wheel_name_platforms.add(tag.platform) - 
logger.debug(python_version_set) + if wheel_name_python_version_set: + logger.debug( + "From wheel name inferred Python constraints: %s", wheel_name_python_version_set + ) + python_version_set.update(wheel_name_python_version_set) except InvalidWheelFilename: logger.debug("Could not parse wheel file name to extract version") except WheelTagError: @@ -234,8 +241,6 @@ def resolve_fields(self, purl: PackageURL) -> None: if requirement_name not in parsed_build_requires: parsed_build_requires[requirement_name] = specifier - self.data["language_version"] = list(python_version_set) or wheel_name_python_version_list - # If we were not able to find any build and backends, use the default setuptools. if not parsed_build_requires: parsed_build_requires["setuptools"] = "==" + defaults.get("heuristic.pypi", "default_setuptools") @@ -243,6 +248,21 @@ def resolve_fields(self, purl: PackageURL) -> None: build_backends_set.add("setuptools.build_meta") logger.debug("Combined build-requires: %s", parsed_build_requires) + + for package, constraint in parsed_build_requires.items(): + package_requirement = package + constraint + python_version_constraints = registry.get_python_requires_for_package_requirement(package_requirement) + if python_version_constraints: + dependency_python_version_set.add(python_version_constraints) + + # We will prefer to use Python version constraints from the package's + # dependencies. In the case that such inference was unsuccessful, we default + # to the Python version constraints inferred from other sources. + if dependency_python_version_set: + self.data["language_version"] = sorted(dependency_python_version_set) + else: + self.data["language_version"] = sorted(python_version_set) + self.data["build_requires"] = parsed_build_requires self.data["build_backends"] = list(build_backends_set) # We do not generate a build command for non-pure packages diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index adb956346..87e5a1d0d 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -7,11 +7,13 @@ import re from textwrap import dedent +from bs4 import BeautifulSoup, FeatureNotFound from packaging.specifiers import InvalidSpecifier, SpecifierSet from packaging.version import InvalidVersion, Version from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpecDict from macaron.errors import GenerateBuildSpecError +from macaron.util import send_get_http_raw logger: logging.Logger = logging.getLogger(__name__) @@ -36,9 +38,18 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: """ if buildspec["has_binaries"]: raise GenerateBuildSpecError("We currently do not support generating a dockerfile for non-pure Python packages") - language_version: str | None = pick_specific_version(buildspec) + language_version: str | None = pick_specific_version(buildspec["language_version"]) if language_version is None: raise GenerateBuildSpecError("Could not derive specific interpreter version") + try: + version = Version(language_version) + except InvalidVersion as error: + logger.debug("Ran into issue converting %s to a version: %s", language_version, error) + raise GenerateBuildSpecError("Derived interpreter version could not be parsed") from error + if not buildspec["build_tools"]: + raise GenerateBuildSpecError("Cannot generate dockerfile when build tool is unknown") + if 
not buildspec["build_commands"]:
+        raise GenerateBuildSpecError("Cannot generate dockerfile when build command is unknown")
     backend_install_commands: str = " && ".join(build_backend_commands(buildspec))
     build_tool_install: str = ""
     if (
@@ -51,6 +62,12 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str:
         build_tool_install = (
             f"pip install {buildspec['build_tools'][0]} && if test -f \"flit.ini\"; then python -m flit.tomlify; fi && "
         )
+    modern_build_command = build_tool_install + " ".join(x for x in buildspec["build_commands"][0])
+    legacy_build_command = (
+        'if test -f "setup.py"; then pip install wheel && python setup.py bdist_wheel; '
+        "else python -m build --wheel -n; fi"
+    )
+
     dockerfile_content = f"""
     #syntax=docker/dockerfile:1.10
     FROM oraclelinux:9
@@ -73,13 +90,22 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str:
         gcc-c++ gdb lzma glibc-devel libstdc++-devel openssl-devel \\
         readline-devel zlib-devel libzstd-devel libffi-devel bzip2-devel \\
         xz-devel sqlite sqlite-devel sqlite-libs libuuid-devel gdbm-libs \\
-        perf expat expat-devel mpdecimal python3-pip
+        perf expat expat-devel mpdecimal python3-pip \\
+        perl perl-File-Compare
+
+    {openssl_install_commands(version)}
+
+    ENV LD_LIBRARY_PATH=/opt/openssl/lib
+    ENV CPPFLAGS=-I/opt/openssl/include
+    ENV LDFLAGS=-L/opt/openssl/lib

     # Build interpreter and create venv
     RUN <<EOF
@@ ... @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str:
     EOF

     # Run the build
-    RUN {"source /deps/bin/activate && " + build_tool_install + " ".join(x for x in buildspec["build_commands"][0])}
+    RUN source /deps/bin/activate && {modern_build_command if version in SpecifierSet(">=3.6") else legacy_build_command}
     """
     return dedent(dockerfile_content)


-def pick_specific_version(buildspec: BaseBuildSpecDict) -> str | None:
+def openssl_install_commands(version: Version) -> str:
+    """Return the appropriate OpenSSL install commands for a given CPython version.
+
+    Parameters
+    ----------
+    version: Version
+        The CPython version we are trying to build.
+
+    Returns
+    -------
+    str
+        The install commands for the corresponding OpenSSL version.
+    """
+    # As per https://peps.python.org/pep-0644, Python >= 3.10 requires at least OpenSSL 1.1.1,
+    # and 3.6 to 3.9 can be compiled with OpenSSL 1.1.1. Therefore, we compile as below:
+    if version in SpecifierSet(">=3.6"):
+        openssl_version = "1.1.1w"
+        source_url = "https://www.openssl.org/source/old/1.1.1/openssl-1.1.1w.tar.gz"
+    # From the same document, "Python versions 3.6 to 3.9 are compatible with OpenSSL 1.0.2,
+    # 1.1.0, and 1.1.1". As an attempt to generalize for any >= 3.3, we use OpenSSL 1.0.2.
+    else:
+        openssl_version = "1.0.2u"
+        source_url = "https://www.openssl.org/source/old/1.0.2/openssl-1.0.2u.tar.gz"
+
+    return f"""# Build OpenSSL {openssl_version}
+    RUN <<EOF
@@ ... @@
+def pick_specific_version(inferred_constraints: list[str]) -> str | None:
     """Find the latest python interpreter version satisfying inferred constraints.

     Parameters
     ----------
-    buildspec: BaseBuildSpecDict
-        The base build spec generated for the artifact.
+    inferred_constraints: list[str]
+        List of inferred Python version constraints.

     Returns
     -------
     str | None
         String in format major.minor.patch for the latest valid Python
         interpreter version, or None if no such version can be found.
+
+    Examples
+    --------
+    >>> pick_specific_version([">=3.0"])
+    '3.4.10'
+    >>> pick_specific_version([">=3.8"])
+    '3.8.20'
+    >>> pick_specific_version([">=3.0", "!=3.4", "!=3.3", "!=3.5"])
+    '3.6.15'
+    >>> pick_specific_version(["<=3.12"])
+    '3.4.10'
+    >>> pick_specific_version(["<=3.12", "==3.6"])
+    '3.6.15'
     """
-    # We can most smoothly rebuild Python 3.0.0 and above on OL
-    version_set = SpecifierSet(">=3.0.0")
-    for version in buildspec["language_version"]:
+    # We cannot create virtual environments for Python versions <= 3.3.0: the
+    # venv module did not exist before Python 3.3 and only bundles pip from
+    # Python 3.4 onwards.
+    version_set = SpecifierSet(">=3.4.0")
+    for version in inferred_constraints:
         try:
             version_set &= SpecifierSet(version)
         except InvalidSpecifier as error:
@@ -139,14 +214,14 @@ def pick_specific_version(buildspec: BaseBuildSpecDict) -> str | None:

     logger.debug(version_set)

-    # Now to get the latest acceptable one, we can step through all interpreter
+    # Now to get the earliest acceptable one, we can step through all interpreter
     # versions. For the most accurate result, we can query python.org for a
-    # list of all versions, but for now we can approximate by stepping down
-    # through every minor version from 3.14.0 to 3.0.0
-    for minor in range(14, -1, -1):
+    # list of all versions, but for now we can approximate by stepping up
+    # through every minor version from 3.3.0 to 3.14.0
+    for minor in range(3, 15, 1):
         try:
             if Version(f"3.{minor}.0") in version_set:
-                return f"3.{minor}.0"
+                return get_latest_cpython_patch(3, minor)
         except InvalidVersion as error:
             logger.debug("Ran into issue converting %s to a version: %s", minor, error)
     return None
@@ -197,6 +272,59 @@ def infer_interpreter_version(specifier: str) -> str | None:
     return None


+def get_latest_cpython_patch(major: int, minor: int) -> str:
+    """Given a major and minor interpreter version, return the latest CPython patch version.
+
+    Parameters
+    ----------
+    major: int
+        The major component of the version.
+    minor: int
+        The minor component of the version.
+
+    Returns
+    -------
+    str
+        The full major.minor.patch version string corresponding to the
+        latest patch release for the input major and minor version.
+    """
+    latest_patch: Version | None = None
+    # We build CPython from source, so query the index of source releases.
+    response = send_get_http_raw("https://www.python.org/ftp/python/")
+    if not response:
+        raise GenerateBuildSpecError("Failed to fetch index of CPython versions.")
+
+    html: str = ""
+    soup: BeautifulSoup | None = None
+
+    try:
+        html = response.content.decode("utf-8")
+        soup = BeautifulSoup(html, "html.parser")
+    except (UnicodeDecodeError, FeatureNotFound) as error:
+        raise GenerateBuildSpecError("Failed to parse index of CPython versions.") from error
+
+    # Versions can most reliably be found in anchor tags like:
+    # <a href="{Version}/">{Version}/</a>
+    for anchor in soup.find_all("a", href=True):
+        # Get the text enclosed in the anchor tag, stripping spaces.
+        text = anchor.get_text(strip=True)
+        sanitized_text = text.rstrip("/")
+        # Try to convert it to a version.
+        try:
+            parsed_version = Version(sanitized_text)
+            if parsed_version.major == major and parsed_version.minor == minor:
+                if latest_patch is None or parsed_version > latest_patch:
+                    latest_patch = parsed_version
+        except InvalidVersion:
+            # Try the next tag.
+            continue
+
+    if not latest_patch:
+        raise GenerateBuildSpecError(f"Failed to infer latest patch for CPython {major}.{minor}")
+
+    return str(latest_patch)
+
+
 def build_backend_commands(buildspec: BaseBuildSpecDict) -> list[str]:
     """Generate the installation commands for each inferred build backend.
@@ -214,7 +342,10 @@ def build_backend_commands(buildspec: BaseBuildSpecDict) -> list[str]:
         return []
     commands: list[str] = []
     for backend, version_constraint in buildspec["build_requires"].items():
-        commands.append(f'/deps/bin/pip install "{backend}{version_constraint}"')
+        if backend == "setuptools":
+            commands.append("/deps/bin/pip install --upgrade setuptools")
+        else:
+            commands.append(f'/deps/bin/pip install "{backend}{version_constraint}"')
     # For a stable order on the install commands
     commands.sort()
     return commands
diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py
index e11c8260a..935f662c7 100644
--- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py
+++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py
@@ -22,7 +22,9 @@
 import requests
 from bs4 import BeautifulSoup, Tag
+from packaging.requirements import InvalidRequirement, Requirement
 from packaging.utils import InvalidWheelFilename, parse_wheel_filename
+from packaging.version import InvalidVersion, Version

 from macaron.config.defaults import defaults
 from macaron.errors import ConfigurationError, InvalidHTTPResponseError, SourceCodeError, WheelTagError
@@ -540,6 +542,50 @@ def get_matching_setuptools_version(self, package_release_datetime: datetime) ->
         # Return default just in case.
         return defaults.get("heuristic.pypi", "default_setuptools")

+    def get_python_requires_for_package_requirement(self, package_requirement: str) -> str | None:
+        """Return the Python version constraint string for the earliest version of the package satisfying package_requirement.
+
+        Parameters
+        ----------
+        package_requirement: str
+            pip-style requirement string.
+
+        Returns
+        -------
+        str | None
+            Corresponding Python version constraint string.
+        """
+        try:
+            parsed_requirement = Requirement(package_requirement)
+            endpoint = urllib.parse.urljoin(self.registry_url, f"pypi/{parsed_requirement.name}/json")
+            json = self.download_package_json(endpoint)
+            releases = json_extract(json, ["releases"], dict)
+            if releases:
+                # Find the smallest release version satisfying the requirement's specifier.
+                version_tuples: list[tuple[str, Version]] = []
+                for version in releases.keys():
+                    try:
+                        version_name = str(version)
+                        parsed_version = Version(version_name)
+                        if parsed_version in parsed_requirement.specifier:
+                            version_tuple = (version_name, parsed_version)
+                            version_tuples.append(version_tuple)
+                    except InvalidVersion:
+                        continue
+                if not version_tuples:
+                    return None
+                lowest_satisfying_version = min(version_tuples, key=lambda version_tuple: version_tuple[1])
+                release_info = releases[lowest_satisfying_version[0]]
+                if isinstance(release_info, list) and release_info:
+                    release = release_info[0]
+                    if isinstance(release, dict):
+                        constraint_specification = release.get("requires_python")
+                        if isinstance(constraint_specification, str):
+                            return constraint_specification
+            return None
+        except InvalidRequirement:
+            return None
+
     @staticmethod
     def extract_attestation(attestation_data: dict) -> dict | None:
         """Extract the first attestation file from a PyPI attestation response.
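
A minimal sketch of the release-selection logic that the new get_python_requires_for_package_requirement helper implements, written against a hand-rolled stand-in for the PyPI JSON API's "releases" mapping (the function name and sample data below are illustrative, not part of the patch):

    from packaging.requirements import Requirement
    from packaging.version import InvalidVersion, Version

    def lowest_matching_requires_python(requirement: str, releases: dict) -> str | None:
        """Pick the lowest release satisfying a pip-style requirement; return its requires_python."""
        req = Requirement(requirement)
        candidates = []
        for name in releases:
            try:
                parsed = Version(name)
            except InvalidVersion:
                continue  # Skip release names that are not valid PEP 440 versions.
            if parsed in req.specifier:
                candidates.append((name, parsed))
        if not candidates:
            return None
        lowest_name, _ = min(candidates, key=lambda pair: pair[1])
        files = releases[lowest_name]
        if isinstance(files, list) and files and isinstance(files[0], dict):
            value = files[0].get("requires_python")
            if isinstance(value, str):
                return value
        return None

    # Shaped like the "releases" field of a PyPI JSON API response.
    releases = {
        "3.4": [{"requires_python": ">=3.6"}],
        "3.9.0": [{"requires_python": ">=3.8"}],
    }
    print(lowest_matching_requires_python("flit_core>=3.4", releases))  # ">=3.6"

The real helper additionally downloads the JSON from the registry endpoint and swallows InvalidRequirement by returning None, so callers can pass it unvalidated requirement strings.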
diff --git a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr index 696ee6f8d..655628572 100644 --- a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr +++ b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr @@ -13,8 +13,8 @@ # Download and unzip interpreter RUN <=3.10" + ">=3.6", + ">=3.8" ], "ecosystem": "pypi", "purl": "pkg:pypi/markdown-it-py@4.0.0", diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec index 981619196..e4133eb2c 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec @@ -10,8 +10,8 @@ RUN dnf -y install gcc make # Download and unzip interpreter RUN <=3.8", ">=3.9" ], "ecosystem": "pypi", diff --git a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec index 47e1e012a..d50340d8b 100644 --- a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec @@ -10,8 +10,8 @@ RUN dnf -y install gcc make # Download and unzip interpreter RUN <=3.10" + "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" ], "ecosystem": "pypi", "purl": "pkg:pypi/tree-sitter@0.25.2", From fe4a048b18e327b7d01ae5dc05b7554715fc90ba Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Wed, 18 Feb 2026 15:47:54 +1000 Subject: [PATCH 18/20] feat: validate buildspec dockerfile (#1280) Signed-off-by: Abhinav Pradeep --- .../common_spec/base_spec.py | 3 ++ .../common_spec/pypi_spec.py | 5 +++ .../dockerfile/pypi_dockerfile_output.py | 38 ++++++++++++++++++- .../package_registry/pypi_registry.py | 8 ++++ .../test_pypi_dockerfile_output.ambr | 26 ++++++++++++- .../dockerfile/test_pypi_dockerfile_output.py | 10 +++++ .../expected_default.buildspec | 10 ++++- .../expected_dockerfile.buildspec | 26 ++++++++++++- .../expected_default.buildspec | 10 ++++- .../expected_dockerfile.buildspec | 26 ++++++++++++- .../pypi_toga/expected_default.buildspec | 10 ++++- .../pypi_toga/expected_dockerfile.buildspec | 26 ++++++++++++- .../expected_default.buildspec | 5 ++- .../cases/pypi_tree-sitter/test.yaml | 2 +- 14 files changed, 195 insertions(+), 10 deletions(-) diff --git a/src/macaron/build_spec_generator/common_spec/base_spec.py b/src/macaron/build_spec_generator/common_spec/base_spec.py index 698a0b948..6477801fd 100644 --- a/src/macaron/build_spec_generator/common_spec/base_spec.py +++ b/src/macaron/build_spec_generator/common_spec/base_spec.py @@ -84,6 +84,9 @@ class BaseBuildSpecDict(TypedDict, total=False): #: Flag to indicate if the artifact includes binaries. has_binaries: NotRequired[bool] + #: The artifacts that were analyzed in generating the build specification. 
+ upstream_artifacts: dict[str, list[str]] + class BaseBuildSpec(ABC): """Abstract base class for build specification behavior and field resolution.""" diff --git a/src/macaron/build_spec_generator/common_spec/pypi_spec.py b/src/macaron/build_spec_generator/common_spec/pypi_spec.py index 328481f45..ee67578c9 100644 --- a/src/macaron/build_spec_generator/common_spec/pypi_spec.py +++ b/src/macaron/build_spec_generator/common_spec/pypi_spec.py @@ -106,6 +106,7 @@ def resolve_fields(self, purl: PackageURL) -> None: metadata=[], ) + upstream_artifacts: dict[str, list[str]] = {} pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info) patched_build_commands: list[list[str]] = [] build_backends_set: set[str] = set() @@ -141,6 +142,7 @@ def resolve_fields(self, purl: PackageURL) -> None: try: # The wheel function handles downloading binaries in the case that we cannot find a pure wheel. with pypi_package_json.wheel(download_binaries=self.data["has_binaries"]): + upstream_artifacts["wheels"] = pypi_package_json.wheel_urls logger.debug("Wheel at %s", pypi_package_json.wheel_path) # Should only have .dist-info directory. logger.debug("It has directories %s", ",".join(os.listdir(pypi_package_json.wheel_path))) @@ -184,6 +186,8 @@ def resolve_fields(self, purl: PackageURL) -> None: try: with pypi_package_json.sourcecode(): + upstream_artifacts["sdist"] = [pypi_package_json.sdist_url] + logger.debug("sdist url at %s", upstream_artifacts["sdist"]) try: # Get the build time requirements from ["build-system", "requires"] pyproject_content = pypi_package_json.get_sourcecode_file_contents("pyproject.toml") @@ -269,6 +273,7 @@ def resolve_fields(self, purl: PackageURL) -> None: if not self.data["has_binaries"]: patched_build_commands = self.get_default_build_commands(self.data["build_tools"]) self.data["build_commands"] = patched_build_commands + self.data["upstream_artifacts"] = upstream_artifacts def add_parsed_requirement(self, build_requirements: dict[str, str], requirement: str) -> None: """ diff --git a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py index 87e5a1d0d..67d1c6308 100644 --- a/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py +++ b/src/macaron/build_spec_generator/dockerfile/pypi_dockerfile_output.py @@ -62,18 +62,30 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: build_tool_install = ( f"pip install {buildspec['build_tools'][0]} && if test -f \"flit.ini\"; then python -m flit.tomlify; fi && " ) + modern_build_command = build_tool_install + " ".join(x for x in buildspec["build_commands"][0]) legacy_build_command = ( 'if test -f "setup.py"; then pip install wheel && python setup.py bdist_wheel; ' "else python -m build --wheel -n; fi" ) + wheel_url: str = "" + wheel_name: str = "" + + wheel_urls = buildspec["upstream_artifacts"]["wheels"] + # We currently only look for the pure wheel, if it exists + if wheel_urls: + wheel_url = list(wheel_urls)[0] + wheel_name = wheel_url.rsplit("/", 1)[-1] + else: + logger.debug("We could not find an upstream artifact, and therefore we cannot run validation") + dockerfile_content = f""" #syntax=docker/dockerfile:1.10 FROM oraclelinux:9 # Install core tools - RUN dnf -y install which wget tar git + RUN dnf -y install which wget tar unzip git # Install compiler and make RUN dnf -y install gcc make @@ -127,6 +139,30 @@ def gen_dockerfile(buildspec: BaseBuildSpecDict) -> str: # Run the build RUN 
source /deps/bin/activate && {modern_build_command if version in SpecifierSet(">=3.6") else legacy_build_command} + + # Validate script + RUN cat <<'EOF' >/validate + [ -n "{wheel_url}" ] || {{ echo "No upstream artifact to validate against."; exit 1; }} + # Capture artifacts generated + WHEELS=(/src/dist/*.whl) + # Ensure we only have one artifact + [ ${{#WHEELS[@]}} -eq 1 ] || {{ echo "Unexpected artifacts produced!"; exit 1; }} + # BUILT_WHEEL is the artifact we built + BUILT_WHEEL=${{WHEELS[0]}} + # Ensure the artifact produced is not the literal returned by the glob + [ -e $BUILT_WHEEL ] || {{ echo "No wheels found!"; exit 1; }} + # Download the wheel + wget -q {wheel_url} + # Compare wheel names + [ $(basename $BUILT_WHEEL) == "{wheel_name}" ] || {{ echo "Wheel name does not match!"; exit 1; }} + # Compare file tree + (unzip -Z1 $BUILT_WHEEL | grep -v '\\.dist-info' | sort) > built.tree + (unzip -Z1 "{wheel_name}" | grep -v '\\.dist-info' | sort ) > pypi_artifact.tree + diff -u built.tree pypi_artifact.tree || {{ echo "File trees do not match!"; exit 1; }} + echo "Success!" + EOF + + ENTRYPOINT ["/bin/bash","/validate"] """ return dedent(dockerfile_content) diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index 935f662c7..9f988ed80 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -704,6 +704,12 @@ class PyPIPackageJsonAsset: #: The source code temporary location name. package_sourcecode_path: str = field(init=False) + #: URL of the sdist file. + sdist_url: str = field(init=False) + + #: URL of the wheel file. + wheel_urls: list[str] = field(init=False) + #: The wheel temporary location name. 
wheel_path: str = field(init=False) @@ -832,6 +838,7 @@ def get_sourcecode_url(self, package_type: str = "sdist") -> str | None: fragment="", ).geturl() logger.debug("Found source URL: %s", configured_source_url) + self.sdist_url = configured_source_url return configured_source_url return None @@ -892,6 +899,7 @@ def get_wheel_url(self, tag: str = "none-any") -> str | None: fragment="", ).geturl() logger.debug("Found wheel URL: %s", configured_wheel_url) + self.wheel_urls = [configured_wheel_url] return configured_wheel_url return None diff --git a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr index 655628572..8ff65b0da 100644 --- a/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr +++ b/tests/build_spec_generator/dockerfile/__snapshots__/test_pypi_dockerfile_output.ambr @@ -6,7 +6,7 @@ FROM oraclelinux:9 # Install core tools - RUN dnf -y install which wget tar git + RUN dnf -y install which wget tar unzip git # Install compiler and make RUN dnf -y install gcc make @@ -69,5 +69,29 @@ # Run the build RUN source /deps/bin/activate && python -m build + # Validate script + RUN cat <<'EOF' >/validate + [ -n "https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl" ] || { echo "No upstream artifact to validate against."; exit 1; } + # Capture artifacts generated + WHEELS=(/src/dist/*.whl) + # Ensure we only have one artifact + [ ${#WHEELS[@]} -eq 1 ] || { echo "Unexpected artifacts produced!"; exit 1; } + # BUILT_WHEEL is the artifact we built + BUILT_WHEEL=${WHEELS[0]} + # Ensure the artifact produced is not the literal returned by the glob + [ -e $BUILT_WHEEL ] || { echo "No wheels found!"; exit 1; } + # Download the wheel + wget -q https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl + # Compare wheel names + [ $(basename $BUILT_WHEEL) == "cachetools-6.2.1-py3-none-any.whl" ] || { echo "Wheel name does not match!"; exit 1; } + # Compare file tree + (unzip -Z1 $BUILT_WHEEL | grep -v '\.dist-info' | sort) > built.tree + (unzip -Z1 "cachetools-6.2.1-py3-none-any.whl" | grep -v '\.dist-info' | sort ) > pypi_artifact.tree + diff -u built.tree pypi_artifact.tree || { echo "File trees do not match!"; exit 1; } + echo "Success!" 
+ EOF + + ENTRYPOINT ["/bin/bash","/validate"] + ''' # --- diff --git a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py index c8d4d8882..4c8902325 100644 --- a/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py +++ b/tests/build_spec_generator/dockerfile/test_pypi_dockerfile_output.py @@ -32,6 +32,16 @@ def fixture_base_build_spec() -> BaseBuildSpecDict: "build_commands": [["python", "-m", "build"]], "build_requires": {"setuptools": "==80.9.0", "wheel": ""}, "build_backends": ["setuptools.build_meta"], + "upstream_artifacts": { + "wheels": [ + "https://files.pythonhosted.org/packages/96/c5/" + "1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl" + ], + "sdist": [ + "https://files.pythonhosted.org/packages/cc/7e/" + "b975b5814bd36faf009faebe22c1072a1fa1168db34d285ef0ba071ad78c/cachetools-6.2.1.tar.gz" + ], + }, } ) diff --git a/tests/integration/cases/pypi_cachetools/expected_default.buildspec b/tests/integration/cases/pypi_cachetools/expected_default.buildspec index 2a05c0e95..87859fbd4 100644 --- a/tests/integration/cases/pypi_cachetools/expected_default.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_default.buildspec @@ -31,5 +31,13 @@ }, "build_backends": [ "setuptools.build_meta" - ] + ], + "upstream_artifacts": { + "wheels": [ + "https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl" + ], + "sdist": [ + "https://files.pythonhosted.org/packages/cc/7e/b975b5814bd36faf009faebe22c1072a1fa1168db34d285ef0ba071ad78c/cachetools-6.2.1.tar.gz" + ] + } } diff --git a/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec index 254f0b56e..9fbfdddd3 100644 --- a/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_cachetools/expected_dockerfile.buildspec @@ -3,7 +3,7 @@ FROM oraclelinux:9 # Install core tools -RUN dnf -y install which wget tar git +RUN dnf -y install which wget tar unzip git # Install compiler and make RUN dnf -y install gcc make @@ -65,3 +65,27 @@ EOF # Run the build RUN source /deps/bin/activate && python -m build --wheel -n + +# Validate script +RUN cat <<'EOF' >/validate + [ -n "https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl" ] || { echo "No upstream artifact to validate against."; exit 1; } + # Capture artifacts generated + WHEELS=(/src/dist/*.whl) + # Ensure we only have one artifact + [ ${#WHEELS[@]} -eq 1 ] || { echo "Unexpected artifacts produced!"; exit 1; } + # BUILT_WHEEL is the artifact we built + BUILT_WHEEL=${WHEELS[0]} + # Ensure the artifact produced is not the literal returned by the glob + [ -e $BUILT_WHEEL ] || { echo "No wheels found!"; exit 1; } + # Download the wheel + wget -q https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl + # Compare wheel names + [ $(basename $BUILT_WHEEL) == "cachetools-6.2.1-py3-none-any.whl" ] || { echo "Wheel name does not match!"; exit 1; } + # Compare file tree + (unzip -Z1 $BUILT_WHEEL | grep -v '\.dist-info' | sort) > built.tree + (unzip -Z1 "cachetools-6.2.1-py3-none-any.whl" | grep -v '\.dist-info' | sort ) > pypi_artifact.tree + diff -u built.tree 
pypi_artifact.tree || { echo "File trees do not match!"; exit 1; } + echo "Success!" +EOF + +ENTRYPOINT ["/bin/bash","/validate"] diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec index 3fbb4fcbc..de0634640 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_default.buildspec @@ -29,5 +29,13 @@ }, "build_backends": [ "flit_core.buildapi" - ] + ], + "upstream_artifacts": { + "wheels": [ + "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl" + ], + "sdist": [ + "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz" + ] + } } diff --git a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec index e4133eb2c..e6596fc1b 100644 --- a/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_markdown-it-py/expected_dockerfile.buildspec @@ -3,7 +3,7 @@ FROM oraclelinux:9 # Install core tools -RUN dnf -y install which wget tar git +RUN dnf -y install which wget tar unzip git # Install compiler and make RUN dnf -y install gcc make @@ -65,3 +65,27 @@ EOF # Run the build RUN source /deps/bin/activate && pip install flit && if test -f "flit.ini"; then python -m flit.tomlify; fi && flit build + +# Validate script +RUN cat <<'EOF' >/validate + [ -n "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl" ] || { echo "No upstream artifact to validate against."; exit 1; } + # Capture artifacts generated + WHEELS=(/src/dist/*.whl) + # Ensure we only have one artifact + [ ${#WHEELS[@]} -eq 1 ] || { echo "Unexpected artifacts produced!"; exit 1; } + # BUILT_WHEEL is the artifact we built + BUILT_WHEEL=${WHEELS[0]} + # Ensure the artifact produced is not the literal returned by the glob + [ -e $BUILT_WHEEL ] || { echo "No wheels found!"; exit 1; } + # Download the wheel + wget -q https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl + # Compare wheel names + [ $(basename $BUILT_WHEEL) == "markdown_it_py-4.0.0-py3-none-any.whl" ] || { echo "Wheel name does not match!"; exit 1; } + # Compare file tree + (unzip -Z1 $BUILT_WHEEL | grep -v '\.dist-info' | sort) > built.tree + (unzip -Z1 "markdown_it_py-4.0.0-py3-none-any.whl" | grep -v '\.dist-info' | sort ) > pypi_artifact.tree + diff -u built.tree pypi_artifact.tree || { echo "File trees do not match!"; exit 1; } + echo "Success!" 
+EOF + +ENTRYPOINT ["/bin/bash","/validate"] diff --git a/tests/integration/cases/pypi_toga/expected_default.buildspec b/tests/integration/cases/pypi_toga/expected_default.buildspec index 076503858..ac873e87f 100644 --- a/tests/integration/cases/pypi_toga/expected_default.buildspec +++ b/tests/integration/cases/pypi_toga/expected_default.buildspec @@ -33,5 +33,13 @@ }, "build_backends": [ "setuptools.build_meta" - ] + ], + "upstream_artifacts": { + "wheels": [ + "https://files.pythonhosted.org/packages/2b/1a/6a9c8230ad30e819f0965bbd596c736a03e16003d27b0363c632c84d4861/toga-0.5.1-py3-none-any.whl" + ], + "sdist": [ + "https://files.pythonhosted.org/packages/17/e7/0924150329474d61e9f40f8bba1056d640cba22438e05355924019111b46/toga-0.5.1.tar.gz" + ] + } } diff --git a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec index d50340d8b..a8918d0ce 100644 --- a/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec +++ b/tests/integration/cases/pypi_toga/expected_dockerfile.buildspec @@ -3,7 +3,7 @@ FROM oraclelinux:9 # Install core tools -RUN dnf -y install which wget tar git +RUN dnf -y install which wget tar unzip git # Install compiler and make RUN dnf -y install gcc make @@ -65,3 +65,27 @@ EOF # Run the build RUN source /deps/bin/activate && python -m build --wheel -n + +# Validate script +RUN cat <<'EOF' >/validate + [ -n "https://files.pythonhosted.org/packages/2b/1a/6a9c8230ad30e819f0965bbd596c736a03e16003d27b0363c632c84d4861/toga-0.5.1-py3-none-any.whl" ] || { echo "No upstream artifact to validate against."; exit 1; } + # Capture artifacts generated + WHEELS=(/src/dist/*.whl) + # Ensure we only have one artifact + [ ${#WHEELS[@]} -eq 1 ] || { echo "Unexpected artifacts produced!"; exit 1; } + # BUILT_WHEEL is the artifact we built + BUILT_WHEEL=${WHEELS[0]} + # Ensure the artifact produced is not the literal returned by the glob + [ -e $BUILT_WHEEL ] || { echo "No wheels found!"; exit 1; } + # Download the wheel + wget -q https://files.pythonhosted.org/packages/2b/1a/6a9c8230ad30e819f0965bbd596c736a03e16003d27b0363c632c84d4861/toga-0.5.1-py3-none-any.whl + # Compare wheel names + [ $(basename $BUILT_WHEEL) == "toga-0.5.1-py3-none-any.whl" ] || { echo "Wheel name does not match!"; exit 1; } + # Compare file tree + (unzip -Z1 $BUILT_WHEEL | grep -v '\.dist-info' | sort) > built.tree + (unzip -Z1 "toga-0.5.1-py3-none-any.whl" | grep -v '\.dist-info' | sort ) > pypi_artifact.tree + diff -u built.tree pypi_artifact.tree || { echo "File trees do not match!"; exit 1; } + echo "Success!" 
+EOF + +ENTRYPOINT ["/bin/bash","/validate"] diff --git a/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec b/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec index c0612c42d..2173ac78b 100644 --- a/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec +++ b/tests/integration/cases/pypi_tree-sitter/expected_default.buildspec @@ -22,5 +22,8 @@ }, "build_backends": [ "setuptools.build_meta" - ] + ], + "upstream_artifacts": { + "sdist": ["https://files.pythonhosted.org/packages/66/7c/0350cfc47faadc0d3cf7d8237a4e34032b3014ddf4a12ded9933e1648b55/tree-sitter-0.25.2.tar.gz"] + } } diff --git a/tests/integration/cases/pypi_tree-sitter/test.yaml b/tests/integration/cases/pypi_tree-sitter/test.yaml index 13cf9d7d7..0b15a8bce 100644 --- a/tests/integration/cases/pypi_tree-sitter/test.yaml +++ b/tests/integration/cases/pypi_tree-sitter/test.yaml @@ -33,6 +33,6 @@ steps: options: command_args: - -purl - - pkg:pypi/markdown-it-py@0.25.2 + - pkg:pypi/tree-sitter@0.25.2 - --output-format - dockerfile From cbf03c7194c3a8b32e11753eb49e88184b1ff964 Mon Sep 17 00:00:00 2001 From: Abhinav Pradeep Date: Thu, 19 Feb 2026 14:15:46 +1000 Subject: [PATCH 19/20] chore: improve pure wheel check (#1309) Signed-off-by: Abhinav Pradeep --- src/macaron/slsa_analyzer/package_registry/pypi_registry.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index 9f988ed80..bce197890 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -987,10 +987,7 @@ def has_pure_wheel(self) -> bool: try: _, _, _, tags = parse_wheel_filename(file_name) # Check if none and any are in the tags (i.e. the wheel is pure) - # Technically a wheel can have multiple tag sets. Our condition for - # a pure wheel is that it has only one tag set with abi "none" and - # platform "any" - if len(tags) == 1 and all(tag.abi == "none" and tag.platform == "any" for tag in tags): + if all(tag.abi == "none" and tag.platform == "any" for tag in tags): return True except InvalidWheelFilename: logger.debug("Could not parse wheel name.") From 41162501345aa2dab3c0e84bd4eb55f47b536391 Mon Sep 17 00:00:00 2001 From: Behnaz Hassanshahi Date: Thu, 19 Feb 2026 15:43:45 +1000 Subject: [PATCH 20/20] fix(gen-build-spec): remove the default -Dmaven.test.skip=true mvn option from the default spec (#1301) This PR removes the default -Dmaven.test.skip=true option from the default buildspec for Maven artifacts. 
Signed-off-by: behnazh-w --- .../tutorials/rebuild_third_party_artifacts.rst | 3 +-- .../build_spec_generator/build_command_patcher.py | 1 - .../build_spec_generator/common_spec/jdk_finder.py | 6 +++--- .../reproducible_central/reproducible_central.py | 12 ++++++++++-- .../test_reproducible_central.py | 8 +++++--- .../expected_macaron.buildspec | 2 +- .../expected_reproducible_central.buildspec | 2 +- .../computer-k8s/expected_default.buildspec | 1 - 8 files changed, 21 insertions(+), 14 deletions(-) diff --git a/docs/source/pages/tutorials/rebuild_third_party_artifacts.rst b/docs/source/pages/tutorials/rebuild_third_party_artifacts.rst index aaef86b8e..24bb9fd22 100644 --- a/docs/source/pages/tutorials/rebuild_third_party_artifacts.rst +++ b/docs/source/pages/tutorials/rebuild_third_party_artifacts.rst @@ -131,7 +131,6 @@ By default we generate the buildspec in JSON format as follows: [ "mvn", "-DskipTests=true", - "-Dmaven.test.skip=true", "-Dmaven.site.skip=true", "-Drat.skip=true", "-Dmaven.javadoc.skip=true", @@ -161,7 +160,7 @@ The resulting file will be saved as ``output/buildspec/maven/org_apache_hugegrap tool=mvn jdk=8 newline=lf - command="mvn -DskipTests=true -Dmaven.test.skip=true -Dmaven.site.skip=true -Drat.skip=true -Dmaven.javadoc.skip=true clean package" + command="mvn -Dmaven.test.skip=true -DskipTests=true -Dmaven.site.skip=true -Drat.skip=true -Dmaven.javadoc.skip=true clean package" buildinfo=target/computer-k8s-1.0.0.buildinfo You can now use this file to automate rebuilding artifacts, for example as part of the Reproducible Central infrastructure. diff --git a/src/macaron/build_spec_generator/build_command_patcher.py b/src/macaron/build_spec_generator/build_command_patcher.py index 224ec5715..cbfd32722 100644 --- a/src/macaron/build_spec_generator/build_command_patcher.py +++ b/src/macaron/build_spec_generator/build_command_patcher.py @@ -47,7 +47,6 @@ # To remove "-Dgpg.passphrase=$MACARON_UNKNOWN" "gpg.passphrase": None, "skipTests": "true", - "maven.test.skip": "true", "maven.site.skip": "true", "rat.skip": "true", "maven.javadoc.skip": "true", diff --git a/src/macaron/build_spec_generator/common_spec/jdk_finder.py b/src/macaron/build_spec_generator/common_spec/jdk_finder.py index 538a57b28..45d5b71dd 100644 --- a/src/macaron/build_spec_generator/common_spec/jdk_finder.py +++ b/src/macaron/build_spec_generator/common_spec/jdk_finder.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module includes the functions for obtaining the JDK version from a Java artifact.""" @@ -198,7 +198,7 @@ def find_jdk_version_from_remote_maven_repo_standalone( local_artifact_path, ) except InvalidHTTPResponseError as error: - logger.error("Failed why trying to download jar file. Error: %s", error) + logger.debug("Failed while trying to download jar file. Error: %s", error) return None except OSError as os_error: logger.critical("Critical %s", os_error) @@ -278,7 +278,7 @@ def find_jdk_version_from_remote_maven_repo_cache( local_artifact_path, ) except InvalidHTTPResponseError as error: - logger.error("Failed why trying to download jar file. Error: %s", error) + logger.debug("Failed while trying to download jar file. 
Error: %s", error) return None except OSError as os_error: logger.critical("Critical %s", os_error) diff --git a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py index 5a6ec8389..c5f861c90 100644 --- a/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py +++ b/src/macaron/build_spec_generator/reproducible_central/reproducible_central.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the logic to generate a build spec in the Reproducible Central format.""" @@ -85,6 +85,14 @@ def gen_reproducible_central_build_spec(build_spec: BaseBuildSpecDict) -> str | if build_spec["group_id"] is None: raise GenerateBuildSpecError(f"Version is missing in PURL {build_spec['purl']}") + # Add -Dmaven.test.skip for Maven builds. + # TODO: Use the build tool associated with the build command once + # https://github.com/oracle/macaron/issues/1300 is closed. + adapted_build_commands = [ + cmd[:1] + ["-Dmaven.test.skip=true"] + cmd[1:] if ReproducibleCentralBuildTool.MAVEN in cmd[0] else cmd + for cmd in build_spec["build_commands"] + ] + template_format_values: dict[str, str] = { "macaron_version": importlib_metadata.version("macaron"), "group_id": build_spec["group_id"], @@ -96,7 +104,7 @@ def gen_reproducible_central_build_spec(build_spec: BaseBuildSpecDict) -> str | "newline": build_spec["newline"], "buildinfo": f"target/{build_spec['artifact_id']}-{build_spec['version']}.buildinfo", "jdk": build_spec["language_version"][0], - "command": compose_shell_commands(build_spec["build_commands"]), + "command": compose_shell_commands(adapted_build_commands), } return STRING_TEMPLATE.format_map(template_format_values) diff --git a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py index f28b93f66..f95fefeb7 100644 --- a/tests/build_spec_generator/reproducible_central/test_reproducible_central.py +++ b/tests/build_spec_generator/reproducible_central/test_reproducible_central.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains tests for Reproducible Central build spec generation.""" @@ -40,7 +40,7 @@ def test_successful_build_spec(base_build_spec: BaseBuildSpecDict) -> None: assert "groupId=com.oracle" in content assert "artifactId=example-artifact" in content assert "tool=mvn" in content - assert 'command="mvn package"' in content + assert 'command="mvn -Dmaven.test.skip=true package"' in content def test_unsupported_build_tool(base_build_spec: BaseBuildSpecDict) -> None: @@ -80,6 +80,8 @@ def test_compose_shell_commands_integration(base_build_spec: BaseBuildSpecDict) """Test that the correct compose_shell_commands function is used.""" base_build_spec["build_commands"] = [["mvn", "clean", "package"], ["echo", "done"]] content = gen_reproducible_central_build_spec(base_build_spec) - expected_commands = compose_shell_commands([["mvn", "clean", "package"], ["echo", "done"]]) + expected_commands = compose_shell_commands( + [["mvn", "-Dmaven.test.skip=true", "clean", "package"], ["echo", "done"]] + ) assert content assert f'command="{expected_commands}"' in content diff --git a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec index 2fe2e28c8..d5f9ec1ae 100644 --- a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec +++ b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_macaron.buildspec @@ -1 +1 @@ -{"macaron_version": "0.17.0", "group_id": "io.github.behnazh-w.demo", "artifact_id": "core", "version": "2.0.3", "git_repo": "https://github.com/behnazh-w/example-maven-provenance", "git_tag": "597be192fb50f03b86c34f1bfc494fea1eab264f", "newline": "lf", "language_version": "17", "ecosystem": "maven", "purl": "pkg:maven/io.github.behnazh-w.demo/core@2.0.3", "language": "java", "build_tool": "maven", "build_commands": [["./mvnw", "-DskipTests=true", "-Dmaven.test.skip=true", "-Dmaven.site.skip=true", "-Drat.skip=true", "-Dmaven.javadoc.skip=true", "clean", "package"]]} +{"macaron_version": "0.17.0", "group_id": "io.github.behnazh-w.demo", "artifact_id": "core", "version": "2.0.3", "git_repo": "https://github.com/behnazh-w/example-maven-provenance", "git_tag": "597be192fb50f03b86c34f1bfc494fea1eab264f", "newline": "lf", "language_version": "17", "ecosystem": "maven", "purl": "pkg:maven/io.github.behnazh-w.demo/core@2.0.3", "language": "java", "build_tool": "maven", "build_commands": [["./mvnw", "-DskipTests=true", "-Dmaven.site.skip=true", "-Drat.skip=true", "-Dmaven.javadoc.skip=true", "clean", "package"]]} diff --git a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_reproducible_central.buildspec b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_reproducible_central.buildspec index f1622f4e7..1586f4512 100644 --- a/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_reproducible_central.buildspec +++ b/tests/integration/cases/behnazh-w_example-maven-app_gen_rc_build_spec/expected_reproducible_central.buildspec @@ -14,6 +14,6 @@ jdk=17 newline=lf -command="./mvnw -DskipTests=true -Dmaven.test.skip=true -Dmaven.site.skip=true -Drat.skip=true -Dmaven.javadoc.skip=true clean package" +command="./mvnw -Dmaven.test.skip=true -DskipTests=true -Dmaven.site.skip=true -Drat.skip=true -Dmaven.javadoc.skip=true clean package" buildinfo=target/core-2.0.3.buildinfo diff --git 
a/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec b/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec index 86325ad7f..05dbdb6f2 100644 --- a/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec +++ b/tests/integration/cases/org_apache_hugegraph/computer-k8s/expected_default.buildspec @@ -19,7 +19,6 @@ [ "mvn", "-DskipTests=true", - "-Dmaven.test.skip=true", "-Dmaven.site.skip=true", "-Drat.skip=true", "-Dmaven.javadoc.skip=true",
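
As a worked example of the command adaptation introduced in reproducible_central.py above, the following stand-alone sketch mirrors the list comprehension; the helper name is made up, and the literal "mvn" stands in for ReproducibleCentralBuildTool.MAVEN:

    def add_maven_test_skip(commands: list[list[str]]) -> list[list[str]]:
        """Insert -Dmaven.test.skip=true right after the executable of Maven commands only."""
        return [
            cmd[:1] + ["-Dmaven.test.skip=true"] + cmd[1:] if "mvn" in cmd[0] else cmd
            for cmd in commands
        ]

    print(add_maven_test_skip([["./mvnw", "clean", "package"], ["echo", "done"]]))
    # [['./mvnw', '-Dmaven.test.skip=true', 'clean', 'package'], ['echo', 'done']]

This keeps the flag out of the shared default buildspec while still skipping tests in the Reproducible Central output, matching the expected_reproducible_central.buildspec fixture above.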