diff --git a/.github/workflows/run-tests-all-dbs.yml b/.github/workflows/run-tests-all-dbs.yml
index ac2efbe4..53c30de9 100644
--- a/.github/workflows/run-tests-all-dbs.yml
+++ b/.github/workflows/run-tests-all-dbs.yml
@@ -57,7 +57,7 @@ jobs:
python -c "from flowcept.configs import MONGO_ENABLED, LMDB_ENABLED; print('MONGO?', MONGO_ENABLED); print('LMDB?', LMDB_ENABLED)"
flowcept --init-settings --full -y
flowcept --config-profile full-online -y
- pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -k "not test_decorated_function_timed"
+ pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -m "not llm" -k "not test_decorated_function_timed"
- name: Test notebooks with pytest and redis
run: |
diff --git a/.github/workflows/run-tests-kafka-and-rabbit-mq.yml b/.github/workflows/run-tests-kafka-and-rabbit-mq.yml
new file mode 100644
index 00000000..11210c09
--- /dev/null
+++ b/.github/workflows/run-tests-kafka-and-rabbit-mq.yml
@@ -0,0 +1,166 @@
+name: (With Mongo) Tests on Kafka and RabbitMQ MQ
+on:
+ pull_request:
+ branches: [ "dev", "main" ]
+ types: [opened, synchronize, reopened]
+
+jobs:
+
+ build:  # Kafka job: brings up deployment/compose-kafka.yml and runs every test step with MQ_TYPE=kafka
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:  # one job run per listed Python version
+ python-version: [ "3.11", "3.12" ]
+ env:  # job-level variables, shared by all steps below
+ MONGO_ENABLED: true
+ LMDB_ENABLED: false
+ timeout-minutes: 50  # limit for the whole job
+ if: "!contains(github.event.head_commit.message, 'CI Bot')"  # NOTE(review): head_commit is a push-event payload field; under the pull_request trigger it is presumably unset, so this skip may never fire - confirm
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 1  # shallow checkout
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ # cache: "pip"
+
+ - name: Run docker compose  # start the services from compose-kafka.yml in the background (-d)
+ run: docker compose -f deployment/compose-kafka.yml up -d
+
+ - name: Upgrade pip
+ run: python -m pip install --upgrade pip
+
+ - name: Show Python version
+ run: python --version && pip --version
+
+ - name: Test examples  # NOTE(review): runs before "Install all dependencies"; presumably run_examples.sh installs what it needs - confirm
+ run: bash .github/workflows/run_examples.sh examples true # with mongo
+
+ - name: Install all dependencies
+ run: |
+ python -m pip install .[all]
+ python -m pip install .[ml_dev]
+
+ - name: Check liveness  # MQ_TYPE/MQ_PORT are re-exported in each step below because every run step gets a fresh shell
+ run: |
+ export MQ_TYPE=kafka
+ export MQ_PORT=9092
+ python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_PORT={MQ_PORT}")'
+ python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()'
+
+ - name: Run tests with kafka  # main suite under the full-online profile; tensorboard and telemetry tests are ignored, tests marked llm are deselected
+ run: |
+ export MQ_TYPE=kafka
+ export MQ_PORT=9092
+ flowcept --init-settings --full -y
+ flowcept --config-profile full-online -y
+ pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -m "not llm" -k "not test_decorated_function_timed"
+
+ - name: Test telemetry with kafka  # telemetry_test.py is ignored in the main run above and executed here under the full-telemetry profile
+ run: |
+ export MQ_TYPE=kafka
+ export MQ_PORT=9092
+ flowcept --init-settings --full -y
+ flowcept --config-profile full-telemetry -y
+ pytest tests/misc_tests/telemetry_test.py -q
+
+ - name: Test notebooks  # deletes any existing settings file, regenerates it with --dask --mlflow, then runs the notebooks through nbmake
+ run: |
+ export MQ_TYPE=kafka
+ export MQ_PORT=9092
+ rm -f "${FLOWCEPT_SETTINGS_PATH:-$HOME/.flowcept/settings.yaml}"
+ flowcept --init-settings --full --dask --mlflow -y
+ flowcept --config-profile full-online -y
+ pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb --ignore=notebooks/tensorboard.ipynb
+
+ - name: Stop services  # NOTE(review): no always() condition, so this teardown is presumably skipped when an earlier step fails - confirm that is acceptable
+ run: docker compose -f deployment/compose-kafka.yml down
+
+ - name: Clean up  # make clean, truncate runner *.log files if that directory exists, then prune all unused docker images
+ run: |
+ make clean
+ test -d /home/runner/runners/ && find /home/runner/runners/ -type f -name "*.log" -exec sh -c 'echo {}; >"{}"' \; || true
+ docker image prune -a -f
+
+ build-rabbitmq:  # RabbitMQ job: brings up deployment/compose-rabbitmq.yml and runs every test step with MQ_TYPE=rabbitmq
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:  # one job run per listed Python version
+ python-version: [ "3.11", "3.12" ]
+ env:  # job-level variables, shared by all steps below
+ MONGO_ENABLED: true
+ LMDB_ENABLED: false
+ timeout-minutes: 50  # limit for the whole job
+ if: "!contains(github.event.head_commit.message, 'CI Bot')"  # NOTE(review): head_commit is a push-event payload field; under the pull_request trigger it is presumably unset, so this skip may never fire - confirm
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 1  # shallow checkout
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Run docker compose  # start the services from compose-rabbitmq.yml in the background (-d)
+ run: docker compose -f deployment/compose-rabbitmq.yml up -d
+
+ - name: Upgrade pip
+ run: python -m pip install --upgrade pip
+
+ - name: Show Python version
+ run: python --version && pip --version
+
+ - name: Test examples  # NOTE(review): runs before "Install all dependencies"; presumably run_examples.sh installs what it needs - confirm
+ run: bash .github/workflows/run_examples.sh examples true # with mongo
+
+ - name: Install all dependencies
+ run: |
+ python -m pip install .[all]
+ python -m pip install .[ml_dev]
+
+ - name: Check liveness  # MQ_TYPE/MQ_PORT are re-exported in each step below because every run step gets a fresh shell
+ run: |
+ export MQ_TYPE=rabbitmq
+ export MQ_PORT=5672
+ python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_PORT={MQ_PORT}")'
+ python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()'
+
+ - name: Run tests with RabbitMQ  # main suite under the full-online profile; tensorboard and telemetry tests are ignored, tests marked llm are deselected
+ run: |
+ export MQ_TYPE=rabbitmq
+ export MQ_PORT=5672
+ flowcept --init-settings --full -y
+ flowcept --config-profile full-online -y
+ pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -m "not llm" -k "not test_decorated_function_timed"
+
+ - name: Test telemetry with RabbitMQ  # telemetry_test.py is ignored in the main run above and executed here under the full-telemetry profile
+ run: |
+ export MQ_TYPE=rabbitmq
+ export MQ_PORT=5672
+ flowcept --init-settings --full -y
+ flowcept --config-profile full-telemetry -y
+ pytest tests/misc_tests/telemetry_test.py -q
+
+ - name: Test notebooks  # deletes any existing settings file, regenerates it with --dask --mlflow, then runs the notebooks through nbmake
+ run: |
+ export MQ_TYPE=rabbitmq
+ export MQ_PORT=5672
+ rm -f "${FLOWCEPT_SETTINGS_PATH:-$HOME/.flowcept/settings.yaml}"
+ flowcept --init-settings --full --dask --mlflow -y
+ flowcept --config-profile full-online -y
+ pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb --ignore=notebooks/tensorboard.ipynb
+
+ - name: Stop services  # NOTE(review): no always() condition, so this teardown is presumably skipped when an earlier step fails - confirm that is acceptable
+ run: docker compose -f deployment/compose-rabbitmq.yml down
+
+ - name: Clean up  # make clean, truncate runner *.log files if that directory exists, then prune all unused docker images
+ run: |
+ make clean
+ test -d /home/runner/runners/ && find /home/runner/runners/ -type f -name "*.log" -exec sh -c 'echo {}; >"{}"' \; || true
+ docker image prune -a -f
diff --git a/.github/workflows/run-tests-kafka.yml b/.github/workflows/run-tests-kafka.yml
deleted file mode 100644
index 2d1a9add..00000000
--- a/.github/workflows/run-tests-kafka.yml
+++ /dev/null
@@ -1,87 +0,0 @@
-name: (With Mongo) Tests on Kafka MQ
-on:
- pull_request:
- branches: [ "dev", "main" ]
- types: [opened, synchronize, reopened]
-
-jobs:
-
- build:
- runs-on: ubuntu-latest
- strategy:
- matrix:
- python-version: [ "3.11", "3.12" ]
- env:
- MONGO_ENABLED: true
- LMDB_ENABLED: false
- timeout-minutes: 50
- if: "!contains(github.event.head_commit.message, 'CI Bot')"
-
- steps:
- - uses: actions/checkout@v4
- with:
- fetch-depth: 1
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v5
- with:
- python-version: ${{ matrix.python-version }}
- # cache: "pip"
-
- - name: Run docker compose
- run: docker compose -f deployment/compose-kafka.yml up -d
-
- - name: Upgrade pip
- run: python -m pip install --upgrade pip
-
- - name: Show Python version
- run: python --version && pip --version
-
- - name: Test examples
- run: bash .github/workflows/run_examples.sh examples true # with mongo
-
- - name: Install all dependencies
- run: |
- python -m pip install .[all]
- python -m pip install .[ml_dev]
-
- - name: Check liveness
- run: |
- export MQ_TYPE=kafka
- export MQ_PORT=9092
- python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_PORT={MQ_PORT}")'
- python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()'
-
- - name: Run tests with kafka
- run: |
- export MQ_TYPE=kafka
- export MQ_PORT=9092
- flowcept --init-settings --full -y
- flowcept --config-profile full-online -y
- pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -k "not test_decorated_function_timed"
-
- - name: Test telemetry with kafka
- run: |
- export MQ_TYPE=kafka
- export MQ_PORT=9092
- flowcept --init-settings --full -y
- flowcept --config-profile full-telemetry -y
- pytest tests/misc_tests/telemetry_test.py -q
-
- - name: Test notebooks
- run: |
- export MQ_TYPE=kafka
- export MQ_PORT=9092
- rm -f "${FLOWCEPT_SETTINGS_PATH:-$HOME/.flowcept/settings.yaml}"
- flowcept --init-settings --full --dask --mlflow -y
- flowcept --config-profile full-online -y
- pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb --ignore=notebooks/tensorboard.ipynb
-
- - name: Stop services
- run: docker compose -f deployment/compose-kafka.yml down
-
- - name: Clean up
- run: |
- make clean
- test -d /home/runner/runners/ && find /home/runner/runners/ -type f -name "*.log" -exec sh -c 'echo {}; >"{}"' \; || true
- docker image prune -a -f
diff --git a/.github/workflows/run-tests-py313.yml b/.github/workflows/run-tests-py313.yml
index 5f0373a4..54a98ac4 100644
--- a/.github/workflows/run-tests-py313.yml
+++ b/.github/workflows/run-tests-py313.yml
@@ -47,7 +47,7 @@ jobs:
run: |
flowcept --init-settings --full -y
flowcept --config-profile full-online -y
- pytest --ignore=tests/adapters/test_tensorboard.py --ignore=tests/adapters/test_broker.py --ignore=tests/instrumentation_tests/ml_tests/ --ignore=tests/misc_tests/telemetry_test.py -k "not test_decorated_function_timed"
+ pytest --ignore=tests/adapters/test_tensorboard.py --ignore=tests/adapters/test_broker.py --ignore=tests/instrumentation_tests/ml_tests/ --ignore=tests/misc_tests/telemetry_test.py -m "not llm" -k "not test_decorated_function_timed"
- name: Shut down docker compose
run: make services-stop-mongo
diff --git a/.github/workflows/run-tests-simple.yml b/.github/workflows/run-tests-simple.yml
index b27ed4bd..349e16a8 100644
--- a/.github/workflows/run-tests-simple.yml
+++ b/.github/workflows/run-tests-simple.yml
@@ -52,7 +52,7 @@ jobs:
run: |
flowcept --init-settings --full -y
flowcept --config-profile full-online -y
- pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -k "not test_decorated_function_timed"
+ pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -m "not llm" -k "not test_decorated_function_timed"
- name: Test notebooks with pytest and redis
run: |
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 1fa2e78d..caef619e 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -51,10 +51,10 @@ jobs:
run: |
pip install .
pip list
- flowcept --init-settings -y
+ rm -f ~/.flowcept/settings.yaml flowcept_buffer.jsonl WORKFLOW_CARD.md
python examples/start_here.py
pip uninstall flowcept -y
- rm ~/.flowcept/settings.yaml
+ rm -f ~/.flowcept/settings.yaml flowcept_buffer.jsonl WORKFLOW_CARD.md
- name: Start docker compose with redis
run: make services-mongo
@@ -110,7 +110,8 @@ jobs:
echo "=== inotify limits ==="
cat /proc/sys/fs/inotify/max_user_watches || true
cat /proc/sys/fs/inotify/max_user_instances || true
- pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -k "not test_decorated_function_timed"
+ # Below, ignoring tests that require an active LLM service provider:
+ pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -m "not llm" -k "not test_decorated_function_timed"
- name: Test decorator timing in isolated offline mode
run: |
@@ -182,7 +183,7 @@ jobs:
echo "=== inotify limits ==="
cat /proc/sys/fs/inotify/max_user_watches || true
cat /proc/sys/fs/inotify/max_user_instances || true
- pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -k "not test_decorated_function_timed"
+ pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/misc_tests/telemetry_test.py -m "not llm" -k "not test_decorated_function_timed"
- name: Test telemetry in isolated telemetry mode with kafka
run: |
diff --git a/.github/workflows/ui-checks.yml b/.github/workflows/run-ui-checks.yml
similarity index 92%
rename from .github/workflows/ui-checks.yml
rename to .github/workflows/run-ui-checks.yml
index fb15e517..995c195f 100644
--- a/.github/workflows/ui-checks.yml
+++ b/.github/workflows/run-ui-checks.yml
@@ -20,6 +20,9 @@ jobs:
- name: Install UI dependencies
run: make ui-install
+ - name: Run UI checks (lint)
+ run: make ui-checks
+
- name: Run UI unit tests
run: make ui-test
diff --git a/AGENTS.md b/AGENTS.md
index b96eb9c5..e5ec555c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -3,15 +3,30 @@
This file is the single source of truth for code-assistant behavior in this repository.
Each major module and the UI also has its own `README.md` (under `src/flowcept/*/`, `ui/`, `tests/`, `deployment/`, `examples/`) with deeper subsystem context; read the relevant one before working in that area.
+## Agent Prompt Design Rule
+
+Prompts in `src/flowcept/agents/prompts/` must remain domain- and application-agnostic. Adding app-specific fields or activity names to fix one test is a design failure.
+
+Do not add few-shots to fix specific queries; revisit the prompting strategy instead.
+
+## Test Failure Rule
+
+Fix the system or the expectation — never the test data.
+
Do not duplicate these rules in `CLAUDE.md`, `.cursor/rules`, `GEMINI.md`, `SKILL.md`, or other agent files.
If a tool requires its own file, make that file (which should immediately go to .gitignore) a thin pointer to this one.
## 1. First Principles
- Be surgical. Prefer small, reviewable changes.
-- Reuse above all. Avoid duplication and one-off fixes.
+- Before proposing any implementation or design strategy, find how the codebase already solves the same concern — same class type, same data flow, same operation. Replicate that solution exactly. If no existing pattern exists, flag it in the response before implementing.
+- Flowcept is extremely performance-sensitive, especially in the data producer path. Even small ifs, loops, or function calls in hot paths must be avoided at all costs.
+- Reuse above all. Avoid duplication and one-off fixes. Duplicating code or logic is a MAJOR problem. Avoid it at all costs.
+- You often solve the main problem being addressed while silently introducing several other problems. Before editing a file, assess the impact it will have on other parts of the code. One-off solutions create more problems than they solve.
+- Separation of concerns is extremely important in this project. Mixing concerns is not acceptable. Each module in the project has a clear and separate concern. Report if you find violations.
- Do not overengineer.
- Prefer visible failures over fallback code that hides contract mismatches.
+- Never add defensive type checks, isinstance guards, or fallback values unless explicitly asked. Let the code break loudly.
- Prefer `settings.yaml` over hardcoded behavior.
- Avoid dependency pins unless there is a proven direct reason and no better practical fix.
- Do not commit personal absolute paths.
@@ -21,11 +36,13 @@ If a tool requires its own file, make that file (which should immediately go to
## 2. Interaction Rules
+- Answer questions in text only. Questions ("why", "how", "is X", "should we", "assess") are never commands to write code.
+- Never edit more than what was explicitly named in the request.
+- If fixing X reveals problem Y, report Y — do not fix it.
- Keep responses under 50 words unless the user asks for detail.
-- Do not dump large code or long explanations unless explicitly asked.
- Before long-running operations, warn the user and ask permission.
-- During approved long operations, provide brief status updates about every minute.
-- The human user is the owner and responsible for all actions in this code. Explain tradeoffs clearly, then follow decisions.
+- The human user is the owner. Explain tradeoffs clearly, then follow decisions.
+
## 3. Editing Rules
@@ -153,7 +170,8 @@ flowcept --init-settings --full --dask --mlflow -y
**TDD is mandatory for both Python and UI/frontend.** Write the test first, watch it fail, then implement until it passes.
-- **Python**: write a real integration test in `tests/` before the implementation. Guard service-dependent tests with `Flowcept.services_alive()` / `MONGO_ENABLED` skips. No mocks.
+- **Python**: real integration tests live in `tests/`. Before implementing, check whether what you are about to implement is already covered by both unit and integration tests. If it is not, add that coverage first. Guard service-dependent tests with skips that use `Flowcept.services_alive()` / the `*_ENABLED` flags available in `configs.py`.
+- Test the real thing! No mocks. No fakes. Prefer generating new data over relying on synthetic data.
- **UI/Frontend**: write a vitest test in `ui/tests/` before adding new pure logic (store mutations, utility functions, graph algorithms). Use real data fixtures — no mocks, no DOM for pure-function and store tests. Component render tests are discouraged (fragile, high mock cost); test logic at the function/store level instead. Run with `make ui-test`.
Use the `flowcept` conda environment.
@@ -201,7 +219,9 @@ Do not run tests from scratch/sandbox directories. Target `tests/` explicitly.
- Prefer real tests over mocks. Use real services, real data, and real LLMs when feasible.
- Avoid mock-heavy tests unless there is no practical alternative.
- When a test fails, the correct fix is almost always to fix the implementation code, not the test; the test itself is very rarely the culprit. Always resolve warnings at their source rather than silencing them.
+- **NEVER lower test thresholds or broaden expected responses just to make a failing test pass.** Doing so hides real bugs and degrades the test suite over time. If a test fails, fix the underlying behavior.
- **Periodically recommend running the full integration test suites** (`make tests` and `E2E_LIVE=1 make ui-e2e`) — especially after merges, significant backend or UI changes, or when the user has been iterating quickly on a feature. Mocked tests alone are not sufficient to catch regressions against real services.
+- **Tests must verify meaningful system behavior**, not code structure (file paths, imports, `hasattr` checks).
## 11. CI And Dependency Drift
@@ -214,7 +234,7 @@ Important CI surfaces:
- `run-tests.yml`: broad Redis and Kafka path on push/schedule.
- `run-tests-simple.yml`: Redis without Mongo.
- `run-tests-offline.yml`: full offline profile.
-- `run-tests-kafka.yml`: Kafka + Mongo.
+- `run-tests-kafka-and-rabbit-mq.yml`: Kafka + RabbitMQ + Mongo.
- `run-tests-all-dbs.yml`: Mongo and LMDB coverage.
- `run-tests-in-container.yml`: Docker image tests.
- `run-tests-py313.yml`: Python 3.13 subset.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e70ad1ca..dfe7c789 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -50,7 +50,7 @@ Several GitHub Actions cover different runtime environments:
* [run-tests.yml](.github/workflows/run-tests.yml) runs the main test matrix, including Redis and Kafka paths.
* [run-tests-simple.yml](.github/workflows/run-tests-simple.yml) runs tests without MongoDB.
* [run-tests-offline.yml](.github/workflows/run-tests-offline.yml) runs the full-offline profile.
-* [run-tests-kafka.yml](.github/workflows/run-tests-kafka.yml) runs Mongo-backed tests with Kafka MQ.
+* [run-tests-kafka-and-rabbit-mq.yml](.github/workflows/run-tests-kafka-and-rabbit-mq.yml) runs Mongo-backed tests with Kafka and RabbitMQ.
* [run-tests-all-dbs.yml](.github/workflows/run-tests-all-dbs.yml) runs Mongo and non-Mongo database paths.
* [run-tests-in-container.yml](.github/workflows/run-tests-in-container.yml) runs tests inside the Flowcept container.
* [run-tests-py313.yml](.github/workflows/run-tests-py313.yml) runs the Python 3.13-compatible subset.
diff --git a/Makefile b/Makefile
index 211f1cf9..4e921ab6 100644
--- a/Makefile
+++ b/Makefile
@@ -12,11 +12,14 @@ help:
@printf "\033[32mservices-stop-kafka\033[0m stop Kafka services and remove attached volumes\n"
@printf "\033[32mservices-mofka\033[0m run services with Mofka using Docker\n"
@printf "\033[32mservices-stop-mofka\033[0m stop Mofka services and remove attached volumes\n"
+ @printf "\033[32mservices-rabbitmq\033[0m run services with RabbitMQ using Docker\n"
+ @printf "\033[32mservices-stop-rabbitmq\033[0m stop RabbitMQ services and remove attached volumes\n"
@printf "\033[32mtests\033[0m run unit tests with pytest\n"
@printf "\033[32mtests-offline\033[0m run offline-safe tests with pytest\n"
@printf "\033[32mtests-in-container\033[0m run unit tests with pytest inside Flowcept's container\n"
@printf "\033[32mtests-in-container-mongo\033[0m run unit tests inside container with MongoDB\n"
- @printf "\033[32mtests-in-container-kafka\033[0m run unit tests inside container with Kafka and MongoDB\n"
+ @printf "\033[32mtests-in-container-kafka\033[0m run unit tests inside container with Kafka and MongoDB\n"
+ @printf "\033[32mtests-in-container-rabbitmq\033[0m run unit tests inside container with RabbitMQ and MongoDB\n"
@printf "\033[32mtests-notebooks\033[0m test the notebooks using pytest\n"
@printf "\033[32mclean\033[0m remove cache directories and Sphinx build output\n"
@printf "\033[32mdocs\033[0m build HTML documentation using Sphinx\n"
@@ -154,6 +157,17 @@ services-mofka:
services-stop-mofka:
docker compose --file deployment/compose-mofka.yml down --volumes
+# Run services with RabbitMQ using Docker
+services-rabbitmq:
+ docker compose --file deployment/compose-rabbitmq.yml up --detach
+
+# Stop RabbitMQ services and remove attached volumes
+services-stop-rabbitmq:
+ docker compose --file deployment/compose-rabbitmq.yml down --volumes
+
+tests-in-container-rabbitmq:  # Run unit tests inside the container with RabbitMQ and MongoDB. NOTE(review): no -m "not llm" filter here, unlike the workflow pytest calls - confirm
+ docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=flowcept_rabbitmq -e MQ_PORT=5672 -e MQ_TYPE=rabbitmq -e MONGO_HOST=flowcept_mongo -e MONGO_ENABLED=true -e LMDB_ENABLED=false --network flowcept_default flowcept /bin/bash -lc '/opt/conda/envs/flowcept/bin/flowcept --init-settings --full -y && /opt/conda/envs/flowcept/bin/flowcept --config-profile full-online -y && /opt/conda/envs/flowcept/bin/pytest tests --timeout=600 --ignore=tests/adapters/test_tensorboard.py --ignore=tests/instrumentation_tests/ml_tests --ignore=tests/misc_tests/telemetry_test.py -k "not test_decorated_function_timed"'
+
# Run unit tests using pytest
.PHONY: tests
tests:
diff --git a/README.md b/README.md
index 47e32683..6b5e79e3 100644
--- a/README.md
+++ b/README.md
@@ -1,570 +1,291 @@
-
-
-
-
Lightweight Distributed Workflow Provenance
+
Runtime provenance for distributed scientific and AI workflows
+
+Capture, stream, query, visualize, and reason over workflow lineage across ML, agentic, edge, cloud, and HPC systems.
+
----
-
-Flowcept captures and queries workflow provenance at runtime with minimal code changes and low overhead. It unifies data from diverse tools and workflows across the Edge–Cloud–HPC continuum and provides ML-aware capture, MCP agents provenance, telemetry, extensible adapters, and flexible storage.
-
----
-
+
[](https://flowcept.readthedocs.io/)
-[](https://workflowscommunity.slack.com/archives/C06L5GYJKQS)
-[](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml)
[](https://pypi.org/project/flowcept)
[](https://github.com/ORNL/flowcept/actions/workflows/run-tests.yml)
-[](https://github.com/ORNL/flowcept/actions/workflows/checks.yml)
+[](https://github.com/ORNL/flowcept/actions/workflows/checks.yml)
[](LICENSE)
+---
+## What Is Flowcept?
+Flowcept is a lightweight runtime provenance data management and data observability system for AI, scientific, and agentic workflows.
+It records what ran, what data were used and generated, where it ran, how long it took,
+which resources it consumed, and how workflow artifacts relate to each other, with rich context and detailed metadata.
-
+It is designed for workflows that are distributed, heterogeneous, and hard to inspect after the fact:
+ML training, agentic workflows, HPC jobs, edge-to-cloud-to-HPC pipelines, Workflow Management System tasks, and multi-workflow campaigns.
+Flowcept records can start as simple JSONL files for small local demos, move to LMDB for
+pure file-based runs, and scale to MongoDB for cloud deployments, production-style querying, dashboards, and UI use.
+For distributed runs, provenance records can also stream through a message queue while the
+workflow is still executing. The Flowcept Agent adds LLM-based interaction on top of captured
+provenance, so users can ask questions, inspect lineage, and drive visual exploration naturally, over both the in-motion data in the MQ systems and the data persisted in the database.
----
+## See It In Action
+
+The Flowcept UI turns captured provenance into browsable workflow structure, data lineage,
+dashboards, workflow cards, and natural-language provenance exploration.
+
+> Place Gif here
-# Quickstart
+
-The easiest way to capture provenance from plain Python functions, with no external services needed:
+## Why Flowcept?
-1) Install and initialize settings
+- **Distributed by design**: MQ-based provenance streaming with [Redis](https://redis.io) (default), [RabbitMQ](https://www.rabbitmq.com), [Kafka](https://kafka.apache.org), and [Mofka](https://mofka.readthedocs.io), plus database-backed storage for online querying.
+- **Low-overhead HPC capture**: buffer and stream provenance with low interference in large-scale jobs.
+- **Plugin-friendly capture**: instrument native code or use adapters, including PyTorch, Dask, MLflow, TensorBoard, and more.
+- **AI/ML-ready semantics**: preserve workflow, task, parameter, metric, model, tensor, artifact, telemetry, and resource-usage context.
+- **Agentic workflow lineage**: capture agent/tool execution, LLM usage, prompts, responses, and runtime provenance.
+- **Flowcept Agent**: chat naturally with captured provenance, inspect workflow data, generate workflow cards, and drive interactive visualizations.
+- **Standards-aware provenance**: Flowcept follows the [W3C PROV](https://www.w3.org/TR/prov-overview/) model and extends it for workflow, ML, and agentic execution contexts.
-```shell
-# Make sure you activate your Python environment (e.g., conda, venv) first
+## Quickstart (get it up in 1 minute): Capture Provenance Offline
+
+This minimal example needs no database, broker, or external service.
+
+```bash
pip install flowcept
-flowcept --init-settings
```
-This generates a minimal settings file in `~/.flowcept/settings.yaml`.
-
-2) Run the minimal example
-Save the following script as `quickstart.py` and run `python quickstart.py.`
+Create `quickstart.py`:
```python
-"""
-A minimal example of Flowcept's instrumentation using @decorators.
-This example needs no DB, broker, or external service.
-"""
from flowcept import Flowcept, flowcept_task
from flowcept.instrumentation.flowcept_decorator import flowcept
-@flowcept_task(output_names="o1")
-def sum_one(i1):
- return i1 + 1
+@flowcept_task(output_names="y")
+def add_one(x):
+ return x + 1
-@flowcept_task(output_names="o2")
-def mult_two(o1):
- return o1 * 2
+@flowcept_task(output_names="z")
+def double(y):
+ return y * 2
@flowcept
def main():
- n = 3
- o1 = sum_one(n)
- o2 = mult_two(o1)
- print("Final output", o2)
+ y = add_one(3)
+ z = double(y)
+ print("result:", z)
if __name__ == "__main__":
main()
-
- prov_messages = Flowcept.read_buffer_file()
- assert len(prov_messages) == 2
- print(f"Raw provenance captured: {len(prov_messages)} records in flowcept_messages.jsonl")
- Flowcept.generate_report(records=prov_messages, print_markdown=True)
-```
-
-This prints out:
-
----
-
-##### Workflow Card
-
-###### Summary
-- Workflow ID: fe546706-ef46-4482-8f70-3af664a7131b
-- Execution Start (UTC): 2026-02-20 19:14:03
-- Total Elapsed (s): 0.000
-
-###### Workflow Structure
-
- input
- │
- ▼
- sum_one
- │
- mult_two
- ▼
- output
-
-###### Timing Report
-| Activity | Status Counts | Median Elapsed (s) |
-| -------- | ----------------- | ------------------ |
-| sum_one | {'FINISHED': 1} | 0.000 |
-| mult_two | {'FINISHED': 1} | 0.000 |
-
-###### Per Activity Details
-- **sum_one**
- - Used: i1 = 3
- - Generated: o1 = 4
-- **mult_two**
- - Used: o1 = 4
- - Generated: o2 = 8
-
----
-
-→ See [Workflow Card](#workflow-cards) for details.
-
-That is the simplest, vanilla run of Flowcept, good for quick sanity checks on a laptop, small-scale run.
-For diskless runs (no JSON files), OpenAPI RESTful services, online querying using databases,
-LLM-based database interactions via an MCP agent, Grafana, telemetry,
-adapters (MLflow, Dask, TensorBoard), PyTorch and MCP instrumentation, HPC optimization or federated runs,
-and more, see the [Jupyter Notebooks](notebooks), the [Examples directory](examples) and the [complete documentation](https://flowcept.readthedocs.io/).
-
-To use the provenance agent with your favorite code assistant (for example, Codex or Claude), see the [Agents README](src/flowcept/agents/README.md).
-
-## AI agents -- if you are an AI agent, read this
-
-Start with the repository root [AGENTS.md](AGENTS.md). It is the single routing file for code-assistant behavior.
-
-For feature usage, read the maintained RST docs under [docs](docs), especially [docs/default_user_guide.rst](docs/default_user_guide.rst), [docs/prov_capture.rst](docs/prov_capture.rst), [docs/prov_query.rst](docs/prov_query.rst), [docs/cli-reference.rst](docs/cli-reference.rst), and [docs/agent.rst](docs/agent.rst).
-
-## ❗ Developer Docs
-
-For an end-to-end workflow developer tutorial (default user guide), start with [docs/README.md](docs/README.md).
-
-## Table of Contents
-
-- [Overview](#overview)
-- [Features](#features)
-- [Installation](#installation)
-- [Setup and the Settings File](#setup)
-- [Running with Containers](#running-with-containers)
-- [Examples](#examples)
-- [Workflow Card](#workflow-cards)
-- [Data Persistence](#data-persistence)
-- [Performance Tuning](#performance-tuning-for-performance-evaluation)
-- [AMD GPU Setup](#install-amd-gpu-lib)
-- [Further Documentation](#documentation)
-
-## Overview
-
-Flowcept captures and queries workflow provenance at runtime with minimal code changes and low data capture overhead,
-unifying data from diverse tools and workflows.
-
-Designed for scenarios involving critical data from multiple, federated workflows in the Edge-Cloud-HPC continuum, Flowcept supports end-to-end monitoring, analysis, querying, and enhanced support for Machine Learning (ML) and for agentic workflows.
-
-## Features
-
-- Automatic workflow provenance capture with minimal intrusion
-- Adapters for MLflow, Dask, TensorBoard; easy to add more
-- Optional explicit instrumentation via decorators
-- ML-aware capture, from workflow to epoch and layer granularity
-- Agentic workflows: MCP agents-aware provenance capture
-- Low overhead, suitable for HPC and highly distributed setups
-- Telemetry capture for CPU, GPU, memory, linked to dataflow
-- Pluggable MQ and storage backends (Redis, Kafka, MongoDB, LMDB)
-- Web UI: provenance browser, dashboards, live updates, and an embedded LLM chat agent
-- [W3C PROV](https://www.w3.org/TR/prov-overview/) adherence
-
-Explore [Jupyter Notebooks](notebooks) and [Examples](examples) for usage.
-
-## Installation
-
-Flowcept can be installed in multiple ways, depending on your needs.
-
-### 1. Default Installation
-To install Flowcept with its basic dependencies from [PyPI](https://pypi.org/project/flowcept/), run:
-
-```shell
-pip install flowcept
-```
-
-This installs the minimal Flowcept package, **not** including MongoDB, Redis, MCP, or any adapter-specific dependencies.
-
-### 2. Installing Specific Adapters and Additional Dependencies
-
-Flowcept integrates with several tools and services, but you should **only install what you actually need**.
-Good practice is to cherry-pick the extras relevant to your workflow instead of installing them all.
-
-```shell
-pip install flowcept[mongo] # MongoDB support
-pip install flowcept[mlflow] # MLflow adapter
-pip install flowcept[dask] # Dask adapter
-pip install flowcept[tensorboard] # TensorBoard adapter
-pip install flowcept[kafka] # Kafka message queue
-pip install flowcept[nvidia] # NVIDIA GPU runtime capture
-pip install flowcept[amd] # AMD GPU runtime capture (see "Install AMD GPU Lib" for version/LD_LIBRARY_PATH notes)
-pip install flowcept[telemetry] # CPU/GPU/memory telemetry capture
-pip install flowcept[lmdb] # LMDB lightweight database
-pip install flowcept[mqtt] # MQTT support
-pip install flowcept[llm_agent] # MCP agent, LangChain, Streamlit integration: needed either for MCP capture or for the Flowcept Agent.
-pip install flowcept[llm_google] # Google GenAI + Flowcept agent support
-pip install flowcept[dev] # Developer dependencies (docs, tests, lint, etc.)
-```
-
-### 3. Installing with Common Runtime Bundle
-
-```shell
-pip install flowcept[extras]
+ # print(Flowcept.read_buffer_file()) # inspect raw JSONL records if needed
+ Flowcept.generate_report(print_markdown=True)
```
-The `extras` group is a convenience shortcut that bundles the most common runtime dependencies.
-It is intended for users who want a fairly complete, but not maximal, Flowcept environment.
-
-You might choose `flowcept[extras]` if:
-
-- You want Flowcept to run out-of-the-box with Redis, telemetry, and MongoDB.
-- You prefer not to install each extra one by one
-
-⚠️ If you only need one of these features, install it individually instead of `extras`.
-
-### 4. Install All Optional Dependencies at Once
-
-Flowcept provides a combined all extra, but installing everything into a single environment is not recommended for users.
-Many of these dependencies are unrelated and should not be mixed in the same runtime. This option is only intended for Flowcept developers who need to test across all adapters and integrations.
-
-```
-pip install flowcept[all]
-```
+Run it:
-### 5. Installing from Source
-To install Flowcept from the source repository:
-
-```
-git clone https://github.com/ORNL/flowcept.git
-cd flowcept
-pip install .
-```
-
-You can then install specific dependencies similarly as above:
-
-```
-pip install .[optional_dependency_name]
+```bash
+python quickstart.py
```
-This follows the same pattern as step 2, allowing for a customized installation from source.
-
-## Setup
+Flowcept captures task inputs, outputs, timing, and workflow structure, and writes the records to a local JSONL buffer file.
+The final line reads the default buffer file and prints a workflow card.
-The [Quickstart](#quickstart) example works with just `pip install flowcept`, no extra setup is required.
+For the maintained tutorial, read the [Quick Start](https://flowcept.readthedocs.io/en/latest/quick_start.html).
-For online queries or distributed capture, Flowcept relies on two optional components:
+## Use The Web UI
-- **Message Queue (MQ)** — message broker / pub-sub / data stream
-- **Database (DB)** — persistent storage for historical queries
-
----
-
-#### Message Queue (MQ)
-
-- Required for anything beyond Quickstart
-- Flowcept publishes provenance data to the MQ during workflow runs
-- Developers can subscribe with custom consumers (see [this example](examples/consumers/simple_consumer.py).
-- You can monitor or print messages in motion using `flowcept --stream-messages --print`.
-
-Supported MQs:
-- [Redis](https://redis.io) → **default**, lightweight, works on Linux, macOS, Windows, and HPC (tested on [Frontier](link) and [Summit](link))
-- [Kafka](https://kafka.apache.org) → for distributed environments or if Kafka is already in your stack
-- [Mofka](https://mofka.readthedocs.io) → optimized for HPC runs
-
----
-
-#### Database (DB)
-
-- **Optional**, but required for:
- - Persisting provenance beyond MQ memory/disk buffers
- - Running complex analytical queries on historical data
-
-Supported DBs:
-- [MongoDB](https://www.mongodb.com) → default, efficient bulk writes + rich query support
-- [LMDB](https://lmdb.readthedocs.io) → lightweight, no external service, basic query capabilities
-
----
-
-### Notes
-
-- Without a DB:
- - Provenance remains in the MQ only (persistence not guaranteed)
- - Complex historical queries are unavailable
-- Flowcept’s architecture is modular: other MQs and DBs (graph, relational, etc.) can be added in the future
-- Deployment examples for MQ and DB are provided in the [deployment](deployment) directory
-
-
-### Downloading and Starting External Services (MQ or DB)
-
-Flowcept uses external services for message queues (MQ) and databases (DB). You can start them with Docker Compose, plain containers, or directly on your host.
-
----
-
-#### Using Docker Compose (recommended)
-
-We provide a [Makefile](deployment/Makefile) with shortcuts:
-
-1. **Redis only (no DB)**: `make services` (LMDB can be used in this setup as a lightweight DB)
-2. **Redis + MongoDB**: `make services-mongo`
-3. **Kafka + MongoDB**: `make services-kafka`
-4. **Mofka only (no DB)**: `make services-mofka`
-
-To customize, edit the YAML files in [deployment](deployment/) and run `docker compose -f deployment/.yml up -d`
-
----
-
-#### Using Docker (without Compose)
-
-See the [deployment/](deployment/) compose files for expected images and configurations. You can adapt them to your environment and use standard `docker pull / run / exec` commands.
-
----
-
-#### Running on the Host (no containers)
-
-1. Install binaries for the service you need:
- - **macOS** users can install with [Homebrew](https://brew.sh).
- Example for Redis:
- ```bash
- brew install redis
- brew services start redis
- ```
-
- - On Linux, use your distro package manager (e.g. `apt`, `dnf`, `yum`)
- - If non-root (typically the case if you want to deploy these services locally in an HPC system), search for the installed binaries for your OS/hardware architecture, download them in a directory that you have r+w permission, and run them.
- - On Windows, utilize [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) to use a Linux distro.
-
-2. Start services normally (`redis-server`, `mongod`, `kafka-server-start.sh`, etc.).
-
-## Flowcept Settings File
-
-Flowcept uses a settings file for configuration.
-
-- To create a minimal settings file, run: `flowcept --init-settings` → creates `~/.flowcept/settings.yaml`
-
-- To copy the full sample settings file, run: `flowcept --init-settings --full` → creates `~/.flowcept/settings.yaml`
-
-- To switch runtime mode, apply a profile after creating the file:
+For interactive and live provenance exploration, run Flowcept with MongoDB and the webservice.
+The UI shows captured workflows, so run one or more workflows after the service is up to populate the database.
```bash
+pip install "flowcept[webservice,mongo]"
+make services-mongo
flowcept --init-settings --full -y
flowcept --config-profile full-online -y
+flowcept --start-ui
```
-Meaning:
+Open `http://localhost:8008`.
-- `--init-settings` = minimal file with default settings.
-- `--init-settings --full` = copy `resources/sample_settings.yaml`
-- `--config-profile ...` = overlay a runtime mode on top of the existing file
-
----
-
-#### What You Can Configure
-
-- Message queue and database routes, ports, and paths
-- MCP agent ports and LLM API keys
-- Buffer sizes and flush settings
-- Telemetry capture settings
-- Instrumentation and PyTorch details
-- Log levels
-- Data observability adapters
-- And more (see [example file](resources/sample_settings.yaml))
-
----
-
-#### Custom Settings File
-
-Flowcept looks for its settings in the following order:
-
-1. Environment variable `FLOWCEPT_SETTINGS_PATH` — if set, Flowcept will use this path
-2. `~/.flowcept/settings.yaml` — created by running `flowcept --init-settings`
-3. [Default sample file](resources/sample_settings.yaml) — used if neither of the above is found
-
-Important:
-
-- environment variables can override settings values
-- use profiles for mode switches such as `full-online`, `full-offline`, `mq-only`, `mq-only-no-flush`, `full-telemetry`
-- adapter flags are additive:
+In another terminal, from the root of a clone of this repository, run a workflow with the same online settings, for example:
```bash
-flowcept --init-settings --dask -y
-flowcept --init-settings --mlflow -y
-flowcept --init-settings --tensorboard -y
+python examples/start_here.py
```
-They add `adapters.` to the current settings file instead of replacing the whole file.
+The UI provides:
-# Examples
+- **Workflow and campaign browser** for runs, tasks, artifacts, and agents.
+- **Dataflow graph** for W3C PROV-style task/data lineage.
+- **Task and data inspectors** for metadata, inputs, outputs, timing, and artifact details.
+- **Dashboards** for runtime summaries and workflow metrics.
+- **Workflow cards** downloadable as Markdown or PDF.
+- **Chat agent** for natural-language questions and interactive provenance visualization.
-### Adapters and Notebooks
+See the [Web UI docs](https://flowcept.readthedocs.io/en/latest/web_ui.html) and [ui/README.md](ui/README.md).
- See the [Jupyter Notebooks](notebooks) and [Examples directory](examples) for utilization examples.
+## Flowcept Agent
-## Workflow Cards
+The Flowcept Agent lets users ask natural language questions over captured provenance instead of hand-writing queries.
+It supports two complementary modes:
-The [Quickstart](#quickstart) example (`python quickstart.py`) shows a workflow card.
+- **Web chat**: built into the Flowcept UI. The chat webservice route calls Flowcept's LangChain tool orchestrator and keeps answers scoped to the workflow or campaign being viewed.
+- **MCP server**: exposes the same provenance tool surface to external assistants such as Codex, Claude Code, or other MCP clients.
-Flowcept introduces the Workflow Card concept: a structured markdown summary of a workflow execution covering:
+The tool surface covers two analysis paths:
-- **Summary** — workflow name, IDs, execution window, elapsed time, host, git info
-- **Workflow-level Summary** — activity count, status counts, top slowest activities
-- **Workflow Structure** — ASCII diagram of the activity DAG
-- **Timing Report** — per-activity start, end, and median elapsed times with insights
-- **Per Activity Details** — aggregated inputs (`used`) and outputs (`generated`) per activity
-- **Per-activity Resource Usage** — CPU, memory, disk I/O, network, and GPU deltas (when telemetry is captured)
-- **Object Artifacts Summary** — versioned artifacts produced or consumed by the workflow
+- **Streaming provenance tools** query records directly from the runtime stream. These are best for monitoring and runtime analysis while a workflow is still running.
+- **Persisted provenance tools** query MongoDB or LMDB. These are best for postmortem analysis, dashboards, workflow cards, and live analysis with a small flush delay.
-Cards also support **campaign-level reporting** for multi-workflow runs (replicated experiments or multi-stage pipelines):
+The Flowcept Agent itself also uses these tools through its LangChain orchestration layer.
-```python
-# From a JSONL buffer file (no DB needed)
-Flowcept.generate_report(input_jsonl_path="flowcept_messages.jsonl")
+Example questions:
-# From a live DB query
-Flowcept.generate_report(workflow_id="")
-Flowcept.generate_report(campaign_id="")
+- “Which activity took the longest?”
+- “Show the lineage of the selected model.”
+- “Which input files were larger than 100 MB?”
+- “Generate a workflow card for this campaign.”
+- “Highlight all tasks related to the best model.”
-# As PDF
-Flowcept.generate_report(workflow_id="", report_type="provenance_report", format="pdf")
-```
+See the [Flowcept Agent docs](https://flowcept.readthedocs.io/en/latest/agent.html) and [src/flowcept/agents/README.md](src/flowcept/agents/README.md).
-See [`docs/reporting.rst`](docs/reporting.rst) and [`src/flowcept/report/README.md`](src/flowcept/report/README.md) for the full reporting reference.
+## Capture Options
-## Web UI
+Flowcept supports several capture styles. Use the least invasive one that answers your provenance questions.
-Flowcept ships a built-in web interface for browsing and analyzing provenance data. Start it with:
+| Need | Use |
+|---|---|
+| Minimal function-level capture | `@flowcept_task` |
+| Whole workflow context | `with Flowcept():` or `@flowcept` |
+| Custom metadata and manual events | `FlowceptTask` |
+| Loop capture | `FlowceptLoop` |
+| ML model and tensor semantics | PyTorch instrumentation |
+| Tool/framework observability | Dask, MLflow, TensorBoard, MCP, and other adapters |
+| Distributed runtime stream | [Redis](https://redis.io), [RabbitMQ](https://www.rabbitmq.com), [Kafka](https://kafka.apache.org), or [Mofka](https://mofka.readthedocs.io) message queues |
+| Queryable persistent store | MongoDB or LMDB |
-```bash
-pip install flowcept[webservice]
-flowcept --start-ui # starts the webservice + dev server; open http://localhost:8008
-```
-
-Key features:
-- **Provenance browser** — campaigns, workflows, tasks, and artifacts with drill-down views
-- **Live updates** — SSE-based streaming so the task table updates while a workflow runs
-- **Dashboards** — per-workflow and per-campaign chart dashboards (configurable, stored in MongoDB)
-- **Dataflow graph** — W3C PROV-style graph of task inputs/outputs; click any node to inspect its provenance
-- **LLM chat agent** — ask natural-language questions about your provenance data; charts render inline; queries are automatically scoped to the current workflow or campaign
-- **Lineage highlighting** — ask the chat agent to highlight the full provenance lineage (ancestors + descendants) of any task directly in the Dataflow graph
-
-The chat agent queries the **persisted store** (MongoDB) and the **live stream** (via near-real-time DB flushes from the MQ). For sub-second in-flight queries, use the MCP agent instead.
+Read [Provenance Capture Methods](https://flowcept.readthedocs.io/en/latest/prov_capture.html) for examples.
-See [`docs/web_ui.rst`](docs/web_ui.rst) and [`ui/README.md`](ui/README.md) for the full reference.
+## Storage And Querying
-# Summary: Observability, Instrumentation, MQs, DBs, and Querying
+Flowcept can run fully offline or as an online distributed system.
-| Category | Supported Options |
-|------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| **Data Observability Adapters** | [MLflow](https://github.com/ORNL/flowcept/blob/main/examples/mlflow_example.py), [Dask](https://github.com/ORNL/flowcept/blob/main/examples/dask_example.py), [TensorBoard](https://github.com/ORNL/flowcept/blob/main/examples/tensorboard_example.py) |
-| **Instrumentation and Decorators** | - [@flowcept](https://github.com/ORNL/flowcept/blob/main/examples/start_here.py): encapsulate a function (e.g., a main function) as a workflow. - [@flowcept_task](https://github.com/ORNL/flowcept/blob/main/examples/instrumented_simple_example.py): encapsulate a function as a task. - `@telemetry_flowcept_task`: same as `@flowcept_task`, but optimized for telemetry capture. - `@lightweight_flowcept_task`: same as `@flowcept_task`, but very lightweight, optimized for HPC workloads - [Loop](https://github.com/ORNL/flowcept/blob/main/examples/instrumented_loop_example.py) - [PyTorch Model](https://github.com/ORNL/flowcept/blob/main/examples/llm_complex/llm_model.py) - [MCP Agent](https://github.com/ORNL/flowcept/blob/main/examples/agents/aec_agent_mock.py) |
-| **Context Manager** | `with Flowcept():` `# Workflow code`
Similar to the `@flowcept` decorator, but more flexible for instrumenting code blocks that aren’t encapsulated in a single function and for workflows with scattered code across multiple files. |
-| **Custom Task Creation** | `FlowceptTask(activity_id=, used=, generated=, ...)`
Use for fully customizable task instrumentation. Publishes directly to the MQ either via context management (`with FlowceptTask(...)`) or by calling `send()`. It needs to have a `Flowcept().start()` first (or within a `with Flowcept()` context). See [example](examples/consumers/ping_pong_example.py). |
-| **Message Queues (MQ)** | - **Disabled** (offline mode: provenance events stay in an in-memory buffer, not accessible to external processes) - [Redis](https://redis.io) → default, lightweight, easy to run anywhere - [Kafka](https://kafka.apache.org) → for distributed, production setups - [Mofka](https://mofka.readthedocs.io) → optimized for HPC runs
_Setup example:_ [docker compose](https://github.com/ORNL/flowcept/blob/main/deployment/compose.yml) |
-| **Databases** | - **Disabled** → Flowcept runs in ephemeral mode (data only in MQ, no persistence) - **[MongoDB](https://www.mongodb.com)** → default, rich queries and efficient bulk writes - **[LMDB](https://lmdb.readthedocs.io)** → lightweight, file-based, no external service, basic query support |
-| **Querying and Monitoring** | - **[Web UI](docs/web_ui.rst)** → browser-based provenance browser with dashboards, live updates, and an embedded LLM chat agent that queries the persisted store and highlights provenance lineage in the Dataflow graph - **[Grafana](deployment/compose-grafana.yml)** → dashboarding via MongoDB connector - **MCP Flowcept Agent** → LLM-based querying of the live MQ stream (Redis/Kafka/Mofka) via external assistants (Claude Code, Codex, etc.) or offline JSONL buffer |
-| **Custom Consumer** | You can implement your own consumer to monitor or query the provenance stream in real time. Useful for custom analytics, monitoring, debugging, or to persist the data in a different data model (e.g., graph) . See [example](examples/consumers/simple_consumer.py). |
+| Mode | What happens |
+|---|---|
+| Offline JSONL | Provenance is captured locally and can be loaded later. |
+| MQ stream | Runtime records are streamed through [Redis](https://redis.io), [RabbitMQ](https://www.rabbitmq.com), [Kafka](https://kafka.apache.org), or [Mofka](https://mofka.readthedocs.io). |
+| MongoDB | Rich online queries, web UI, dashboards, workflow cards, and agent chat. |
+| LMDB | Lightweight local persistence without an external database service. |
+Query surfaces:
-## Performance Tuning for Performance Evaluation
+- Python API: `Flowcept.db`
+- CLI: `flowcept --query ...`
+- REST API: `flowcept --start-webservice`
+- Web UI: browser-based exploration and dashboards
+- Flowcept Agent: natural-language provenance querying and reasoning
-In the settings.yaml file, many variables may impact interception efficiency.
-Please be mindful of the following parameters:
+Read [Provenance Querying](https://flowcept.readthedocs.io/en/latest/prov_query.html), [Provenance Storage](https://flowcept.readthedocs.io/en/latest/prov_storage.html), and the [REST API docs](https://flowcept.readthedocs.io/en/latest/rest_api.html).
-* `mq`
- - `buffer_size` and `insertion_buffer_time_secs`. -- `buffer_size: 1` is really bad for performance, but it will give the most up-to-date info possible to the MQ.
-
-* `log`
- - set both stream and files to disable
-
-* `telemetry_capture`
- The more things you enable, the more overhead you'll get. For GPU, you can turn on/off specific metrics.
+## Workflow Cards
-* `instrumentation`
- This will configure whether every single granular step in the model training process will be captured. Disable very granular model inspection and try to use more lightweight methods. There are commented instructions in the settings.yaml sample file.
+Flowcept generates workflow cards: structured run summaries for reproducibility, review, and communication.
+Cards include workflow metadata, execution status, activity summaries, resource usage, artifacts, and lineage structure.
-Other thing to consider:
+```python
+from flowcept import Flowcept
-```
-project:
- replace_non_json_serializable: false # Here it will assume that all captured data are JSON serializable
- db_flush_mode: offline # This disables the feature of runtime analysis in the database.
-mq:
- chunk_size: -1 # This disables chunking the messages to be sent to the MQ. Use this only if the main memory of the compute notes is large enough.
+Flowcept.generate_report(workflow_id="<workflow_id>")
+Flowcept.generate_report(campaign_id="<campaign_id>")
+Flowcept.generate_report(workflow_id="<workflow_id>", format="pdf")
```
-Other variables depending on the adapter may impact too. For instance, in Dask, timestamp creation by workers add interception overhead. As we evolve the software, other variables that impact overhead appear and we might not stated them in this README file yet. If you are doing extensive performance evaluation experiments using this software, please reach out to us (e.g., create an issue in the repository) for hints on how to reduce the overhead of our software.
+The Markdown workflow cards follow the upstream
+[Workflow Provenance Card template](https://github.com/data-cards/workflow-provenance-card).
-## Install AMD GPU Lib
+Read the [Reporting docs](https://flowcept.readthedocs.io/en/latest/reporting.html).
-Only needed for AMD GPU telemetry capture. NVIDIA users use `flowcept[nvidia]` instead.
+## Installation
+
+Install only what you need.
-**Quick install:**
```bash
-pip install flowcept[amd]
+pip install flowcept # minimal package
+pip install "flowcept[mongo]" # MongoDB support
+pip install "flowcept[webservice]" # REST API and web UI
+pip install "flowcept[dask]" # Dask adapter
+pip install "flowcept[mlflow]" # MLflow adapter
+pip install "flowcept[rabbitmq]" # RabbitMQ MQ
+pip install "flowcept[kafka]" # Kafka MQ
+pip install "flowcept[telemetry]" # CPU/memory telemetry
+pip install "flowcept[lmdb]" # LMDB storage
+pip install "flowcept[llm_agent]" # Flowcept Agent / MCP / chat dependencies
```
-This installs the latest `amdsmi` from PyPI. The `amdsmi` Python package is a thin wrapper around the system's `libamd_smi.so`, so the PyPI version must match your ROCm installation. If you get a runtime error like `undefined symbol` or `libamd_smi.so not found`, follow the steps below.
+`flowcept[all]` exists for development and broad testing, but it is usually better to install targeted extras.
-**Matching the version to your ROCm:**
+Read [Setup and Installation](https://flowcept.readthedocs.io/en/latest/setup.html).
-1. Find your ROCm version:
- ```bash
- ls /opt/rocm-* # e.g. /opt/rocm-6.2.4
- # or: rocm-smi --version
- ```
+## Settings
-2. Find the matching `amdsmi` PyPI version — the major/minor version tracks ROCm (e.g. ROCm 6.2.x → `amdsmi==6.2.*`, ROCm 7.0.x → `amdsmi==7.0.*`):
- ```bash
- pip index versions amdsmi # lists all available versions
- pip install amdsmi==
- ```
+Flowcept is configured through `settings.yaml`, with environment variables taking precedence.
-3. Set `LD_LIBRARY_PATH` so Python finds the correct shared library:
- ```bash
- export LD_LIBRARY_PATH=/opt/rocm-/lib:$LD_LIBRARY_PATH
- ```
- Add this to your job script or shell profile so it persists.
-
-**Verify:**
```bash
-python -c "from amdsmi import amdsmi_init, amdsmi_get_processor_handles; amdsmi_init(); print(len(amdsmi_get_processor_handles()), 'GPU(s) found')"
+flowcept --init-settings -y # minimal settings
+flowcept --init-settings --full -y # full template
+flowcept --config-profile full-online -y # online MQ + DB profile
```
-## Torch Dependencies
+Configuration precedence:
-Some unit tests utilize `torch==2.2.2`, `torchtext=0.17.2`, and `torchvision==0.17.2`. They are only really needed to run some tests and will be installed if you run `pip install flowcept[ml_dev]` or `pip install flowcept[all]`. If you want to use Flowcept with Torch, please adapt torch dependencies according to your project's dependencies.
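+1. Environment variables that override individual settings values.
+2. The settings file pointed to by the `FLOWCEPT_SETTINGS_PATH` environment variable, if set.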
+3. `~/.flowcept/settings.yaml`.
+4. Built-in defaults.
-## Documentation
+Read the [CLI Reference](https://flowcept.readthedocs.io/en/latest/cli-reference.html).
-Full documentation is available on [Read the Docs](https://flowcept.readthedocs.io/).
+## Examples And Notebooks
-## Cite us
+- [examples](examples): runnable scripts for decorators, Dask, MLflow, TensorBoard, PyTorch, agents, consumers, and object storage.
+- [notebooks](notebooks): exploratory examples and tutorials.
+- [Default User Guide](https://flowcept.readthedocs.io/en/latest/default_user_guide.html): recommended end-to-end guide.
-If you used Flowcept in your research, consider citing our paper.
+## For Code Assistants
-```
-Towards Lightweight Data Integration using Multi-workflow Provenance and Data Observability
-R. Souza, T. Skluzacek, S. Wilkinson, M. Ziatdinov, and R. da Silva
-19th IEEE International Conference on e-Science, 2023.
-```
+Code assistants should read [AGENTS.md](AGENTS.md) first.
+It is the single source of truth for repository-specific engineering rules.
+
+For Flowcept feature usage, read the maintained RST docs under [docs](docs) instead of inventing behavior from source snippets.
-**Bibtex:**
+## Cite Flowcept
-```latex
-@inproceedings{souza2023towards,
+If you use Flowcept in research, please cite:
+
+```bibtex
+@inproceedings{souza2023towards,
author = {Souza, Renan and Skluzacek, Tyler J and Wilkinson, Sean R and Ziatdinov, Maxim and da Silva, Rafael Ferreira},
booktitle = {IEEE International Conference on e-Science},
doi = {10.1109/e-Science58273.2023.10254822},
@@ -575,13 +296,19 @@ R. Souza, T. Skluzacek, S. Wilkinson, M. Ziatdinov, and R. da Silva
}
```
-## Disclaimer & Get in Touch
+More publications are listed in the [Flowcept publications page](https://flowcept.readthedocs.io/en/latest/publications.html).
+
+## Community And Contributing
-Refer to [Contributing](CONTRIBUTING.md) for adding new adapters or contributing with the codebase.
+Flowcept is research software developed for distributed scientific workflows.
+Issues, discussions, examples, adapters, and documentation improvements are welcome.
-Please note that this a research software. We encourage you to give it a try and use it with your own stack.
-We are continuously working on improving documentation and adding more examples and notebooks, but we are continuously improving documentation and examples. If you are interested in working with Flowcept in your own scientific project, we can give you a jump start if you reach out to us. Feel free to [create an issue](https://github.com/ORNL/flowcept/issues/new), [create a new discussion thread](https://github.com/ORNL/flowcept/discussions/new/choose) or drop us an email (we trust you'll find a way to reach out to us :wink:).
+- [Contributing guide](CONTRIBUTING.md)
+- [GitHub issues](https://github.com/ORNL/flowcept/issues/new)
+- [GitHub discussions](https://github.com/ORNL/flowcept/discussions/new/choose)
+- [Slack channel](https://workflowscommunity.slack.com/archives/C06L5GYJKQS)
## Acknowledgement
-This research uses resources of the Oak Ridge Leadership Computing Facility at the Oak Ridge National Laboratory, which is supported by the Office of Science of the U.S. Department of Energy under Contract No. DE-AC05-00OR22725.
+This research uses resources of the Oak Ridge Leadership Computing Facility at Oak Ridge National Laboratory,
+which is supported by the Office of Science of the U.S. Department of Energy under Contract No. DE-AC05-00OR22725.
diff --git a/deployment/compose-rabbitmq.yml b/deployment/compose-rabbitmq.yml
new file mode 100644
index 00000000..b8478366
--- /dev/null
+++ b/deployment/compose-rabbitmq.yml
@@ -0,0 +1,33 @@
+name: flowcept  # Compose project name
+services:
+  flowcept_redis:  # Redis, published on host port 6379
+    container_name: flowcept_redis
+    image: redis
+    ports:
+      - "6379:6379"  # quoted like the RabbitMQ mappings below; always a string
+
+  flowcept_mongo:  # MongoDB, published on host port 27017
+    container_name: flowcept_mongo
+    image: mongo:latest
+    ports:
+      - "27017:27017"
+    volumes:
+      - mongo_data:/data/db  # named volume (declared at the bottom) holds the database files
+
+  # RabbitMQ broker; reachable on 5672 (AMQP) and 15672 (management UI)
+  flowcept_rabbitmq:
+    container_name: flowcept_rabbitmq
+    image: rabbitmq:3-management
+    ports:
+      - "5672:5672"
+      - "15672:15672"
+    environment:
+      RABBITMQ_DEFAULT_USER: guest  # local/CI credentials only; override for shared deployments
+      RABBITMQ_DEFAULT_PASS: guest
+
+networks:
+  flowcept:  # NOTE(review): no service in this file attaches to this network; confirm it is needed
+    driver: bridge
+
+volumes:
+  mongo_data:
diff --git a/docs/README.md b/docs/README.md
index 2134916c..40b99e08 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -445,7 +445,7 @@ agent:
#### Internal-LLM mode
-Flowcept builds the model using `build_llm_model()` (`src/flowcept/agents/agents_utils.py`).
+Flowcept builds the model using `build_llm_model()` (`src/flowcept/agents/llm/builders.py`).
Providers in code:
diff --git a/docs/agent.rst b/docs/agent.rst
index 6d211566..cd6bb894 100644
--- a/docs/agent.rst
+++ b/docs/agent.rst
@@ -27,35 +27,22 @@ Flowcept exposes provenance data to LLM-based agents through two complementary s
workflow is still executing. It also supports offline JSONL buffer files.
The two surfaces share the same underlying provenance tool core
-(``src/flowcept/agents/tools/prov_tools.py``) so queries stay consistent across both.
+(``src/flowcept/agents/data_query_tools/db_query_tools.py``) so queries stay consistent across both.
-The MCP agent has one backend and two orchestration paths:
-
-- **Internal LLM mode**: Flowcept builds the configured LLM and routes free-text messages through ``prompt_handler``.
-- **External LLM mode**: your outside assistant, such as Codex, Claude, LibreChat, Cursor, or another MCP client,
- owns routing and reasoning, while Flowcept provides the same MCP prompts, tools, and in-memory context.
-
-The modes are intended to expose the same functionality. The difference is only who orchestrates the tools.
+The MCP agent exposes explicit tools only. The outside assistant, such as Codex,
+Claude, LibreChat, Cursor, or another MCP client, owns routing and reasoning,
+while Flowcept provides the MCP prompts, tools, and in-memory context.
Configuring LLM orchestration
-----------------------------
-Internal mode:
-
-.. code-block:: yaml
-
- agent:
- external_llm: false
-
-External mode:
-
.. code-block:: yaml
agent:
external_llm: true
-In external mode, arbitrary free-text messages sent to ``prompt_handler`` are not internally routed. Use explicit
-commands, prompt-builder calls, and execution-tool calls from the outside assistant.
+When ``agent.external_llm`` is enabled, use explicit commands, prompt-builder
+calls, and execution-tool calls from the outside assistant.
Shared commands and prefixes
----------------------------
@@ -101,16 +88,16 @@ The agent resolves the matching task(s) via a Mongo-style filter, then the Dataf
tab dims all unrelated nodes and edges, tracing only the ancestor/descendant chain.
Click any node or empty space to reset the highlight manually.
-Internal prompt-handler example
--------------------------------
+Explicit MCP tool example
+-------------------------
.. code-block:: python
- from flowcept.agents.agent_client import run_tool
+ from flowcept.agents.mcp.mcp_client import run_tool
result = run_tool(
- "prompt_handler",
- kwargs={"message": "What are the top 5 slowest activities?"},
+ "run_workflow_query",
+ kwargs={"query": "What is the workflow name?"},
)
External prompt plus execution example
@@ -118,7 +105,7 @@ External prompt plus execution example
.. code-block:: python
- from flowcept.agents.agent_client import run_prompt, run_tool
+ from flowcept.agents.mcp.mcp_client import run_prompt, run_tool
prompt = run_prompt(
"build_df_query_prompt",
@@ -143,7 +130,7 @@ External workflow-message query example
.. code-block:: python
- from flowcept.agents.agent_client import run_prompt, run_tool
+ from flowcept.agents.mcp.mcp_client import run_prompt, run_tool
prompt = run_prompt(
"build_workflow_query_prompt",
@@ -170,7 +157,8 @@ This is a minimal offline example:
import json
from flowcept import Flowcept, flowcept_task
- from flowcept.agents.flowcept_agent import FlowceptAgent
+ from flowcept.agents.mcp.mcp_client import run_tool
+ from flowcept.agents.mcp.mcp_server import FlowceptMCPServer
@flowcept_task
def sum_one(x):
@@ -182,10 +170,10 @@ This is a minimal offline example:
f.dump_buffer("flowcept_buffer.jsonl")
# Start the agent from the buffer file and query it
- agent = FlowceptAgent(buffer_path="flowcept_buffer.jsonl")
+ agent = FlowceptMCPServer(buffer_path="flowcept_buffer.jsonl")
# Or load a list of messages directly
- # agent = FlowceptAgent(buffer_messages=msgs)
+ # agent = FlowceptMCPServer(buffer_messages=msgs)
agent.start()
- resp = agent.query("how many tasks?")
+ resp = run_tool("run_df_query", kwargs={"query": "how many tasks?"})[0]
print(json.loads(resp))
agent.stop()
diff --git a/docs/architecture.rst b/docs/architecture.rst
index b2a2341f..3e273db4 100644
--- a/docs/architecture.rst
+++ b/docs/architecture.rst
@@ -40,8 +40,9 @@ To minimize interference with HPC applications, provenance messages are buffered
and streamed asynchronously to a **publish–subscribe hub**.
Flowcept supports configurable flushing strategies and multiple broker backends:
-- **Redis** → low-latency messaging, minimal setup, default for most use cases.
-- **Kafka** → high throughput for data-intensive workflows.
+- **Redis** → low-latency messaging, minimal setup, default for most use cases.
+- **RabbitMQ** → AMQP-based broker, suitable for cloud and enterprise environments.
+- **Kafka** → high throughput for data-intensive workflows.
- **Mofka** → RDMA-optimized transport, ideal for tightly coupled HPC networks.
Regardless of the broker, all provenance messages follow a **common schema**.
diff --git a/docs/index.rst b/docs/index.rst
index b3029887..b0ec9be0 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -77,3 +77,4 @@ Flowcept
contributing
cli-reference
api-reference
+ publications
diff --git a/docs/openapi/flowcept-openapi.json b/docs/openapi/flowcept-openapi.json
index f164a8db..c665e52c 100644
--- a/docs/openapi/flowcept-openapi.json
+++ b/docs/openapi/flowcept-openapi.json
@@ -12,7 +12,7 @@
"health"
],
"summary": "Live",
- "description": "Liveness check.",
+ "description": "Liveness check \u2014 process is running.",
"operationId": "live_api_v1_health_live_get",
"responses": {
"200": {
@@ -36,18 +36,14 @@
"health"
],
"summary": "Ready",
- "description": "Readiness check.",
+ "description": "Readiness check \u2014 verifies all enabled services via ``Flowcept.services_alive()``.\n\nWhich services are checked is driven by settings.yaml (MQ, KVDB, MongoDB, LMDB, LLM).\nReturns HTTP 200 when all enabled services are reachable, HTTP 503 otherwise.\nThe response body includes per-service status so callers can identify which\nservice is down without reading server logs.",
"operationId": "ready_api_v1_health_ready_get",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
- "schema": {
- "additionalProperties": true,
- "type": "object",
- "title": "Response Ready Api V1 Health Ready Get"
- }
+ "schema": {}
}
}
}
@@ -656,6 +652,117 @@
}
}
},
+ "/api/v1/workflows/{workflow_id}/node_positions": {
+ "get": {
+ "tags": [
+ "workflows"
+ ],
+ "summary": "Get Node Positions",
+ "description": "Get node positions for a workflow graph type.",
+ "operationId": "get_node_positions_api_v1_workflows__workflow_id__node_positions_get",
+ "parameters": [
+ {
+ "name": "workflow_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "title": "Workflow Id"
+ }
+ },
+ {
+ "name": "graph_type",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "description": "Graph type: 'dataflow', 'task', or 'activity'",
+ "title": "Graph Type"
+ },
+ "description": "Graph type: 'dataflow', 'task', or 'activity'"
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "additionalProperties": true,
+ "title": "Response Get Node Positions Api V1 Workflows Workflow Id Node Positions Get"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ },
+ "post": {
+ "tags": [
+ "workflows"
+ ],
+ "summary": "Save Node Positions",
+ "description": "Save node positions for a workflow graph type.",
+ "operationId": "save_node_positions_api_v1_workflows__workflow_id__node_positions_post",
+ "parameters": [
+ {
+ "name": "workflow_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "title": "Workflow Id"
+ }
+ }
+ ],
+ "requestBody": {
+ "required": true,
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "additionalProperties": true,
+ "title": "Payload"
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "additionalProperties": true,
+ "title": "Response Save Node Positions Api V1 Workflows Workflow Id Node Positions Post"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
"/api/v1/tasks": {
"get": {
"tags": [
@@ -1129,6 +1236,49 @@
}
}
}
+ },
+ "delete": {
+ "tags": [
+ "objects"
+ ],
+ "summary": "Delete Object",
+ "description": "Delete an object and all its versions by object_id.",
+ "operationId": "delete_object_api_v1_objects__object_id__delete",
+ "parameters": [
+ {
+ "name": "object_id",
+ "in": "path",
+ "required": true,
+ "schema": {
+ "type": "string",
+ "title": "Object Id"
+ }
+ }
+ ],
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "additionalProperties": true,
+ "title": "Response Delete Object Api V1 Objects Object Id Delete"
+ }
+ }
+ }
+ },
+ "422": {
+ "description": "Validation Error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/HTTPValidationError"
+ }
+ }
+ }
+ }
+ }
}
},
"/api/v1/objects/{object_id}/versions/{version}": {
@@ -2253,6 +2403,30 @@
}
}
},
+ "/api/v1/agents/cleanup/empty": {
+ "delete": {
+ "tags": [
+ "agents"
+ ],
+ "summary": "Delete Empty Agents",
+ "description": "Delete all agents from the database that don't have associated task_id.",
+ "operationId": "delete_empty_agents_api_v1_agents_cleanup_empty_delete",
+ "responses": {
+ "200": {
+ "description": "Successful Response",
+ "content": {
+ "application/json": {
+ "schema": {
+ "additionalProperties": true,
+ "type": "object",
+ "title": "Response Delete Empty Agents Api V1 Agents Cleanup Empty Delete"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
"/api/v1/stats/tasks/summary": {
"get": {
"tags": [
@@ -3065,7 +3239,8 @@
"enum": [
"tasks",
"workflows",
- "objects"
+ "objects",
+ "collection_sizes"
],
"title": "Source",
"default": "tasks"
@@ -3129,7 +3304,7 @@
"anyOf": [
{
"items": {
- "$ref": "#/components/schemas/SortSpec"
+ "$ref": "#/components/schemas/flowcept__webservice__schemas__dashboards__SortSpec"
},
"type": "array"
},
@@ -3226,6 +3401,17 @@
"type": "boolean",
"title": "Allow Dashboard Edit",
"default": false
+ },
+ "thread_id": {
+ "anyOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "title": "Thread Id"
}
},
"type": "object",
@@ -3542,7 +3728,7 @@
"anyOf": [
{
"items": {
- "$ref": "#/components/schemas/SortSpec"
+ "$ref": "#/components/schemas/flowcept__webservice__schemas__common__SortSpec"
},
"type": "array"
},
@@ -3613,7 +3799,7 @@
"anyOf": [
{
"items": {
- "$ref": "#/components/schemas/SortSpec"
+ "$ref": "#/components/schemas/flowcept__webservice__schemas__common__SortSpec"
},
"type": "array"
},
@@ -3647,30 +3833,6 @@
"title": "QueryRequest",
"description": "Read-only query payload."
},
- "SortSpec": {
- "properties": {
- "field": {
- "type": "string",
- "minLength": 1,
- "title": "Field"
- },
- "order": {
- "type": "integer",
- "enum": [
- 1,
- -1
- ],
- "title": "Order",
- "default": 1
- }
- },
- "type": "object",
- "required": [
- "field"
- ],
- "title": "SortSpec",
- "description": "Sort field/order pair."
- },
"TimeseriesRequest": {
"properties": {
"filter": {
@@ -3762,6 +3924,53 @@
"type": "object",
"title": "VizSpec",
"description": "How a chart renders its rows."
+ },
+ "flowcept__webservice__schemas__common__SortSpec": {
+ "properties": {
+ "field": {
+ "type": "string",
+ "minLength": 1,
+ "title": "Field"
+ },
+ "order": {
+ "type": "integer",
+ "enum": [
+ 1,
+ -1
+ ],
+ "title": "Order",
+ "default": 1
+ }
+ },
+ "type": "object",
+ "required": [
+ "field"
+ ],
+ "title": "SortSpec",
+ "description": "Sort field/order pair."
+ },
+ "flowcept__webservice__schemas__dashboards__SortSpec": {
+ "properties": {
+ "field": {
+ "type": "string",
+ "title": "Field"
+ },
+ "order": {
+ "type": "integer",
+ "enum": [
+ 1,
+ -1
+ ],
+ "title": "Order",
+ "default": 1
+ }
+ },
+ "type": "object",
+ "required": [
+ "field"
+ ],
+ "title": "SortSpec",
+ "description": "Sort field/order pair for chart data queries."
}
}
}
diff --git a/docs/openapi/flowcept-openapi.yaml b/docs/openapi/flowcept-openapi.yaml
index 5b7c83af..07c0d552 100644
--- a/docs/openapi/flowcept-openapi.yaml
+++ b/docs/openapi/flowcept-openapi.yaml
@@ -10,7 +10,7 @@ paths:
tags:
- health
summary: Live
- description: Liveness check.
+ description: "Liveness check \u2014 process is running."
operationId: live_api_v1_health_live_get
responses:
'200':
@@ -26,17 +26,18 @@ paths:
tags:
- health
summary: Ready
- description: Readiness check.
+ description: "Readiness check \u2014 verifies all enabled services via ``Flowcept.services_alive()``.\n\
+ \nWhich services are checked is driven by settings.yaml (MQ, KVDB, MongoDB,\
+ \ LMDB, LLM).\nReturns HTTP 200 when all enabled services are reachable, HTTP\
+ \ 503 otherwise.\nThe response body includes per-service status so callers\
+ \ can identify which\nservice is down without reading server logs."
operationId: ready_api_v1_health_ready_get
responses:
'200':
description: Successful Response
content:
application/json:
- schema:
- additionalProperties: true
- type: object
- title: Response Ready Api V1 Health Ready Get
+ schema: {}
/api/v1/info:
get:
tags:
@@ -417,6 +418,81 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
+ /api/v1/workflows/{workflow_id}/node_positions:
+ get:
+ tags:
+ - workflows
+ summary: Get Node Positions
+ description: Get node positions for a workflow graph type.
+ operationId: get_node_positions_api_v1_workflows__workflow_id__node_positions_get
+ parameters:
+ - name: workflow_id
+ in: path
+ required: true
+ schema:
+ type: string
+ title: Workflow Id
+ - name: graph_type
+ in: query
+ required: true
+ schema:
+ type: string
+ description: 'Graph type: ''dataflow'', ''task'', or ''activity'''
+ title: Graph Type
+ description: 'Graph type: ''dataflow'', ''task'', or ''activity'''
+ responses:
+ '200':
+ description: Successful Response
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ title: Response Get Node Positions Api V1 Workflows Workflow Id Node
+ Positions Get
+ '422':
+ description: Validation Error
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/HTTPValidationError'
+ post:
+ tags:
+ - workflows
+ summary: Save Node Positions
+ description: Save node positions for a workflow graph type.
+ operationId: save_node_positions_api_v1_workflows__workflow_id__node_positions_post
+ parameters:
+ - name: workflow_id
+ in: path
+ required: true
+ schema:
+ type: string
+ title: Workflow Id
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ title: Payload
+ responses:
+ '200':
+ description: Successful Response
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ title: Response Save Node Positions Api V1 Workflows Workflow Id Node
+ Positions Post
+ '422':
+ description: Validation Error
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/HTTPValidationError'
/api/v1/tasks:
get:
tags:
@@ -699,6 +775,34 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
+ delete:
+ tags:
+ - objects
+ summary: Delete Object
+ description: Delete an object and all its versions by object_id.
+ operationId: delete_object_api_v1_objects__object_id__delete
+ parameters:
+ - name: object_id
+ in: path
+ required: true
+ schema:
+ type: string
+ title: Object Id
+ responses:
+ '200':
+ description: Successful Response
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ title: Response Delete Object Api V1 Objects Object Id Delete
+ '422':
+ description: Validation Error
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/HTTPValidationError'
/api/v1/objects/{object_id}/versions/{version}:
get:
tags:
@@ -1394,6 +1498,23 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/HTTPValidationError'
+ /api/v1/agents/cleanup/empty:
+ delete:
+ tags:
+ - agents
+ summary: Delete Empty Agents
+ description: Delete all agents from the database that don't have associated
+ task_id.
+ operationId: delete_empty_agents_api_v1_agents_cleanup_empty_delete
+ responses:
+ '200':
+ description: Successful Response
+ content:
+ application/json:
+ schema:
+ additionalProperties: true
+ type: object
+ title: Response Delete Empty Agents Api V1 Agents Cleanup Empty Delete
/api/v1/stats/tasks/summary:
get:
tags:
@@ -1915,6 +2036,7 @@ components:
- tasks
- workflows
- objects
+ - collection_sizes
title: Source
default: tasks
filter:
@@ -1948,7 +2070,7 @@ components:
sort:
anyOf:
- items:
- $ref: '#/components/schemas/SortSpec'
+ $ref: '#/components/schemas/flowcept__webservice__schemas__dashboards__SortSpec'
type: array
- type: 'null'
title: Sort
@@ -2013,6 +2135,11 @@ components:
type: boolean
title: Allow Dashboard Edit
default: false
+ thread_id:
+ anyOf:
+ - type: string
+ - type: 'null'
+ title: Thread Id
type: object
required:
- messages
@@ -2227,7 +2354,7 @@ components:
sort:
anyOf:
- items:
- $ref: '#/components/schemas/SortSpec'
+ $ref: '#/components/schemas/flowcept__webservice__schemas__common__SortSpec'
type: array
- type: 'null'
title: Sort
@@ -2271,7 +2398,7 @@ components:
sort:
anyOf:
- items:
- $ref: '#/components/schemas/SortSpec'
+ $ref: '#/components/schemas/flowcept__webservice__schemas__common__SortSpec'
type: array
- type: 'null'
title: Sort
@@ -2289,24 +2416,6 @@ components:
type: object
title: QueryRequest
description: Read-only query payload.
- SortSpec:
- properties:
- field:
- type: string
- minLength: 1
- title: Field
- order:
- type: integer
- enum:
- - 1
- - -1
- title: Order
- default: 1
- type: object
- required:
- - field
- title: SortSpec
- description: Sort field/order pair.
TimeseriesRequest:
properties:
filter:
@@ -2374,3 +2483,38 @@ components:
type: object
title: VizSpec
description: How a chart renders its rows.
+ flowcept__webservice__schemas__common__SortSpec:
+ properties:
+ field:
+ type: string
+ minLength: 1
+ title: Field
+ order:
+ type: integer
+ enum:
+ - 1
+ - -1
+ title: Order
+ default: 1
+ type: object
+ required:
+ - field
+ title: SortSpec
+ description: Sort field/order pair.
+ flowcept__webservice__schemas__dashboards__SortSpec:
+ properties:
+ field:
+ type: string
+ title: Field
+ order:
+ type: integer
+ enum:
+ - 1
+ - -1
+ title: Order
+ default: 1
+ type: object
+ required:
+ - field
+ title: SortSpec
+ description: Sort field/order pair for chart data queries.
diff --git a/docs/publications.rst b/docs/publications.rst
new file mode 100644
index 00000000..60f6c81e
--- /dev/null
+++ b/docs/publications.rst
@@ -0,0 +1,155 @@
+Publications
+============
+
+Flowcept Papers
+---------------
+
+**Towards Lightweight Data Integration using Multi-workflow Provenance and Data Observability**
+
+R. Souza, T. J. Skluzacek, S. R. Wilkinson, M. Ziatdinov, and R. Ferreira da Silva.
+*IEEE International Conference on e-Science*, Limassol, Cyprus, 2023.
+
+Introduces Flowcept's lightweight runtime provenance and data observability architecture and
+shows minimal-intrusion capture across heterogeneous workflows.
+
+Links:
+`doi <https://doi.org/10.1109/e-Science58273.2023.10254822>`__ |
+`pdf <https://arxiv.org/pdf/2308.09004.pdf>`__
+
+
+**PROV-AGENT: Unified Provenance for Tracking AI Agent Interactions in Agentic Workflows**
+
+R. Souza, A. Gueroudji, S. DeWitt, D. Rosendo, T. Ghosal, R. Ross,
+P. Balaprakash, and R. Ferreira da Silva.
+*IEEE International Conference on e-Science*, Chicago, USA, 2025.
+
+Defines agentic provenance and a unified provenance model and tooling to capture,
+link, and query AI-agent interactions within agentic workflows.
+
+Links:
+`doi <https://doi.org/10.1109/eScience65000.2025.00093>`__ |
+`pdf <https://arxiv.org/pdf/2508.02866>`__ |
+`html `__
+
+
+**Workflow Provenance in the Computing Continuum for Responsible, Trustworthy, and Energy-Efficient AI**
+
+R. Souza, S. Caino-Lores, M. Coletti, T. J. Skluzacek, A. Costan,
+F. Suter, M. Mattoso, and R. Ferreira da Silva.
+*IEEE International Conference on e-Science*, Osaka, Japan, 2024.
+
+Explains how end-to-end provenance across edge, cloud, and HPC supports responsible,
+trustworthy, and energy-aware AI workflows.
+
+Links:
+`doi <https://doi.org/10.1109/e-Science62913.2024.10678731>`__ |
+`pdf <https://hal.science/hal-04902079v1/document>`__
+
+
+**LLM Agents for Interactive Workflow Provenance: Reference Architecture and Evaluation Methodology**
+
+R. Souza, T. Poteet, B. Etz, D. Rosendo, A. Gueroudji, W. Shin,
+P. Balaprakash, and R. Ferreira da Silva.
+*Workflows in Support of Large-Scale Science (WORKS), co-located with SC*,
+St. Louis, USA, 2025.
+
+Presents a reference architecture and evaluation method for LLM agents that query
+and act on large-scale provenance databases.
+
+Links:
+`doi <https://doi.org/10.1145/3731599.3767582>`__ |
+`pdf `__ |
+`html `__
+
+
+Papers That Used Flowcept
+-------------------------
+
+**Toward a Persistent Event-Streaming System for High-Performance Computing Applications**
+
+M. Dorier, A. Gueroudji, V. Hayot-Sasson, H. Nguyen, S. Ockerman,
+R. Souza, T. Bicer, H. Pan, P. Carns, K. Chard, and others.
+*Frontiers in High Performance Computing*, 2025.
+
+Demonstrates Flowcept generating high-volume provenance that is persistently
+streamed with Mofka for HPC applications.
+
+Links:
+`doi <https://doi.org/10.3389/fhpcp.2025.1638203>`__ |
+`pdf `__ |
+`html `__
+
+
+**AI Agents for Enabling Autonomous Experiments at ORNL's HPC and Manufacturing User Facilities**
+
+D. Rosendo, S. DeWitt, R. Souza, P. Austria, T. Ghosal, M. McDonnell,
+R. Miller, T. Skluzacek, J. Haley, B. Turcksin, and others.
+*Extreme-Scale Experiment-in-the-Loop Computing (XLOOP), co-located with SC*,
+2025.
+
+Leverages Flowcept's agentic provenance to coordinate multi-agent experiments
+and connect agents with HPC simulations through a shared provenance stream.
+
+Links:
+`doi <https://doi.org/10.1145/3731599.3767592>`__ |
+`pdf `__ |
+`html `__
+
+
+BibTeX
+------
+
+.. code-block:: bibtex
+
+ @inproceedings{souza2023towards,
+ title={Towards Lightweight Data Integration using Multi-workflow Provenance and Data Observability},
+ author={Souza, Renan and Skluzacek, Tyler J and Wilkinson, Sean R and Ziatdinov, Maxim and da Silva, Rafael Ferreira},
+ booktitle={IEEE International Conference on e-Science},
+ doi={10.1109/e-Science58273.2023.10254822},
+ url={https://doi.org/10.1109/e-Science58273.2023.10254822},
+ pdf={https://arxiv.org/pdf/2308.09004.pdf},
+ year={2023}
+ }
+
+ @inproceedings{souza_prov_agent_2025,
+ author={Renan Souza and Amal Gueroudji and Stephen DeWitt and Daniel Rosendo and Tirthankar Ghosal and Robert Ross and Prasanna Balaprakash and Rafael Ferreira da Silva},
+ title={PROV-AGENT: Unified Provenance for Tracking {AI} Agent Interactions in Agentic Workflows},
+ booktitle={IEEE International Conference on e-Science},
+ year={2025},
+ doi={10.1109/eScience65000.2025.00093},
+ pdf={https://arxiv.org/pdf/2508.02866}
+ }
+
+ @inproceedings{souza_rtai_2024,
+ author={Renan Souza and Silvina Caino-Lores and Mark Coletti and Tyler J. Skluzacek and Alexandru Costan and Frederic Suter and Marta Mattoso and Rafael Ferreira da Silva},
+ title={Workflow Provenance in the Computing Continuum for Responsible, Trustworthy, and Energy-Efficient {AI}},
+ booktitle={IEEE International Conference on e-Science},
+ year={2024},
+ doi={10.1109/e-Science62913.2024.10678731},
+ pdf={https://hal.science/hal-04902079v1/document}
+ }
+
+ @inproceedings{souza_llm_agents_works_sc25,
+ title={{LLM} Agents for Interactive Workflow Provenance: Reference Architecture and Evaluation Methodology},
+ author={Souza, Renan and Poteet, Timothy and Etz, Brian and Rosendo, Daniel and Gueroudji, Amal and others},
+ booktitle={Workflows in Support of Large-Scale Science ({WORKS}) co-located with the {ACM}/{IEEE} International Conference for High Performance Computing, Networking, Storage, and Analysis ({SC})},
+ year={2025},
+ doi={10.1145/3731599.3767582}
+ }
+
+ @article{dorier2025toward,
+ author={Dorier, Matthieu and Gueroudji, Amal and Hayot-Sasson, Valerie and Nguyen, Hai and Ockerman, Seth and Souza, Renan and Bicer, Tekin and Pan, Haochen and Carns, Philip and Chard, Kyle and others},
+ doi={10.3389/fhpcp.2025.1638203},
+ journal={Frontiers in High Performance Computing},
+ title={Toward a Persistent Event-Streaming System for High-Performance Computing Applications},
+ volume={3},
+ year={2025}
+ }
+
+ @inproceedings{rosendo2025ai,
+ author={Rosendo, Daniel and DeWitt, Stephen and Souza, Renan and Austria, Phillipe and Ghosal, Tirthankar and McDonnell, Marshall and Miller, Ross and Skluzacek, Tyler J and Haley, James and Turcksin, Bruno and others},
+ booktitle={Extreme-Scale Experiment-in-the-Loop Computing ({XLOOP}) co-located with the {ACM}/{IEEE} International Conference for High Performance Computing, Networking, Storage, and Analysis ({SC})},
+ title={AI Agents for Enabling Autonomous Experiments at ORNL's HPC and Manufacturing User Facilities},
+ year={2025},
+ doi={10.1145/3731599.3767592}
+ }
diff --git a/docs/quick_start.rst b/docs/quick_start.rst
index 6ef231c8..167b3722 100644
--- a/docs/quick_start.rst
+++ b/docs/quick_start.rst
@@ -8,22 +8,19 @@ Quick Start
The easiest way to capture provenance from plain Python functions—no external services required.
-Install and Initialize
-----------------------
+Install
+-------
-First, install Flowcept and initialize a settings file:
+First, install Flowcept:
.. code-block:: bash
# Make sure you activate your Python environment (conda, venv, etc.)
pip install flowcept
- flowcept --init-settings
-This creates a minimal settings file at ``~/.flowcept/settings.yaml``.
-
-Use this for the simplest offline path.
-
-If you need the full config structure instead, use:
+The simplest offline path does not require a settings file. If you need the
+full config structure for online services, adapters, telemetry, or deployment
+profiles, use:
.. code-block:: bash
@@ -40,8 +37,6 @@ Save the following script as ``quickstart.py`` and run ``python quickstart.py``:
Minimal example of Flowcept's instrumentation using decorators.
No DB, broker, or external service required.
"""
- import json
-
from flowcept import Flowcept, flowcept_task
from flowcept.instrumentation.flowcept_decorator import flowcept
@@ -67,9 +62,8 @@ Save the following script as ``quickstart.py`` and run ``python quickstart.py``:
if __name__ == "__main__":
main()
- prov_buffer = Flowcept.read_buffer_file()
- assert len(prov_buffer) == 2
- print(json.dumps(prov_buffer, indent=2))
+ # print(Flowcept.read_buffer_file()) # inspect raw JSONL records if needed
+ Flowcept.generate_report(print_markdown=True)
Inspecting the Output
---------------------
diff --git a/docs/schemas.rst b/docs/schemas.rst
index 4b2feee9..55ae2e59 100644
--- a/docs/schemas.rst
+++ b/docs/schemas.rst
@@ -14,10 +14,14 @@ Data Schemas for Flowcept data.
PROV-AGENT and Flowcept
=======================
-PROV-AGENT is a lightweight extension of `W3C PROV `_ for agentic workflows. It names the
-main building blocks you see in modern AI systems:
+PROV-AGENT is a `W3C PROV `_ extension for capturing provenance of agentic AI workflows.
+It is described in:
-- **Activities** such as Campaign, Workflow, and Task
+ R. Souza et al., *PROV-AGENT: Unified Provenance for Tracking AI Agent Interactions in Agentic Workflows*, IEEE International Conference on e-Science, Chicago, IL, USA, 2025. https://arxiv.org/abs/2508.02866
+
+PROV-AGENT names the main building blocks you see in modern AI systems:
+
+- **Activities** such as Campaign, Workflow, Task, AIModelInvocation, and AgentTool
- **Agents** such as an AI agent or a human user
- **Data Objects** such as domain data, prompts, responses, scheduling info, and telemetry
- **Relations** such as *used*, *wasGeneratedBy*, *wasAssociatedWith*, *wasAttributedTo*, and *wasInformedBy*
@@ -42,6 +46,41 @@ At a high level:
- **Relations** are preserved with IDs and standard fields (for example, workflow IDs, parent or dependency links),
so the graph remains connected and queryable.
+PROV-AGENT task subtypes
+------------------------
+The ``subtype`` field on a Task record narrows it to a specific PROV-AGENT activity class.
+Use the :class:`~flowcept.commons.vocabulary.PROV_AGENT` enum to set these values:
+
+.. list-table::
+ :header-rows: 1
+ :widths: 30 25 45
+
+ * - Enum value
+ - Stored string
+ - Description
+ * - ``PROV_AGENT.AI_MODEL_INVOCATION``
+ - ``ai_model_invocation``
+ - A single LLM prompt→response call (*AIModelInvocation* in PROV-AGENT).
+ Captured automatically by :class:`~flowcept.instrumentation.flowcept_agent_task.FlowceptLLM`.
+ ``used.prompt`` stores the input; ``generated.response`` stores the output;
+ ``custom_metadata.llm_usage`` stores token counts.
+ * - ``PROV_AGENT.AGENT_TOOL``
+ - ``agent_tool``
+ - A tool execution by an AI agent (*AgentTool* in PROV-AGENT).
+ Captured automatically by the
+ :func:`~flowcept.instrumentation.flowcept_agent_task.agent_flowcept_task` decorator
+ applied to MCP tools and LangGraph tool nodes.
+ ``used`` stores tool arguments; ``generated`` stores the return value.
+
+The ``wasInformedBy`` relation — an ``AgentTool`` activity informing an ``AIModelInvocation`` — is
+the key link for root-cause analysis and downstream impact tracing in PROV-AGENT. In Flowcept this
+is expressed through the ``agent_id`` field: every task with the same ``agent_id`` belongs to the
+same AI agent and can be queried together to reconstruct the full agent provenance graph.
+
+The UI uses ``subtype`` to visually distinguish AI agent activities from regular workflow tasks.
+Filter for ``subtype == "ai_model_invocation"`` or ``subtype == "agent_tool"`` to isolate agent
+interactions from the provenance database.
+
Figure
------
.. only:: html
diff --git a/docs/setup.rst b/docs/setup.rst
index 3fd9b0c9..042e85b7 100644
--- a/docs/setup.rst
+++ b/docs/setup.rst
@@ -31,6 +31,7 @@ Good practice is to cherry-pick the extras relevant to your workflow instead of
pip install flowcept[mlflow] # MLflow adapter
pip install flowcept[dask] # Dask adapter
pip install flowcept[tensorboard] # TensorBoard adapter
+ pip install flowcept[rabbitmq] # RabbitMQ message queue
pip install flowcept[kafka] # Kafka message queue
pip install flowcept[nvidia] # NVIDIA GPU runtime capture
pip install flowcept[telemetry] # CPU/GPU/memory telemetry capture
@@ -114,9 +115,10 @@ Message Queue (MQ)
Supported MQs:
-- `Redis `_ → **default**, lightweight, works on Linux, macOS, Windows, and HPC (tested on Frontier and Summit)
-- `Kafka `_ → for distributed environments or if Kafka is already in your stack
-- `Mofka `_ → optimized for HPC runs
+- `Redis `_ → **default**, lightweight, works on Linux, macOS, Windows, and HPC (tested on Frontier and Summit)
+- `RabbitMQ `_ → AMQP-based broker, suitable for cloud and enterprise environments
+- `Kafka `_ → for distributed environments or if Kafka is already in your stack
+- `Mofka `_ → optimized for HPC runs
Database (DB)
--------------
@@ -149,10 +151,11 @@ Using Docker Compose (recommended)
We provide a `Makefile `_ with shortcuts:
-1. **Redis only (no DB)**: ``make services`` (LMDB can be used in this setup as a lightweight DB)
-2. **Redis + MongoDB**: ``make services-mongo``
-3. **Kafka + MongoDB**: ``make services-kafka``
-4. **Mofka only (no DB)**: ``make services-mofka``
+1. **Redis only (no DB)**: ``make services`` (LMDB can be used in this setup as a lightweight DB)
+2. **Redis + MongoDB**: ``make services-mongo``
+3. **RabbitMQ + MongoDB**: ``make services-rabbitmq``
+4. **Kafka + MongoDB**: ``make services-kafka``
+5. **Mofka only (no DB)**: ``make services-mofka``
To customize, edit the YAML files in `deployment `_ and run:
diff --git a/examples/agents/aec_agent_context_manager.py b/examples/agents/aec_agent_context_manager.py
index 168c2b78..aa2f9350 100644
--- a/examples/agents/aec_agent_context_manager.py
+++ b/examples/agents/aec_agent_context_manager.py
@@ -3,7 +3,7 @@
import json
from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager
-from flowcept.agents.agent_client import run_tool
+from flowcept.agents.mcp.mcp_client import run_tool
@dataclass
diff --git a/examples/agents/aec_agent_mock.py b/examples/agents/aec_agent_mock.py
index c3c9f1c1..351c36e5 100644
--- a/examples/agents/aec_agent_mock.py
+++ b/examples/agents/aec_agent_mock.py
@@ -7,7 +7,7 @@
from mcp.server.fastmcp import FastMCP
from flowcept.configs import AGENT
-from flowcept.agents.agents_utils import build_llm_model
+from flowcept.agents.llm.builders import build_llm_model
from examples.agents.aec_agent_context_manager import AdamantineAeCContextManager
from examples.agents.aec_prompts import choose_option_prompt, generate_options_set_prompt
diff --git a/examples/agents/opt_driver_mock.py b/examples/agents/opt_driver_mock.py
index bbd3b339..c966dce0 100644
--- a/examples/agents/opt_driver_mock.py
+++ b/examples/agents/opt_driver_mock.py
@@ -4,7 +4,7 @@
from typing import Dict, List
from flowcept.flowcept_api.flowcept_controller import Flowcept
-from flowcept.agents.agent_client import run_tool
+from flowcept.agents.mcp.mcp_client import run_tool
from flowcept.flowceptor.consumers.base_consumer import BaseConsumer
from flowcept.instrumentation.flowcept_task import flowcept_task
from flowcept.instrumentation.task_capture import FlowceptTask
diff --git a/examples/llm_complex/llm_model.py b/examples/llm_complex/llm_model.py
index 80a718dc..b7532c65 100644
--- a/examples/llm_complex/llm_model.py
+++ b/examples/llm_complex/llm_model.py
@@ -229,7 +229,7 @@ def model_train(
if val_loss < best_val_loss:
best_val_loss = val_loss
if with_persistence:
- best_obj_id = Flowcept.db.save_or_update_torch_model(
+ best_obj_id = Flowcept.insert_or_update_torch_model(
model,
object_id=best_obj_id,
task_id=epochs_loop.get_current_iteration_id(),
diff --git a/examples/llm_tutorial/llm_model.py b/examples/llm_tutorial/llm_model.py
index ac8cfdfa..e52db3f5 100644
--- a/examples/llm_tutorial/llm_model.py
+++ b/examples/llm_tutorial/llm_model.py
@@ -231,7 +231,7 @@ def model_train(
if val_loss < best_val_loss:
best_val_loss = val_loss
if with_persistence:
- best_obj_id = Flowcept.db.save_or_update_torch_model(
+ best_obj_id = Flowcept.insert_or_update_torch_model(
model,
object_id=best_obj_id,
task_id=epochs_loop.get_current_iteration_id(),
diff --git a/examples/start_here.py b/examples/start_here.py
index b17e7f4f..7e856e5c 100644
--- a/examples/start_here.py
+++ b/examples/start_here.py
@@ -2,11 +2,10 @@
This is a very simple script to show the basic instrumentation capabilities of Flowcept, using its most straightforward
way of capturing workflow provenance from functions: using @decorators. It is meant to be executed in offline model.
-Flowcept will flush its internal buffer to a simple JSONL file in the end, if a `dump_buffer_path` is defined in
- the settings file (typically under ~/.flowcept/settings.yaml).
+Flowcept will flush its internal buffer to a simple JSONL file at the end of the run.
This very simple scenario does not need any database, streaming service, message queue or any other external service.
-It should run fine after installing Flowcept via `pip install flowcept` and running `$> flowcept --init-settings`.
+It should run fine after installing Flowcept via `pip install flowcept`.
For more complex features, such as online provenance analysis, HPC requirements, federated/highly distributed execution,
data observability from existing adapters, PyTorch models, telemetry capture optimization, query requirements, or
@@ -45,11 +44,8 @@ def main():
main()
# Reporting and verifications:
- prov_buffer = Flowcept.read_buffer_file()
- assert len(prov_buffer) == 2
+ raw_records = Flowcept.read_buffer_file()
+ assert len(raw_records) == 2
workflow_card_path = "WORKFLOW_CARD.md"
- report_stats = Flowcept.generate_report(
- records=prov_buffer,
- output_path=workflow_card_path,
- )
+ Flowcept.generate_report(output_path=workflow_card_path)
print(f"{workflow_card_path} generated!")
diff --git a/examples/unmanaged/simple_task2.py b/examples/unmanaged/simple_task2.py
index a834a9d2..926e3423 100644
--- a/examples/unmanaged/simple_task2.py
+++ b/examples/unmanaged/simple_task2.py
@@ -41,4 +41,4 @@ def super_func4(alpha):
flowcept.stop()
flowcept_messages = Flowcept.read_buffer_file()
- assert len(flowcept_messages) == 5
+ assert len(flowcept_messages) == 6
diff --git a/pyproject.toml b/pyproject.toml
index 326ef45b..f763ce57 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,14 +47,15 @@ dependencies = [
"omegaconf",
"numpy",
"msgpack",
- "orjson"
+ "orjson",
+ "rich",
]
[project.optional-dependencies]
redis = ["redis<8"]
lmdb = ["lmdb"]
telemetry = ["psutil>=6.1.1", "py-cpuinfo"]
-extras = ["flowcept[redis]", "flowcept[telemetry]", "flowcept[mongo]", "GitPython", "pandas", "pyarrow", "requests", "rich"]
+extras = ["flowcept[redis]", "flowcept[telemetry]", "flowcept[mongo]", "GitPython", "pandas", "pyarrow", "requests"]
webservice = ["fastapi", "uvicorn", "pyyaml", "sse-starlette"]
report_pdf = ["matplotlib", "reportlab", "networkx"]
@@ -62,12 +63,13 @@ mongo = ["pymongo", "pyarrow"]
dask = ["tomli", "dask[distributed]<=2024.10.0"]
docs = ["sphinx", "furo"]
kafka = ["confluent-kafka<=2.8.0"] # As of today, 2/28/2025, version 2.8.1 is stale. When this gets fixed, let's remove the version constraint. https://pypi.org/project/confluent-kafka/#history
+rabbitmq = ["pika"]
mlflow = ["mlflow-skinny", "SQLAlchemy", "alembic", "watchdog", "cryptography"]
nvidia = ["nvidia-ml-py"]
amd = ["amdsmi"]
mqtt = ["paho-mqtt"]
tensorboard = ["tensorboard", "tensorflow", "tbparse"]
-llm_agent = ["mcp[cli]", "langchain_community", "langchain_openai", "streamlit", "PyMuPDF", "matplotlib", "tabulate"]
+llm_agent = ["mcp[cli]", "langchain_community", "langchain_openai", "langgraph", "streamlit", "PyMuPDF", "matplotlib", "tabulate"]
llm_google = ["flowcept[llm_agent]", "google-genai"]
llm_agent_audio = ["flowcept[llm_agent]", "streamlit-mic-recorder", "SpeechRecognition", "pydub", "gTTS"]
# System dependency (required for pydub)
@@ -78,9 +80,9 @@ llm_agent_audio = ["flowcept[llm_agent]", "streamlit-mic-recorder", "SpeechRecog
dev = [
"flowcept[docs]",
+ "flowcept[rabbitmq]",
"jupyterlab",
"nbmake",
- "pika",
"pytest",
"pytest-timeout",
"ruff",
@@ -105,6 +107,7 @@ all = [
"flowcept[mongo]",
"flowcept[dask]",
"flowcept[kafka]",
+ "flowcept[rabbitmq]",
"flowcept[mlflow]",
"flowcept[mqtt]",
"flowcept[tensorboard]",
@@ -136,6 +139,9 @@ artifacts = ["src/flowcept/webservice/ui_build/**"]
"resources/sample_settings.yaml" = "resources/sample_settings.yaml"
[tool.pytest.ini_options]
+markers = [
+ "llm: tests that require a real LLM (run locally with env set; excluded from CI except run-llm-tests.yml)",
+]
filterwarnings = [
"ignore:websockets\\.legacy is deprecated:DeprecationWarning",
"ignore:websockets\\.server\\.WebSocketServerProtocol is deprecated:DeprecationWarning",
diff --git a/resources/sample_settings.yaml b/resources/sample_settings.yaml
index 94963b1e..52fd22ae 100644
--- a/resources/sample_settings.yaml
+++ b/resources/sample_settings.yaml
@@ -44,13 +44,15 @@ experiment:
mq:
enabled: false
- type: redis # or kafka or mofka; Please adjust the port (kafka's default is 9092; redis is 6379). If mofka, adjust the group_file.
+ type: redis # or kafka, mofka, rabbitmq; adjust port accordingly (redis: 6379, kafka: 9092, rabbitmq: 5672). If mofka, also set group_file.
host: localhost
# uri: ?
# instances: ["localhost:6379"] # We can have multiple MQ instances being accessed by the consumers but each interceptor will currently access one single MQ..
port: 6379
# group_id: auto # Kafka-only consumer group id. Use "auto" to generate a unique group per run.
# group_file: mofka.json
+ # username: guest # RabbitMQ only (AMQP); default is "guest"
+ # vhost: / # RabbitMQ only; default is "/"
channel: interception
buffer_size: 50
insertion_buffer_time_secs: 5
@@ -107,6 +109,7 @@ agent:
chat_enabled: true # Enable the /api/v1/chat endpoint (requires the llm_agent extra and LLM settings above).
chat_max_tool_iterations: 5 # Max LLM tool-calling iterations per chat message.
chat_max_query_limit: 1000 # Hard cap for records returned by chat LLM query tools.
+ agent_mode: disabled # How the MCP agent is deployed: disabled | separate | colocated
databases:
diff --git a/src/flowcept/README.md b/src/flowcept/README.md
index 5317e89b..7f4f3dfa 100644
--- a/src/flowcept/README.md
+++ b/src/flowcept/README.md
@@ -19,6 +19,20 @@ This directory contains the runtime package. Use this README as a code-orientati
- `report/`: workflow card and PDF report generation.
- `webservice/`: FastAPI read-only REST API.
+## Separation of Concerns
+
+Keep each module focused on one layer. Do not mix HTTP, orchestration, and persistence
+logic in the same place.
+
+- UI / Dashboard / user client → HTTP route → small service layer → `DBAPI` → DAO
+ (`MongoDBDAO` or `LMDBDAO`) → MongoDB / LMDB.
+- UI / Dashboard / chat route → LangChain / LangGraph orchestrator → MCP tool calls
+ → `DBAPI` or in-memory runtime objects.
+
+The webservice package should stay thin. The agents package should own orchestration
+and tool logic. Persistence stays in DAO files. Runtime queries can read the active
+DataFrame or workflow object in memory when that is the right source of truth.
+
## Code Rules
- Keep config defaults and env-var reads in `configs.py`; do not hardcode runtime config elsewhere.
diff --git a/src/flowcept/agents/README.md b/src/flowcept/agents/README.md
index 4fb78c8e..633faa5d 100644
--- a/src/flowcept/agents/README.md
+++ b/src/flowcept/agents/README.md
@@ -1,168 +1,181 @@
# Flowcept Agent
-This package contains the Flowcept MCP server, client helpers, tools, prompts,
-context manager, and optional UI pieces.
+This package contains the Flowcept MCP server, client helpers, data-query tools,
+MCP-wrapper tools, prompts, context manager, and LLM infrastructure.
+
+For code-assistant behavior, use the repository root `AGENTS.md`. Runtime usage
+docs live in `docs/agent.rst`.
+
+## What Lives Here
+
+- `chat_orchestration/`: LangChain / LangGraph orchestration for the web chat.
+ This is where the chat runtime, tool routing, and turn-level orchestration live.
+ It should stay separate from HTTP route handlers.
+- `mcp/`: the standalone MCP server surface. Keep these wrappers thin. They should
+ load context, call tools, and return `ToolResult`, but not own business logic.
+- `mcp/mcp_tools/`: MCP wrappers around shared tool cores. These are the public MCP
+ entry points that external assistants call.
+- `data_query_tools/`: shared query logic. This is where task, workflow, object, and
+ DataFrame query behavior lives. These modules can call `DBAPI` for persisted data
+ or read the in-memory DataFrame / workflow object for runtime questions.
+- `prompts/`: prompt-builder functions and prompt registrations. Keep them as plain
+ Python builders that return strings, not Jinja templates.
+- `provenance_schema_manager/`: schema introspection and documentation context used by
+ prompt builders.
+- `llm/`: model construction and normalization helpers. Centralize LLM creation here.
+- `gui/`: legacy UI helpers. Do not extend this unless the old GUI is being revived.
+
+## Directory Layout
-For code-assistant behavior, use the repository root `AGENTS.md`. Do not
-duplicate agent rules here. Runtime usage docs live in `docs/agent.rst`.
-
-## One Agent, Two Orchestrators
-
-Flowcept Agent has one shared backend and two orchestration paths.
-
-Both paths use the same MCP server, in-memory context, tools, prompts, and
-execution functions. The only intended difference is who does routing and LLM
-reasoning:
-
-- **Internal LLM mode:** Flowcept builds the configured LLM and orchestrates.
-- **External LLM mode:** Codex, Claude, LibreChat, Cursor, or another assistant
- orchestrates and calls Flowcept MCP prompts/tools.
-
-## Shared Backend
-
-- `flowcept_agent.py` starts the MCP server.
-- `flowcept_ctx_manager.py` owns the live task/object/workflow context.
-- `tools/general_tools.py` exposes `prompt_handler` and shared commands.
-- `tools/in_memory_queries/` queries task/object DataFrames.
-- `tools/workflow_query_tools.py` queries the active workflow message object.
-- `prompts/` builds prompts for internal and external LLM generation.
-- `agents_utils.py` builds the configured internal LLM when Flowcept owns
- orchestration.
-
-## Internal LLM Mode
-
-Use this when Flowcept should route free-text messages itself.
-
-```yaml
-agent:
- external_llm: false
+```
+agents/
+ context_manager.py # FlowceptAgentContextManager, mcp_flowcept, get_df_context
+ tool_result.py # ToolResult Pydantic model (2xx/3xx/4xx/5xx conventions)
+
+ llm/
+ builders.py # build_llm_model(), normalize_message()
+ providers/
+ claude_gcp.py # ClaudeOnGCPLLM (Vertex AI)
+ gemini25.py # Gemini25LLM
+
+ chat_orchestration/
+ chat_orchestrator_service.py # LangGraph + MemorySaver chat turn orchestration
+
+ provenance_schema_manager/
+ static_schema_builder.py # SCHEMA_CONTEXT, build_schema_context, assert_schema_documented
+ dynamic_schema_tracker.py # Tracks evolving task/object schemas from live messages
+
+ data_query_tools/ # Plain-Python tool cores — NO MCP imports
+ db_query_tools.py # query_tasks, query_workflows, get_task_summary, …
+ in_memory_task_query_tools.py # run_df_query, generate_result_df, …
+ in_memory_workflow_query_tools.py # execute_generated_workflow_query, run_workflow_query
+ pandas_utils.py # safe_execute, normalize_output, format_result_df, …
+
+ mcp/
+ mcp_server.py # MCP server entry point (start with `flowcept --start-agent`)
+ mcp_client.py # Client helpers: run_tool(), run_prompt()
+ mcp_tools/ # Thin MCP wrappers over data_query_tools/
+ db_query_mcp_tools.py
+ in_memory_task_query_mcp_tools.py
+ in_memory_workflow_query_mcp_tools.py
+ session_tools.py # check_liveness, check_llm, record_guidance, reset_context, …
+ report_tools.py # generate_workflow_card
+
+ prompts/
+ README.md # Prompt authoring rules
+ base_prompts.py # BASE_ROLE, build_single_task_prompt, build_multitask_prompt
+ db_query_prompts.py # build_db_filter_prompt
+ in_memory_task_query_prompts.py # Pandas code / plot prompt builders
+ in_memory_workflow_query_prompts.py # Workflow message query prompt builders
+ chat_prompts.py # build_chat_system_prompt() for the webservice chat
+ mcp_prompts.py # @mcp_flowcept.prompt() registrations
```
-Typical path:
+## One Agent, Two Orchestrators
-1. A client calls `prompt_handler(message)`.
-2. Flowcept builds the configured model with `build_llm_model()`.
-3. Flowcept classifies the message with the routing prompt.
-4. Flowcept calls the same MCP tools used by the external path.
-5. Tool results are returned to the client.
+The MCP agent exposes explicit tools. Claude Code, Codex, LibreChat, or another
+assistant can call MCP prompt-builders and execution tools directly.
-This mode supports natural-language routing through `prompt_handler`, including
-task/object DataFrame questions, plots, small talk, records, context reset, and
-direct DataFrame code execution.
+The webservice chat path is the sister module that owns the HTTP-facing chat UI.
+Its route layer stays thin and delegates to the chat orchestrator in
+`chat_orchestration/chat_orchestrator_service.py`. That orchestrator calls into the same
+shared tool cores used by the MCP surface.
-## External LLM Mode
+## Schema Context
-Use this when an outside assistant should own reasoning and planning.
+`SCHEMA_CONTEXT` (module-level dict in `provenance_schema_manager/static_schema_builder.py`) is populated at
+MCP server startup via `build_schema_context()`. Its keys map to lists of field documentation:
-```yaml
-agent:
- external_llm: true
+```python
+{
+ "task_fields": [...], # TaskObject attribute docs
+ "workflow_fields": [...], # WorkflowObject attribute docs
+ "telemetry_summary_fields": [...], # TelemetrySummary + subclass docs
+ ...
+}
```
-Typical path:
-
-1. The outside assistant calls a Flowcept MCP prompt builder.
-2. The outside assistant sends that prompt to its own LLM.
-3. The outside assistant calls the matching Flowcept execution tool.
-4. Flowcept executes against the same live in-memory context.
-
-In this mode, arbitrary free-text messages sent to `prompt_handler` are not
-internally routed. This prevents Flowcept from silently becoming the planner
-when the outside assistant is supposed to plan.
+All prompt builders in `prompts/` use `SCHEMA_CONTEXT` for field tables instead
+of hardcoded strings. The MCP server refuses to start if any non-private field
+is undocumented (`SchemaDocumentationError`).
## Equivalent Tool Paths
-| Capability | Internal orchestration | External orchestration |
+| Capability | Internal | External |
|---|---|---|
-| Task DataFrame question | `prompt_handler("...")` -> `run_df_query(...)` | `build_df_query_prompt(...)` -> external LLM -> `execute_generated_df_code(...)` |
-| Object DataFrame question | `prompt_handler("o: ...")` -> `run_df_query(context_kind="objects")` | `build_df_query_prompt(context_kind="objects")` -> external LLM -> `execute_generated_df_code(context_kind="objects")` |
-| Workflow metadata question | `prompt_handler("w: ...")` -> `run_workflow_query(...)` | `build_workflow_query_prompt(...)` -> external LLM -> `execute_generated_workflow_query(...)` |
-| Direct DataFrame code | `prompt_handler("result = df ...")` | `execute_generated_df_code("result = df ...")` |
-| Context reset and records | `prompt_handler("reset context")`, `@record`, `@show records`, `@reset records` | Same tools/commands |
-| Provenance reports | Flowcept report tools | Same report tools called explicitly |
-
-## Prefix Shortcuts
+| Task DF question | `run_df_query` | `build_df_query_prompt` → LLM → `execute_generated_df_code` |
+| Object DF question | `run_df_query(context_kind="objects")` | same, `context_kind="objects"` |
+| Workflow question | `run_workflow_query` | `build_workflow_query_prompt` → LLM → `execute_generated_workflow_query` |
+| DB provenance | `query_tasks` / `query_workflows` | same tools |
+| Reports | `generate_workflow_card` | same tool |
-These shortcuts are accepted by `prompt_handler` in both modes:
+## PROV-AGENT Instrumentation
-- `t: ` queries the task DataFrame.
-- `o: ` queries the object DataFrame.
-- `w: ` queries the workflow message object.
-- `result = df ...` executes explicit pandas code.
-- `save` saves the current DataFrame context.
-- `reset context`, `@record`, `@show records`, and `@reset records` manage
- context and guidance.
+Flowcept tracks AI agent provenance following the **PROV-AGENT** model
+(arXiv:2508.02866), a W3C PROV extension for agentic workflows.
+Two `subtype` values from `flowcept.commons.vocabulary.PROV_AGENT` identify
+agent-specific activities in the task database:
-Important nuance: prefix shortcuts are convenience paths. If a shortcut needs
-LLM generation, the current implementation may build Flowcept's configured LLM.
-For strict external orchestration, use prompt-builder tools plus execution tools.
-
-## Start The MCP Server
+| Enum | Stored string | What it captures |
+|---|---|---|
+| `PROV_AGENT.AI_MODEL_INVOCATION` | `"ai_model_invocation"` | One LLM prompt → response call |
+| `PROV_AGENT.AGENT_TOOL` | `"agent_tool"` | One tool execution by an AI agent |
-Prefer the CLI:
+### Automatic capture
-```bash
-flowcept --start-agent
-```
+**MCP tools** — every `@mcp_flowcept.tool()` function in `mcp_tools/` is also
+decorated with `@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)`. No extra
+code needed; tool calls are stored automatically when the interceptor is running.
-Equivalent module form:
+**LLM calls** — wrap any LangChain model with `FlowceptLLM` to record every
+`.invoke()` as `PROV_AGENT.AI_MODEL_INVOCATION`:
-```bash
-python -m flowcept.agents.flowcept_agent
+```python
+from flowcept.instrumentation.flowcept_agent_task import FlowceptLLM
+wrapped = FlowceptLLM(llm, agent_id=my_agent_id)
+response = wrapped.invoke("How many tasks failed?")
```
-Run from a Python environment where Flowcept is installed.
+**LangGraph chat** — `run_chat` in `chat_orchestration/chat_orchestrator_service.py`
+wraps each graph execution in a `Flowcept` context (`workflow_name="Flowcept LangGraph Chat"`,
+`start_persistence=True`). This gives every chat turn its own `workflow_id`.
+Within the graph, `call_model` uses `FlowceptLLM` and `call_tools` uses
+`FlowceptTask(subtype=PROV_AGENT.AGENT_TOOL)` — both inherit
+`Flowcept.current_workflow_id` automatically.
-## Internal Prompt Handler Example
+### Querying agent provenance
```python
-from flowcept.agents.agent_client import run_tool
+# All LLM calls by a specific agent
+Flowcept.db.task_query(filter={"subtype": "ai_model_invocation", "agent_id": my_agent_id})
-result = run_tool(
- "prompt_handler",
- kwargs={"message": "What are the top 5 slowest activities?"},
-)
+# All tool executions in a chat session (workflow)
+Flowcept.db.task_query(filter={"subtype": "agent_tool", "workflow_id": thread_id})
```
-## External DataFrame Query Example
-
-```python
-from flowcept.agents.agent_client import run_prompt, run_tool
+The UI uses `subtype` to display AI agent workflows differently from regular
+scientific workflow tasks.
-prompt = run_prompt(
- "build_df_query_prompt",
- args={"query": "What are the top 5 slowest activities?", "context_kind": "tasks"},
-)
+See `docs/schemas.rst` → *PROV-AGENT and Flowcept* for the full data model and
+paper reference.
-# The external assistant sends `prompt` to its own LLM and gets pandas code.
-generated_code = (
- "result = df.assign(duration=(df['ended_at'] - df['started_at']))"
- ".groupby('activity_id', dropna=False)['duration']"
- ".mean().sort_values(ascending=False).head(5)"
- ".reset_index(name='avg_duration')"
-)
+## Starting the MCP Server
-result = run_tool(
- "execute_generated_df_code",
- kwargs={"user_code": generated_code, "context_kind": "tasks"},
-)
+```bash
+flowcept --start-agent
```
-## External Workflow Query Example
+## Client Usage
```python
-from flowcept.agents.agent_client import run_prompt, run_tool
+from flowcept.agents.mcp.mcp_client import run_tool, run_prompt
-prompt = run_prompt(
- "build_workflow_query_prompt",
- args={"query": "What settings path was used?"},
-)
+# Call a tool
+result = run_tool("run_df_query", kwargs={"query": "top 5 slowest activities", "context_kind": "tasks"})
-# The external assistant sends `prompt` to its own LLM and gets a JSON spec.
-query_spec = {"field_paths": ["conf.settings_path"], "missing": [], "answer_style": "short"}
-
-result = run_tool(
- "execute_generated_workflow_query",
- kwargs={"query_spec": query_spec},
+# Use a prompt builder (external LLM mode)
+prompt = run_prompt(
+ "build_df_query_prompt",
+ args={"query": "top 5 slowest activities", "context_kind": "tasks"},
)
```
diff --git a/src/flowcept/agents/__init__.py b/src/flowcept/agents/__init__.py
index f24686ac..b248dd24 100644
--- a/src/flowcept/agents/__init__.py
+++ b/src/flowcept/agents/__init__.py
@@ -1,7 +1,7 @@
# flake8: noqa: F403
"""Agents subpackage."""
-from flowcept.agents.tools.general_tools import *
-from flowcept.agents.tools.in_memory_queries.in_memory_queries_tools import *
-from flowcept.agents.tools.db_prov_tools import *
-from flowcept.agents.tools.workflow_query_tools import *
+from flowcept.agents.tool_result import ToolResult # noqa: F401
+from flowcept.agents.mcp.mcp_tools import *
+from flowcept.agents.mcp.mcp_tools.df_query_mcp_tools import *
+from flowcept.agents.mcp.mcp_tools.db_query_mcp_tools import *
diff --git a/src/flowcept/agents/agents_utils.py b/src/flowcept/agents/agents_utils.py
deleted file mode 100644
index ae6c3e7f..00000000
--- a/src/flowcept/agents/agents_utils.py
+++ /dev/null
@@ -1,232 +0,0 @@
-import os
-import re
-import unicodedata
-from typing import Union, Dict
-
-from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager
-from flowcept.instrumentation.flowcept_agent_task import FlowceptLLM, get_current_context_task
-
-from flowcept.configs import AGENT
-from pydantic import BaseModel
-
-
-class ToolResult(BaseModel):
- """
- ToolResult is a standardized wrapper for tool outputs, encapsulating
- status codes, results, and optional metadata.
-
- This class provides conventions for interpreting the output of tools
- (e.g., LLM calls, DataFrame operations, plotting functions) and ensures
- consistent handling of both successes and errors.
-
- Conventions
- -----------
- - **2xx: Success (string result)**
- - Result is the expected output as a string.
- - Example: ``201`` → operation completed successfully.
-
- - **3xx: Success (dict result)**
- - Result is the expected output as a dictionary.
- - Example: ``301`` → operation completed successfully.
-
- - **4xx: Error (string message)**
- - System or agent internal error, returned as a string message.
- - ``400``: LLM call problem (e.g., server connection or token issues).
- - ``404``: Empty or ``None`` result.
- - ``405``: LLM responded, but format was wrong.
- - ``406``: Error executing Python code.
- - ``499``: Other uncategorized error.
-
- - **5xx: Error (dict result)**
- - System or agent internal error, returned as a structured dictionary.
-
- - **None**
- - Result not yet set or tool did not return anything.
-
- Attributes
- ----------
- code : int or None
- Status code indicating success or error category.
- result : str or dict, optional
- The main output of the tool (string, dict, or error message).
- extra : dict or str or None
- Additional metadata or debugging information.
- tool_name : str or None
- Name of the tool that produced this result.
-
- Methods
- -------
- result_is_str() -> bool
- Return True if the result should be interpreted as a string.
- is_success() -> bool
- Return True if the result represents any type of success.
- is_success_string() -> bool
- Return True if the result is a success with a string output (2xx).
- is_error_string() -> bool
- Return True if the result is an error with a string message (4xx).
- is_success_dict() -> bool
- Return True if the result is a success with a dict output (3xx).
-
- Examples
- --------
- >>> ToolResult(code=201, result="Operation successful")
- ToolResult(code=201, result='Operation successful')
-
- >>> ToolResult(code=301, result={"data": [1, 2, 3]})
- ToolResult(code=301, result={'data': [1, 2, 3]})
-
- >>> ToolResult(code=405, result="Invalid format from LLM")
- ToolResult(code=405, result='Invalid format from LLM')
- """
-
- code: int | None = None
- result: Union[str, Dict] = None
- extra: Dict | str | None = None
- tool_name: str | None = None
-
- def result_is_str(self) -> bool:
- """Returns True if the result is a string."""
- return (200 <= self.code < 300) or (400 <= self.code < 500)
-
- def is_success(self):
- """Returns True if the result is a success."""
- return self.is_success_string() or self.is_success_dict()
-
- def is_success_string(self):
- """Returns True if the result is a success string."""
- return 200 <= self.code < 300
-
- def is_error_string(self):
- """Returns True if the result is an error string."""
- return 400 <= self.code < 500
-
- def is_success_dict(self) -> bool:
- """Returns True if the result is a success dictionary."""
- return 300 <= self.code < 400
-
-
-def build_llm_model(
- model_name=None,
- model_kwargs=None,
- service_provider=None,
- agent_id=BaseAgentContextManager.agent_id,
- track_tools=True,
- return_response_object=False,
-) -> FlowceptLLM:
- """
- Build and return an LLM instance using agent configuration.
-
- This function retrieves the model name and keyword arguments from the AGENT configuration,
- constructs a SambaStudio LLM instance, and returns it.
-
- Returns
- -------
- LLM
- An initialized LLM object configured using the `AGENT` settings.
- """
- _model_kwargs = (AGENT.get("model_kwargs") or {}).copy()
- if model_kwargs is not None:
- for k in model_kwargs:
- _model_kwargs[k] = model_kwargs[k]
-
- if "model" not in _model_kwargs:
- _model_kwargs["model"] = AGENT.get("model", model_name)
-
- if service_provider:
- _service_provider = service_provider
- else:
- _service_provider = AGENT.get("service_provider")
-
- if _service_provider == "sambanova":
- from langchain_community.llms.sambanova import SambaStudio
-
- os.environ["SAMBASTUDIO_URL"] = os.environ.get("SAMBASTUDIO_URL", AGENT.get("llm_server_url"))
- os.environ["SAMBASTUDIO_API_KEY"] = os.environ.get("SAMBASTUDIO_API_KEY", AGENT.get("api_key"))
-
- llm = SambaStudio(model_kwargs=_model_kwargs)
- elif _service_provider == "azure":
- from langchain_openai.chat_models.azure import AzureChatOpenAI
-
- api_key = os.environ.get("AZURE_OPENAI_API_KEY", AGENT.get("api_key", None))
- service_url = os.environ.get("AZURE_OPENAI_API_ENDPOINT", AGENT.get("llm_server_url", None))
- llm = AzureChatOpenAI(
- azure_deployment=_model_kwargs.get("model"), azure_endpoint=service_url, api_key=api_key, **_model_kwargs
- )
- elif _service_provider == "openai":
- from langchain_openai import ChatOpenAI
-
- api_key = os.environ.get("OPENAI_API_KEY", AGENT.get("api_key", None))
- base_url = os.environ.get("OPENAI_BASE_URL", AGENT.get("llm_server_url") or None)
- org = os.environ.get("OPENAI_ORG_ID", AGENT.get("organization", None))
-
- init_kwargs = {"api_key": api_key}
- if base_url:
- init_kwargs["base_url"] = base_url
- if org:
- init_kwargs["organization"] = org
-
- llm = ChatOpenAI(**init_kwargs, **_model_kwargs)
- elif _service_provider == "google":
- if "claude" in _model_kwargs["model"]:
- api_key = os.environ.get("GOOGLE_API_KEY", AGENT.get("api_key", None))
- _model_kwargs["model_id"] = _model_kwargs.pop("model")
- _model_kwargs["google_token_auth"] = api_key
- from flowcept.agents.llms.claude_gcp import ClaudeOnGCPLLM
-
- llm = ClaudeOnGCPLLM(**_model_kwargs)
- elif "gemini" in _model_kwargs["model"]:
- from flowcept.agents.llms.gemini25 import Gemini25LLM
-
- llm = Gemini25LLM(**_model_kwargs)
- else:
- raise Exception("Currently supported providers are sambanova, openai, azure, and google.")
- if track_tools:
- llm = FlowceptLLM(llm, return_response_object=return_response_object)
- if agent_id is None:
- agent_id = BaseAgentContextManager.agent_id
- llm.agent_id = agent_id
- if track_tools:
- tool_task = get_current_context_task()
- if tool_task:
- llm.parent_task_id = tool_task.task_id
- return llm
-
-
-def normalize_message(user_msg: str) -> str:
- """
- Normalize a user message into a canonical, comparison-friendly form.
-
- The function standardizes text by trimming whitespace, applying Unicode
- normalization, normalizing dash characters, collapsing repeated whitespace,
- removing trailing punctuation that does not affect semantics, and converting
- the result to lowercase.
-
- Parameters
- ----------
- user_msg : str
- Raw user input message.
-
- Returns
- -------
- str
- Normalized message suitable for matching, comparison, or hashing.
- """
- # 1) Strip leading/trailing whitespace
- user_msg = user_msg.strip()
-
- # 2) Unicode normalize to avoid weird characters (like fancy quotes, dashes)
- user_msg = unicodedata.normalize("NFKC", user_msg)
-
- # 3) Normalize dashes commonly used in chemistry (C–H, C—H, etc.)
- user_msg = user_msg.replace("–", "-").replace("—", "-")
-
- # 4) Collapse multiple spaces / newlines into a single space
- user_msg = re.sub(r"\s+", " ", user_msg)
-
- # 5) Remove trailing punctuation that doesn't change semantics
- # e.g., "?", "!", "." at the VERY end
- user_msg = re.sub(r"[?!.\s]+$", "", user_msg)
-
- user_msg = user_msg.lower()
-
- return user_msg
diff --git a/src/flowcept/agents/chat_orchestration/__init__.py b/src/flowcept/agents/chat_orchestration/__init__.py
new file mode 100644
index 00000000..e3c007b6
--- /dev/null
+++ b/src/flowcept/agents/chat_orchestration/__init__.py
@@ -0,0 +1 @@
+"""LangChain/LangGraph orchestration for Flowcept agent chat."""
diff --git a/src/flowcept/agents/chat_orchestration/chat_orchestrator_service.py b/src/flowcept/agents/chat_orchestration/chat_orchestrator_service.py
new file mode 100644
index 00000000..2ed02aa0
--- /dev/null
+++ b/src/flowcept/agents/chat_orchestration/chat_orchestrator_service.py
@@ -0,0 +1,683 @@
+"""LLM chat orchestration for the webservice: LangGraph + MemorySaver tool-calling loop."""
+
+from __future__ import annotations
+
+import json
+import uuid
+from typing import Any, Dict, Generator, List, Optional
+
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import END, MessagesState, StateGraph
+
+from langgraph.errors import GraphRecursionError
+
+from flowcept.agents.prompts.chat_prompts import build_chat_system_prompt
+from flowcept.agents.mcp.mcp_client import run_tool
+from flowcept.commons.flowcept_logger import FlowceptLogger
+from flowcept.commons.utils import sanitize_json_like
+from flowcept.commons.vocabulary import PROV_AGENT
+from flowcept.configs import AGENT_CHAT_MAX_TOOL_ITERATIONS, AGENT_CHAT_MAX_TOOL_RESULT_CHARS, INSTRUMENTATION_ENABLED
+
+MAX_TOOL_ITERATIONS = AGENT_CHAT_MAX_TOOL_ITERATIONS  # run_chat derives the graph recursion_limit from this
+# Upper bound on a tool result string; call_tools truncates longer results before they enter LangGraph state.
+_MAX_TOOL_RESULT_CHARS = AGENT_CHAT_MAX_TOOL_RESULT_CHARS
+CHAT_WORKFLOW_NAME = "Flowcept LangGraph Chat"  # workflow_name given to the Flowcept context in run_chat
+
+# Module-level checkpointer shared by every compiled graph; conversations are keyed by thread_id.
+_memory = MemorySaver()
+
+
+def _format_error(exc: BaseException, _depth: int = 0) -> str:
+    """Render *exc* as a user-facing message, drilling into exception groups and ``__cause__`` chains."""
+    if _depth <= 5:  # beyond this nesting depth the exception is reported as-is
+        if hasattr(exc, "exceptions"):  # ExceptionGroup / BaseExceptionGroup (Python 3.11+)
+            inner = "; ".join([_format_error(child, _depth + 1) for child in exc.exceptions])
+            return (
+                f"A tool call failed ({inner}). "
+                "This may be a transient service error — try rephrasing your question "
+                "or narrowing the scope (e.g. add a workflow_id or campaign_id)."
+            )
+        root = exc.__cause__
+        if root is not None:
+            return _format_error(root, _depth + 1)
+    return str(exc) or type(exc).__name__
+
+
+def _build_langchain_tools(context: Optional[Dict[str, Any]], allow_dashboard_edit: bool):
+    """Wrap MCP tools as LangChain tools; returns the "db" or "df" tool list, plus dashboard tools if allowed."""
+    from langchain_core.tools import tool
+
+    def _run_mcp(tool_name: str, **kwargs) -> str:
+        return run_tool(tool_name, kwargs=kwargs)[0]  # keep only the first item run_tool returns
+
+    def _coerce_projection(p: Any) -> Optional[List[str]]:
+        """Return projection as a list of names; a Mongo-style dict {field: 1} keeps only its truthy keys."""
+        if p is None:
+            return None
+        if isinstance(p, dict):
+            return [k for k, v in p.items() if v]
+        return list(p)
+
+    def _coerce_sort(s: Any) -> Optional[List[Dict[str, Any]]]:
+        """Return sort as a list; a Mongo-style dict {field: -1} becomes [{"field": ..., "order": ...}]."""
+        if s is None:
+            return None
+        if isinstance(s, dict):
+            return [{"field": k, "order": v} for k, v in s.items()]
+        return list(s)
+
+    def _scoped_filter(filter: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+        """Copy the filter and overwrite workflow_id/campaign_id with the truthy values found in context."""
+        scoped = dict(filter or {})
+        for key in ("workflow_id", "campaign_id"):
+            if (context or {}).get(key):
+                scoped[key] = context[key]  # the context value replaces any caller-supplied one
+        return scoped
+
+    @tool
+    def query_tasks(
+        filter: Optional[Dict[str, Any]] = None,
+        projection: Optional[Any] = None,
+        limit: int = 100,
+        sort: Optional[Any] = None,
+    ) -> str:
+        """Query task provenance records with a Mongo-style filter.
+
+        projection: list of field names, or a Mongo projection dict {"field": 1}.
+        sort: list of {"field": "...", "order": 1|-1}, or a Mongo sort dict {"field": -1}.
+        """
+        return _run_mcp(
+            "query_tasks",
+            filter=_scoped_filter(filter),
+            projection=_coerce_projection(projection),
+            limit=limit,
+            sort=_coerce_sort(sort),
+        )
+
+    @tool
+    def query_workflows(filter: Optional[Dict[str, Any]] = None, limit: int = 100) -> str:
+        """Query workflow provenance records with a Mongo-style filter."""
+        return _run_mcp("query_workflows", filter=_scoped_filter(filter), limit=limit)
+
+    @tool
+    def get_task_summary(filter: Optional[Dict[str, Any]] = None) -> str:
+        """Summarize tasks: status counts, per-activity durations, and time range."""
+        return _run_mcp("get_task_summary", filter=_scoped_filter(filter))
+
+    @tool
+    def list_campaigns(campaign_id: Optional[str] = None) -> str:
+        """List derived campaign summaries (campaigns group workflows and tasks).
+
+        campaign_id: when provided, returns only that campaign's summary.
+        Always pass the campaign_id from the user context to scope the result.
+        """
+        effective_id = campaign_id or (context or {}).get("campaign_id")  # explicit argument wins over context
+        return _run_mcp("list_campaigns", campaign_id=effective_id)
+
+    @tool
+    def list_agents() -> str:
+        """List derived agent summaries (agents observed in task provenance).
+
+        Automatically scoped to the current workflow when workflow_id is in context.
+        """
+        workflow_id = (context or {}).get("workflow_id")
+        effective_filter = {"workflow_id": workflow_id} if workflow_id else None
+        return _run_mcp("list_agents", filter=effective_filter)
+
+    @tool
+    def make_chart(card_spec: Dict[str, Any]) -> str:
+        """Build a chart from a declarative dashboard card spec; the UI renders the result."""
+        scoped_spec = dict(card_spec)  # shallow copies, so the caller's spec is not modified
+        data_spec = dict(scoped_spec.get("data") or {})
+        data_spec["filter"] = _scoped_filter(data_spec.get("filter"))
+        scoped_spec["data"] = data_spec
+        return _run_mcp("make_chart", card_spec=scoped_spec, context=None)  # scope already lives in the filter
+
+    @tool
+    def highlight_lineage(
+        task_ids: Optional[Any] = None,
+        filter: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """Highlight the full provenance lineage (ancestors + descendants) of tasks in the Dataflow graph.
+
+        Pass `task_ids` as a list of task ID strings, or a single task ID string.
+        Or use `filter` to find the seed tasks first.
+        The UI will dim all other nodes and visually trace the lineage chain.
+        Always pass a workflow_id in the filter when on a workflow page.
+        """
+        wf_id = (context or {}).get("workflow_id")  # sent separately; `filter` itself is forwarded unscoped
+        ids: Optional[List[str]] = None
+        if task_ids is not None:
+            ids = [task_ids] if isinstance(task_ids, str) else list(task_ids)
+        return _run_mcp("highlight_lineage", task_ids=ids, filter=filter, workflow_id=wf_id)
+
+    def _query_text(query: Any) -> str:
+        if isinstance(query, str):
+            return query
+        return json.dumps(query, default=str)  # non-string queries are serialized to JSON text
+
+    @tool("generate_result_df")
+    def generate_result_df(query: Any) -> str:
+        """Answer questions about task execution using the in-memory tasks DataFrame.
+
+        Use for questions about WHAT HAPPENED during the workflow: activities, task inputs/outputs,
+        timing, telemetry, agent actions, configuration parameters passed as task inputs, task counts,
+        lineage, and execution order. Each DataFrame row is a task record.
+
+        Do NOT use for questions about the inherent properties of stored data artifacts (models,
+        datasets, files) — use generate_objects_df for those.
+        """
+        return _run_mcp("run_df_query", query=_query_text(query), plot=False, context_kind="tasks")
+
+    @tool("generate_plot_code")
+    def generate_plot_code(query: Any = None, card_spec: Optional[Dict[str, Any]] = None) -> str:
+        """Generate plotting output using the MCP server's in-memory task DataFrame."""
+        query_payload = query if query is not None else card_spec  # card_spec is used only when query is None
+        return _run_mcp("run_df_query", query=_query_text(query_payload), plot=True, context_kind="tasks")
+
+    @tool
+    def extract_or_fix_python_code(raw_text: str, runtime_error: Optional[str] = None) -> str:
+        """Extract or repair pandas code using the MCP server's in-memory task DataFrame columns."""
+        return _run_mcp(
+            "extract_or_fix_python_code",
+            raw_text=raw_text,
+            runtime_error=runtime_error,
+            context_kind="tasks",
+        )
+
+    @tool
+    def get_workflow_context() -> str:
+        """Return the workflow record loaded in the agent's in-memory context (DF path counterpart to query_workflows).
+
+        Use this tool ONLY when the question is specifically about workflow-level metadata: workflow name,
+        campaign, start/end timestamps, owner/user, description, hardware, or workflow structure.
+        Do NOT call this tool for questions about tasks, activities, agents, data artifacts, or model parameters —
+        use generate_result_df or generate_objects_df for those instead.
+        """
+        return _run_mcp("get_workflow_context")
+
+    @tool
+    def query_objects(
+        filter: Optional[Dict[str, Any]] = None,
+        projection: Optional[Any] = None,
+        limit: int = 100,
+    ) -> str:
+        """Query stored data-object records (ML models, datasets, blobs) by their inherent properties.
+
+        Use when the question asks about WHAT AN ARTIFACT IS — e.g. model training technique,
+        optimizer, number of parameters or weights, purpose or designed uses, science domain, loss,
+        dataset sample count or split ratio, object type, file size, or any custom_metadata field.
+        Filter by ``workflow_id`` or ``object_type`` (``"ml_model"``, ``"dataset"``).
+        ``custom_metadata`` sub-fields use dot-notation, e.g. ``custom_metadata.model_profile.params``.
+
+        Do NOT use for questions about task execution — use query_tasks for those.
+        """
+        # Objects have no campaign_id field, so _scoped_filter is bypassed and only workflow_id is applied.
+        obj_filter = dict(filter or {})
+        if (context or {}).get("workflow_id"):
+            obj_filter["workflow_id"] = context["workflow_id"]
+        return _run_mcp(
+            "query_objects",
+            filter=obj_filter,
+            projection=_coerce_projection(projection),
+            limit=limit,
+        )
+
+    @tool("generate_objects_df")
+    def generate_objects_df(query: Any) -> str:
+        """Answer questions about the inherent properties of stored data artifacts using the objects DataFrame.
+
+        Use when the question asks about WHAT AN ARTIFACT IS or WHAT IT CONTAINS — not what task
+        processed it. Examples: model training technique (custom_metadata.finetuning_technique),
+        parameter count (custom_metadata.n_params or custom_metadata.model_profile.params),
+        purpose or designed uses (custom_metadata.task_type), science domain
+        (custom_metadata.science_domain), loss, dataset sample count or split ratio, object type,
+        file size, or any field stored in custom_metadata. Each DataFrame row is an object record
+        with fields like object_type, custom_metadata.*, file_path, and workflow_id.
+
+        Do NOT use for questions about task execution (who ran tasks, timing, agent actions, task
+        inputs) — use generate_result_df for those.
+        """
+        return _run_mcp("run_df_query", query=_query_text(query), plot=False, context_kind="objects")
+
+    db_tools = [
+        query_tasks,
+        query_workflows,
+        get_task_summary,
+        list_campaigns,
+        list_agents,
+        make_chart,
+        highlight_lineage,
+        query_objects,
+    ]
+    df_tools = [
+        generate_result_df,
+        generate_plot_code,
+        extract_or_fix_python_code,
+        get_workflow_context,
+        list_agents,  # the one tool present in both lists
+        generate_objects_df,
+    ]
+    tool_context = (context or {}).get("tool_context", "db")  # "db" when the context does not say
+    if tool_context == "df":
+        tools = df_tools
+    else:
+        tools = db_tools
+
+    if allow_dashboard_edit:
+
+        @tool
+        def get_dashboard(dashboard_id: str) -> str:
+            """Get a stored dashboard spec by id."""
+            return _run_mcp("get_dashboard", dashboard_id=dashboard_id)
+
+        @tool
+        def update_dashboard(dashboard_id: str, spec: Dict[str, Any]) -> str:
+            """Replace a stored dashboard spec with a complete revised spec."""
+            return _run_mcp("update_dashboard", dashboard_id=dashboard_id, spec=spec)
+
+        tools += [get_dashboard, update_dashboard]  # extends the selected list in place
+    return tools
+
+
+def _with_workflow_schema_context(context: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+    """Best-effort: return a copy of *context* with the workflow's MCP schema prompt context attached."""
+    if not context or not context.get("workflow_id"):
+        return context
+    enriched = dict(context)
+    try:
+        payload = json.loads(run_tool("get_workflow_schema_context", kwargs={"workflow_id": context["workflow_id"]})[0])
+        if payload.get("code", 500) < 400 and isinstance(payload.get("result"), dict):
+            enriched["workflow_schema_context"] = payload["result"].get("prompt_context")
+    except Exception as exc:  # enrichment is optional, so a failure is logged rather than raised
+        FlowceptLogger().warning(f"Could not load workflow schema context: {exc}")
+    return enriched
+
+
+def _build_graph(llm, tools, agent_id: Optional[str] = None, require_first_tool: bool = False):
+    """Build the agent <-> tools LangGraph loop, compiled with the module-level MemorySaver as checkpointer."""
+    bound = llm.bind_tools(tools)
+    first_bound = llm.bind_tools(tools, tool_choice="required") if require_first_tool else bound
+    tools_by_name = {t.name: t for t in tools}
+
+    def _needs_first_tool(state: MessagesState) -> bool:
+        return require_first_tool and not any(isinstance(message, ToolMessage) for message in state["messages"])
+
+    def _latest_user_text(state: MessagesState) -> str:
+        for message in reversed(state["messages"]):
+            if isinstance(message, HumanMessage):
+                return str(message.content)
+        return ""  # no HumanMessage in the state
+
+    def _tool_calls_for_text(text: str) -> List[Dict[str, Any]]:
+        lower = text.lower()
+        names = set(tools_by_name)
+        has_specific_value = any(marker in lower for marker in ("task_id", "object_id", "workflow_id"))
+        if "generate_result_df" in names and any(
+            word in lower for word in ("submit", "submitted", "producer", "produced")
+        ):
+            # General attribution question: starts with "which "/"what ", mentions "agent", and names no
+            # task_id/object_id/workflow_id marker, so list_agents alone is returned.
+            if (
+                "list_agents" in names
+                and "agent" in lower
+                and not has_specific_value
+                and lower.strip().startswith(("which ", "what "))
+            ):
+                return [{"name": "list_agents", "args": {}, "id": str(uuid.uuid4())}]
+            query = (
+                text + "\nInterpret submission/producer questions through provenance dataflow: "
+                "find upstream task rows whose generated.* values match used.* values consumed by the target activity, "
+                "then return the upstream activity_id and agent_id. "
+                "For work-item submission, prefer producer tasks with generated list/dict descriptors that map to "
+                "target used identifiers or parameters; do not treat dataset/file/artifact producers as submitters "
+                "unless the user explicitly asks about data artifacts. "
+                "If the named value appears inside a list of dictionaries in a generated.* field, "
+                "extract the full matching dictionary and include its key-value fields."
+            )
+            tool_calls = [{"name": "generate_result_df", "args": {"query": query}, "id": str(uuid.uuid4())}]
+            if "list_agents" in names:
+                tool_calls.append({"name": "list_agents", "args": {}, "id": str(uuid.uuid4())})
+            return tool_calls
+        if "list_agents" in names and "agent" in lower and not has_specific_value:
+            return [{"name": "list_agents", "args": {}, "id": str(uuid.uuid4())}]
+        if "get_task_summary" in names and any(
+            phrase in lower
+            for phrase in (
+                "lineage",
+                "data flow",
+                "execution order",
+                "how many",
+                "count",
+                "summary",
+                "duration",
+            )
+        ):
+            return [{"name": "get_task_summary", "args": {}, "id": str(uuid.uuid4())}]
+        if "make_chart" in names and any(word in lower for word in ("plot", "chart", "graph")):
+            return [
+                {
+                    "name": "make_chart",
+                    "args": {
+                        "card_spec": {
+                            "chart_id": "chat-chart",
+                            "type": "chart",
+                            "title": text,
+                            "data": {
+                                "source": "tasks",
+                                "group_by": "activity_id",
+                                "metrics": [{"agg": "count"}],
+                            },
+                            "viz": {"kind": "bar"},
+                        }
+                    },
+                    "id": str(uuid.uuid4()),
+                }
+            ]
+        if "query_objects" in names and any(
+            phrase in lower for phrase in ("object type", "blob object", "artifact type", "object_type")
+        ):
+            return [{"name": "query_objects", "args": {}, "id": str(uuid.uuid4())}]
+        if "generate_objects_df" in names and any(
+            phrase in lower for phrase in ("object type", "blob object", "artifact type", "object_type")
+        ):
+            return [{"name": "generate_objects_df", "args": {"query": text}, "id": str(uuid.uuid4())}]
+        if "extract_or_fix_python_code" in names and ("fix" in lower or "python code" in lower or "dataframe" in lower):
+            return [{"name": "extract_or_fix_python_code", "args": {"raw_text": text}, "id": str(uuid.uuid4())}]
+        if "generate_plot_code" in names and any(word in lower for word in ("plot", "chart", "graph")):
+            return [{"name": "generate_plot_code", "args": {"query": text}, "id": str(uuid.uuid4())}]
+        if "generate_result_df" in names and any(word in lower for word in ("lineage", "execution order", "data flow")):
+            query = (
+                text
+                + "\nThe user is asking for workflow lineage/order. Return the ordered distinct activity_id values "
+                "from the workflow, using task timestamps or row order when timestamps are unavailable. "
+                "Include upstream, target, and downstream activities; do not answer only with metric-matching rows."
+            )
+            return [{"name": "generate_result_df", "args": {"query": query}, "id": str(uuid.uuid4())}]
+        if "generate_result_df" in names and "how many" in lower and any(w in lower for w in ("task", "tasks")):
+            query = (
+                text + "\nReturn a self-descriptive DataFrame, e.g. result = pd.DataFrame({'task_count': [len(df)]})"
+                " so the count is clearly labeled."
+            )
+            return [{"name": "generate_result_df", "args": {"query": query}, "id": str(uuid.uuid4())}]
+        if "generate_result_df" in names and any(
+            word in lower
+            for word in (
+                "activity",
+                "agent",
+                "count",
+                "how many",
+                "lineage",
+                "task",
+            )
+        ):
+            return [{"name": "generate_result_df", "args": {"query": text}, "id": str(uuid.uuid4())}]
+        if "query_workflows" in names and "workflow" in lower:
+            return [{"name": "query_workflows", "args": {}, "id": str(uuid.uuid4())}]
+        if "get_workflow_context" in names and any(word in lower for word in ("workflow", "workflows")):
+            # DF path: workflow records live in the MCP context object, not the tasks DataFrame.
+            # get_workflow_context is the DF-path counterpart to query_workflows.
+            return [{"name": "get_workflow_context", "args": {}, "id": str(uuid.uuid4())}]
+        if "generate_result_df" in names:
+            return [{"name": "generate_result_df", "args": {"query": text}, "id": str(uuid.uuid4())}]
+        return [{"name": next(iter(tools_by_name)), "args": {}, "id": str(uuid.uuid4())}]  # fallback: first tool
+
+    def _enforce_first_tool(response: AIMessage, state: MessagesState) -> AIMessage:
+        if not _needs_first_tool(state):  # otherwise the LLM reply is replaced by keyword-routed tool calls
+            return response
+        return AIMessage(content="", tool_calls=_tool_calls_for_text(_latest_user_text(state)))
+
+    if INSTRUMENTATION_ENABLED and agent_id is not None:
+        from flowcept.instrumentation.flowcept_agent_task import FlowceptLLM
+        from flowcept.instrumentation.task_capture import FlowceptTask
+
+        # workflow_id is resolved automatically from Flowcept.current_workflow_id
+        # which is set by the Flowcept context in run_chat.
+        instrumented_llm = FlowceptLLM(bound, agent_id=agent_id, return_response_object=True)
+
+        def call_model(state: MessagesState):
+            """Agent node: invoke the LLM with current messages (instrumented)."""
+            active_llm = (
+                FlowceptLLM(first_bound, agent_id=agent_id, return_response_object=True)
+                if _needs_first_tool(state)
+                else instrumented_llm
+            )
+            return {"messages": [_enforce_first_tool(active_llm.invoke(state["messages"]), state)]}
+
+        def call_tools(state: MessagesState):
+            """Tools node: execute all pending tool calls with provenance capture."""
+            last = state["messages"][-1]
+            tool_msgs = []
+            for tc in getattr(last, "tool_calls", []):
+                name = tc["name"]
+                args = tc.get("args") or {}
+                call_id = tc.get("id") or name  # the tool name stands in for a missing call id
+                tool_fn = tools_by_name.get(name)
+                with FlowceptTask(
+                    activity_id=name,
+                    subtype=PROV_AGENT.AGENT_TOOL,
+                    used=sanitize_json_like(args, mongo_safe_keys=True),
+                    agent_id=agent_id,
+                ) as task:
+                    output = (
+                        tool_fn.invoke(args) if tool_fn is not None else json.dumps({"error": f"Unknown tool {name}"})
+                    )
+                    task.end(generated={"output": output[:500] if isinstance(output, str) else output})
+                if isinstance(output, str) and len(output) > _MAX_TOOL_RESULT_CHARS:
+                    output = output[:_MAX_TOOL_RESULT_CHARS] + f"... [truncated, {len(output)} chars total]"
+                tool_msgs.append(ToolMessage(content=output, tool_call_id=call_id, name=name))
+            return {"messages": tool_msgs}
+
+    else:
+
+        def call_model(state: MessagesState):
+            """Agent node: invoke the LLM with current messages."""
+            response = (first_bound if _needs_first_tool(state) else bound).invoke(state["messages"])
+            return {"messages": [_enforce_first_tool(response, state)]}
+
+        def call_tools(state: MessagesState):
+            """Tools node: execute all pending tool calls and return ToolMessages."""
+            last = state["messages"][-1]
+            tool_msgs = []
+            for tc in getattr(last, "tool_calls", []):
+                name = tc["name"]
+                args = tc.get("args") or {}
+                call_id = tc.get("id") or name  # the tool name stands in for a missing call id
+                tool_fn = tools_by_name.get(name)
+                output = tool_fn.invoke(args) if tool_fn is not None else json.dumps({"error": f"Unknown tool {name}"})
+                if isinstance(output, str) and len(output) > _MAX_TOOL_RESULT_CHARS:
+                    output = output[:_MAX_TOOL_RESULT_CHARS] + f"... [truncated, {len(output)} chars total]"
+                tool_msgs.append(ToolMessage(content=output, tool_call_id=call_id, name=name))
+            return {"messages": tool_msgs}
+
+    def should_continue(state: MessagesState):
+        """Route to tools if the last AI message has tool calls; otherwise end."""
+        last = state["messages"][-1]
+        if isinstance(last, AIMessage) and getattr(last, "tool_calls", None):
+            return "tools"
+        return END
+
+    graph = StateGraph(MessagesState)
+    graph.add_node("agent", call_model)
+    graph.add_node("tools", call_tools)
+    graph.set_entry_point("agent")
+    graph.add_conditional_edges("agent", should_continue)
+    graph.add_edge("tools", "agent")  # every tools step hands control back to the agent node
+    return graph.compile(checkpointer=_memory)
+
+
+def _prepare_input_messages(
+    messages: List[Dict[str, str]],
+    context: Optional[Dict[str, Any]],
+    thread_id: Optional[str],
+) -> List:
+    """Translate client message dicts into LangChain messages for one graph run.
+
+    A system prompt built from *context* is placed first unless *thread_id* is
+    set and the module-level MemorySaver already holds a checkpoint for it; in
+    that case only the converted *messages* are returned. Entries whose role is
+    ``"assistant"`` become ``AIMessage``; every other role becomes ``HumanMessage``.
+    """
+    # Only a call that names a thread can already have server-side history.
+    has_checkpoint = bool(thread_id) and _memory.get({"configurable": {"thread_id": thread_id}}) is not None
+
+    def _convert(entry: Dict[str, str]):
+        text = entry.get("content", "")
+        return AIMessage(content=text) if entry.get("role") == "assistant" else HumanMessage(content=text)
+
+    converted = [_convert(entry) for entry in messages]
+    if has_checkpoint:
+        return converted
+
+    # New thread or stateless call: lead with the system prompt.
+    return [SystemMessage(content=build_chat_system_prompt(context))] + converted
+
+
+def run_chat(
+    llm,
+    messages: List[Dict[str, str]],
+    context: Optional[Dict[str, Any]] = None,
+    allow_dashboard_edit: bool = False,
+    thread_id: Optional[str] = None,
+) -> Generator[Dict[str, Any], None, None]:
+    """Run one chat turn as a generator of events backed by LangGraph + MemorySaver.
+
+    Yields dict events: ``{"event": "tool_call"|"tool_result"|"card"|"ui:highlight"|"token"|"done"|"error", ...}``.
+
+    When *thread_id* is ``None`` the call is stateless (client manages full history in
+    *messages*). When *thread_id* is provided the server owns history: pass only the
+    new message(s) in *messages* on follow-up turns.
+
+    Parameters
+    ----------
+    llm : Any
+        A langchain chat model (from ``build_llm_model``).
+    messages : list of dict
+        ``[{"role": "user"|"assistant", "content": "..."}]``.
+        Full history when *thread_id* is ``None``; only new messages otherwise.
+    context : dict, optional
+        UI context injected into the system prompt and chart tool.
+    allow_dashboard_edit : bool, optional
+        Whether dashboard-modifying tools are bound.
+    thread_id : str, optional
+        Stable ID that keys server-side conversation memory.
+    """
+    logger = FlowceptLogger()
+    context = _with_workflow_schema_context(context)
+    tools = _build_langchain_tools(context, allow_dashboard_edit)
+
+    effective_thread_id = thread_id if thread_id is not None else str(uuid.uuid4())
+
+    agent_id: Optional[str] = None
+    if INSTRUMENTATION_ENABLED:
+        from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager
+
+        agent_id = BaseAgentContextManager.agent_id or effective_thread_id
+
+    try:
+        llm.bind_tools(tools)
+    except (NotImplementedError, AttributeError):
+        logger.warning("Chat LLM does not support tool binding; answering without tools.")
+        lc = [SystemMessage(content=build_chat_system_prompt(context))] + [
+            AIMessage(content=m.get("content", ""))
+            if m.get("role") == "assistant"
+            else HumanMessage(content=m.get("content", ""))
+            for m in messages
+        ]
+        try:
+            response = llm.invoke(lc)
+            yield {"event": "token", "data": getattr(response, "content", str(response))}
+        except Exception as exc:
+            logger.exception(exc)
+            yield {"event": "error", "data": str(exc)}
+        yield {"event": "done"}
+        return
+
+    config = {
+        "configurable": {"thread_id": effective_thread_id},
+        "recursion_limit": MAX_TOOL_ITERATIONS * 2 + 2,
+    }
+
+    graph = _build_graph(
+        llm,
+        tools,
+        agent_id=agent_id,
+        require_first_tool=(context or {}).get("tool_context", "db") in {"db", "df"},
+    )
+    lc_messages = _prepare_input_messages(messages, context, thread_id)
+
+    # Each LangGraph execution gets its own Flowcept workflow so all AI model
+    # invocations and tool calls within this call share a single workflow_id.
+    # Chat owns its persistence lifecycle so HTTP requests, tests, and deployed
+    # webservice instances all record agent provenance without external state.
+    from flowcept.flowcept_api.flowcept_controller import Flowcept as _FC
+
+    with _FC(
+        workflow_name=CHAT_WORKFLOW_NAME,
+        start_persistence=True,
+        save_workflow=True,
+        agent_name="FlowceptAgent",
+    ):
+        accumulated_tool_results: List[str] = []
+        try:
+            for chunk in graph.stream({"messages": lc_messages}, config=config, stream_mode="updates"):
+                for node_name, node_output in chunk.items():
+                    msgs = node_output.get("messages", [])
+                    if node_name == "agent":
+                        last = msgs[-1] if msgs else None
+                        if last is None:
+                            continue
+                        tool_calls = getattr(last, "tool_calls", None) or []
+                        if tool_calls:
+                            for tc in tool_calls:
+                                yield {"event": "tool_call", "data": {"name": tc["name"], "args": tc.get("args", {})}}
+                        else:
+                            yield {"event": "token", "data": getattr(last, "content", "")}
+                            yield {"event": "done"}
+                    elif node_name == "tools":
+                        for tm in msgs:
+                            name = getattr(tm, "name", "")
+                            accumulated_tool_results.append(f"[{name}]: {tm.content[:2000]}")
+                            summary: Dict[str, Any] = {"name": name}
+                            try:
+                                parsed = json.loads(tm.content)
+                                summary["code"] = parsed.get("code")
+                                summary["tool_name"] = parsed.get("tool_name")
+                                if name == "make_chart" and isinstance(parsed.get("result"), dict):
+                                    yield {"event": "card", "data": parsed["result"]}
+                                if name == "highlight_lineage" and isinstance(parsed.get("result"), dict):
+                                    yield {"event": "ui:highlight", "data": parsed["result"]}
+                            except Exception:
+                                pass
+                            yield {"event": "tool_result", "data": summary}
+        except GraphRecursionError:
+            logger.warning(
+                f"LLM hit the tool-call recursion limit ({MAX_TOOL_ITERATIONS} iterations) "
+                "without producing a final answer. Synthesizing from accumulated tool results."
+            )
+            if accumulated_tool_results:
+                summary_prompt = (
+                    "The following tool results were retrieved. "
+                    "Write a concise final answer to the user's question based solely on this data. "
+                    "Do not call any tools.\n\n" + "\n\n".join(accumulated_tool_results)
+                )
+                try:
+                    response = llm.invoke([HumanMessage(content=summary_prompt)])
+                    content = getattr(response, "content", None) or str(response)
+                    if content:
+                        yield {"event": "token", "data": content}
+                    else:
+                        yield {"event": "token", "data": "\n\n".join(accumulated_tool_results[:3])}
+                except Exception as fallback_exc:
+                    logger.exception(fallback_exc)
+                    # Synthesis failed — surface raw tool results so the caller gets a 200
+                    yield {"event": "token", "data": "\n\n".join(accumulated_tool_results[:3])}
+            else:
+                yield {"event": "error", "data": "Reached tool call limit without retrieving any data."}
+            yield {"event": "done"}
+        except Exception as e:
+            logger.exception(e)
+            yield {"event": "error", "data": _format_error(e)}
+            # Emit the terminal event here too, as the other error paths in this function do.
+            yield {"event": "done"}
diff --git a/src/flowcept/agents/data_query_tools/__init__.py b/src/flowcept/agents/data_query_tools/__init__.py
new file mode 100644
index 00000000..31e7bcf1
--- /dev/null
+++ b/src/flowcept/agents/data_query_tools/__init__.py
@@ -0,0 +1 @@
+"""Plain-Python tool cores — no framework (MCP/LangChain) imports."""
diff --git a/src/flowcept/agents/data_query_tools/dashboard_tools.py b/src/flowcept/agents/data_query_tools/dashboard_tools.py
new file mode 100644
index 00000000..c4ad9f72
--- /dev/null
+++ b/src/flowcept/agents/data_query_tools/dashboard_tools.py
@@ -0,0 +1,122 @@
+"""Dashboard agent tools: chart building and dashboard CRUD.
+
+Plain Python — no LangChain, no MCP, no webservice imports.
+These tools are used by the LangGraph chat agent and MCP server; framework
+wrappers live in their respective layers.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+from flowcept.agents.tool_result import ToolResult
+from flowcept.commons.daos.docdb_dao.docdb_dao_utils import validate_filter as _validate_filter
+from flowcept.commons.flowcept_logger import FlowceptLogger
+from flowcept.commons.utils import normalize_docs
+from flowcept.flowcept_api.db_api import DBAPI
+
+
+def _guarded(tool_name: str):
+    """Decorator: validate a ``filter`` keyword argument and convert raised errors to ToolResult codes."""
+
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            try:
+                if "filter" in kwargs:  # only a keyword-passed filter is checked here
+                    _validate_filter(kwargs.get("filter"))
+                return func(*args, **kwargs)
+            except ValueError as e:  # reported to the caller as code 400
+                return ToolResult(code=400, result=str(e), tool_name=tool_name)
+            except Exception as e:  # anything else is logged and reported as code 499
+                FlowceptLogger().exception(e)
+                return ToolResult(code=499, result=f"Error in {tool_name}: {e}", tool_name=tool_name)
+
+        wrapper.__name__ = func.__name__  # carry over the wrapped function's name and docstring
+        wrapper.__doc__ = func.__doc__
+        return wrapper
+
+    return decorator
+
+
+def _normalize(docs: List[Dict]) -> List[Dict]:
+    return normalize_docs(docs)  # thin module-local alias for flowcept.commons.utils.normalize_docs
+
+
+@_guarded("make_chart")
+def make_chart(card_spec: Dict[str, Any], context: Optional[Dict[str, Any]] = None) -> ToolResult:
+    """Resolve a chart card's declarative ``data`` binding into rows ready for plotting.
+
+    Parameters
+    ----------
+    card_spec : dict
+        Dashboard chart spec; its ``data`` entry (required) describes the query and may carry a ``filter``.
+    context : dict, optional
+        Additional filter; validated when truthy and forwarded to ``DBAPI.resolve_chart_data``.
+
+    Returns
+    -------
+    ToolResult
+        Code 301 with ``{"chart": card_spec, "rows": [...], "count": ...}``; code 400 if ``data`` is missing.
+    """
+    binding = card_spec.get("data")
+    if not binding:
+        return ToolResult(code=400, result="Chart spec must include a data binding.", tool_name="make_chart")
+    _validate_filter(binding.get("filter", {}))
+    if context:
+        _validate_filter(context)
+    chart_data = DBAPI().resolve_chart_data(binding, context=context)
+    payload = {"chart": card_spec, "rows": normalize_docs(chart_data["rows"]), "count": chart_data["count"]}
+    return ToolResult(code=301, result=payload, tool_name="make_chart")
+
+
+@_guarded("get_dashboard")
+def get_dashboard(dashboard_id: str) -> ToolResult:
+    """Look up one stored dashboard spec through the DAO.
+
+    Parameters
+    ----------
+    dashboard_id : str
+        Identifier of the dashboard to fetch.
+
+    Returns
+    -------
+    ToolResult
+        Code 301 with the stored spec as ``result``, or code 404 with a not-found message.
+    """
+    stored = DBAPI.get_dao_instance().get_dashboard(dashboard_id)
+    if stored is not None:
+        return ToolResult(code=301, result=stored, tool_name="get_dashboard")
+    return ToolResult(code=404, result=f"Dashboard not found: {dashboard_id}", tool_name="get_dashboard")
+
+
+@_guarded("update_dashboard")
+def update_dashboard(dashboard_id: str, spec: Dict[str, Any]) -> ToolResult:
+    """Overwrite an existing dashboard with *spec*, keeping its id and original ``created_at``.
+
+    Parameters
+    ----------
+    dashboard_id : str
+        Identifier of the dashboard to replace; it must already be stored.
+    spec : dict
+        Complete replacement spec. It is updated in place with the id and timestamps before saving.
+
+    Returns
+    -------
+    ToolResult
+        Code 301 with the saved spec; 404 if the dashboard does not exist; 500 if the save fails.
+    """
+    store = DBAPI.get_dao_instance()
+    current = store.get_dashboard(dashboard_id)
+    if current is None:
+        return ToolResult(code=404, result=f"Dashboard not found: {dashboard_id}", tool_name="update_dashboard")
+    _validate_filter(spec.get("context", {}))
+    for chart in spec.get("charts", []):
+        binding = chart.get("data")
+        if binding:
+            _validate_filter(binding.get("filter", {}))
+    stamp = datetime.now(timezone.utc).isoformat()
+    spec.update(dashboard_id=dashboard_id, created_at=current.get("created_at"), updated_at=stamp)
+    if store.save_dashboard(spec):
+        return ToolResult(code=301, result=spec, tool_name="update_dashboard")
+    return ToolResult(code=500, result="Could not save dashboard.", tool_name="update_dashboard")
diff --git a/src/flowcept/agents/tools/prov_tools.py b/src/flowcept/agents/data_query_tools/db_query_tools.py
similarity index 57%
rename from src/flowcept/agents/tools/prov_tools.py
rename to src/flowcept/agents/data_query_tools/db_query_tools.py
index 5105c0f1..f964359c 100644
--- a/src/flowcept/agents/tools/prov_tools.py
+++ b/src/flowcept/agents/data_query_tools/db_query_tools.py
@@ -10,16 +10,12 @@
from typing import Any, Dict, List, Optional
-from datetime import datetime, timezone
-
-from flowcept.agents.agents_utils import ToolResult
+from flowcept.agents.tool_result import ToolResult
+from flowcept.agents.data_query_tools.tools_utils import query_runtime_retry
from flowcept.commons.flowcept_logger import FlowceptLogger
from flowcept.configs import AGENT_CHAT_MAX_QUERY_LIMIT
from flowcept.flowcept_api.db_api import DBAPI
-from flowcept.webservice.schemas.dashboards import DashboardChart, DashboardSpec
-from flowcept.webservice.services import stats
-from flowcept.webservice.services.dashboard_store import get_dashboard_store
-from flowcept.webservice.services.serializers import normalize_docs
+from flowcept.commons.utils import normalize_docs
ALLOWED_FILTER_OPERATORS = {
"$and",
@@ -90,10 +86,51 @@ def wrapper(*args, **kwargs):
return decorator
+_WORKFLOW_HEAVY_FIELDS = frozenset(
+ {
+ "machine_info",
+ "flowcept_settings",
+ "code_repository",
+ "conf",
+ "extra_metadata",
+ "environment_id",
+ "sys_name",
+ "interceptor_ids",
+ "adapter_id",
+ "flowcept_version",
+ }
+)
+
+
def _normalize(docs: List[Dict]) -> List[Dict]:
return normalize_docs(docs)
+def _normalize_workflows(docs: List[Dict]) -> List[Dict]:
+    """Drop the ``_WORKFLOW_HEAVY_FIELDS`` keys from each workflow doc, then normalize the slimmed docs."""
+    slim_docs = [{key: doc[key] for key in doc if key not in _WORKFLOW_HEAVY_FIELDS} for doc in docs]
+    return normalize_docs(slim_docs)
+
+
+def _sanitize_projection(projection: Optional[List[str]]) -> Optional[List[str]]:
+    """Remove child paths whose parent field is already in *projection*.
+
+    MongoDB raises ``OperationFailure: Path collision`` when a projection
+    includes both ``"generated"`` and ``"generated.val_accuracy"``. This
+    helper strips the redundant children so the parent field covers them.
+    """
+    if not projection:
+        return projection
+    requested = set(projection)  # O(1) ancestor lookups instead of rescanning the list per prefix
+    result = []
+    for field in projection:
+        parts = field.split(".")
+        # keep this field only if none of its proper ancestor paths was itself requested
+        if not any(".".join(parts[:i]) in requested for i in range(1, len(parts))):
+            result.append(field)
+    return result or None
+
+
@_guarded("query_tasks")
def query_tasks(
filter: Optional[Dict[str, Any]] = None,
@@ -120,7 +157,27 @@ def query_tasks(
``result`` holds ``{"items": [...], "count": int}``.
"""
sort_tuples = None if not sort else [(s["field"], s["order"]) for s in sort]
- docs = DBAPI().task_query(filter=filter or {}, projection=projection, limit=limit, sort=sort_tuples) or []
+ proj_holder = [_sanitize_projection(projection)]
+
+ def _execute():
+ return (
+ DBAPI().task_query(
+ filter=filter or {},
+ projection=proj_holder[0],
+ limit=limit,
+ sort=sort_tuples,
+ )
+ or []
+ )
+
+ def _fix(exc, attempt):
+ # Only auto-fix MongoDB projection path-collision errors; let others propagate.
+ if "Path collision" not in str(exc):
+ raise exc
+ proj_holder[0] = _sanitize_projection(proj_holder[0])
+ return _execute
+
+ docs = query_runtime_retry(_execute, _fix, max_attempts=2)
items = _normalize(docs)
return ToolResult(code=301, result={"items": items, "count": len(items)}, tool_name="query_tasks")
@@ -142,7 +199,7 @@ def query_workflows(filter: Optional[Dict[str, Any]] = None, limit: int = 100) -
``result`` holds ``{"items": [...], "count": int}``.
"""
docs = (DBAPI().workflow_query(filter=filter or {}) or [])[:limit]
- items = _normalize(docs)
+ items = _normalize_workflows(docs)
return ToolResult(code=301, result={"items": items, "count": len(items)}, tool_name="query_workflows")
@@ -160,61 +217,91 @@ def get_task_summary(filter: Optional[Dict[str, Any]] = None) -> ToolResult:
ToolResult
``result`` holds the summary dict.
"""
- summary = stats.task_summary(DBAPI(), filter or {})
+ summary = DBAPI().task_summary(filter or {})
+ activity_stats = summary.get("activity_stats") or []
+ summary["activity_ids"] = [row.get("activity_id") for row in activity_stats if row.get("activity_id")]
+ summary["activity_counts"] = {
+ row.get("activity_id"): row.get("count") for row in activity_stats if row.get("activity_id")
+ }
return ToolResult(code=301, result=_normalize([summary])[0], tool_name="get_task_summary")
@_guarded("list_campaigns")
-def list_campaigns() -> ToolResult:
+def list_campaigns(campaign_id: Optional[str] = None) -> ToolResult:
"""List derived campaign summaries (campaigns group workflows and tasks).
+ Parameters
+ ----------
+ campaign_id : str, optional
+ When provided, only the summary for that campaign is returned.
+ Pass the campaign_id from the user context to scope the result.
+
Returns
-------
ToolResult
``result`` holds ``{"items": [...], "count": int}``.
"""
- items = _normalize(stats.derive_campaigns(DBAPI()))
+ items = _normalize(DBAPI().derive_campaigns(campaign_id=campaign_id))
return ToolResult(code=301, result={"items": items, "count": len(items)}, tool_name="list_campaigns")
@_guarded("list_agents")
-def list_agents() -> ToolResult:
+def list_agents(filter: Dict = None) -> ToolResult:
"""List derived agent summaries (agents observed in task provenance).
+ Parameters
+ ----------
+ filter : dict, optional
+ Mongo-style filter to scope the agent derivation (e.g., ``{"workflow_id": "..."}``).
+
Returns
-------
ToolResult
``result`` holds ``{"items": [...], "count": int}``.
"""
- items = _normalize(stats.derive_agents(DBAPI()))
+ items = _normalize(DBAPI().derive_agents(filter))
return ToolResult(code=301, result={"items": items, "count": len(items)}, tool_name="list_agents")
-@_guarded("make_chart")
-def make_chart(card_spec: Dict[str, Any], context: Optional[Dict[str, Any]] = None) -> ToolResult:
- """Build a dashboard-style chart card: validate the spec and resolve its data rows.
+@_guarded("query_objects")
+def query_objects(
+    filter: Optional[Dict[str, Any]] = None,
+    projection: Optional[List[str]] = None,
+    limit: int = 100,
+) -> ToolResult:
+    """Query stored data-object provenance records with a Mongo-style filter.
+
+    Data objects include ML models (``object_type="ml_model"``), datasets
+    (``object_type="dataset"``), and generic blobs. Their ``custom_metadata``
+    field carries artifact-specific information such as ``model_profile.params``,
+    ``n_input_neurons``, ``loss``, ``split_ratio``, and ``n_samples``.
+    Use this tool when the user asks about model parameters, dataset size, file
+    types, artifact sizes, or any stored artifact metadata.
     Parameters
     ----------
-    card_spec : dict
-        A dashboard ``DashboardChart`` spec (type chart/metric/table with a ``data`` binding).
-    context : dict, optional
-        Extra filter ANDed into the chart data filter (e.g., ``{"workflow_id": "..."}``).
+    filter : dict, optional
+        Mongo-style filter. Common fields: ``object_type``, ``workflow_id``,
+        ``task_id``, ``tags``. ``custom_metadata`` sub-fields use dot-notation,
+        e.g. ``{"custom_metadata.model_profile.params": {"$gt": 2}}``.
+    projection : list of str, optional
+        Fields to include. A dot-notation path keeps its whole top-level field.
+    limit : int, optional
+        Maximum records (capped by settings).
     Returns
     -------
     ToolResult
-        ``result`` holds ``{"chart": , "rows": [...], "count": int}``.
+        ``result`` holds ``{"items": [...], "count": int}``.
     """
-    card = DashboardChart(**card_spec)
-    if card.data is None:
-        return ToolResult(code=400, result="Chart spec must include a data binding.", tool_name="make_chart")
-    validate_filter(card.data.filter)
-    if context:
-        validate_filter(context)
-    resolved = stats.resolve_chart_data(DBAPI(), card.data, context=context)
-    result = {"chart": card.model_dump(), "rows": _normalize(resolved["rows"]), "count": resolved["count"]}
-    return ToolResult(code=301, result=result, tool_name="make_chart")
+    capped = min(limit, MAX_QUERY_LIMIT)
+    docs = (DBAPI().blob_object_query(filter=filter or {}) or [])[:capped]
+    items = _normalize(docs)
+    if projection:
+        # Match the literal paths and their root fields: items are filtered by top-level key only.
+        wanted = set(projection) | {path.split(".", 1)[0] for path in projection}
+        items = [{k: v for k, v in d.items() if k in wanted} for d in items]
+    return ToolResult(code=301, result={"items": items, "count": len(items)}, tool_name="query_objects")
@_guarded("highlight_lineage")
@@ -257,64 +344,28 @@ def highlight_lineage(
if not resolved_ids:
return ToolResult(code=404, result="No tasks found for the given criteria.", tool_name="highlight_lineage")
- # Return only the seed task IDs. The frontend BFS expands ancestors/descendants
+ # Fetch activity names for the resolved task IDs so the LLM can describe the lineage.
+ activity_map: Dict[str, str] = {}
+ try:
+ detail_docs = (
+ db.task_query(
+ filter={"task_id": {"$in": resolved_ids}},
+ projection=["task_id", "activity_id", "agent_id"],
+ limit=len(resolved_ids) + 10,
+ )
+ or []
+ )
+ for doc in detail_docs:
+ tid = doc.get("task_id", "")
+ if tid:
+ activity_map[tid] = doc.get("activity_id") or doc.get("agent_id") or ""
+ except Exception:
+ pass
+
+ # Return seed task IDs. The frontend BFS expands ancestors/descendants
# from these seeds using the dataflow graph — a single source of truth for lineage.
return ToolResult(
code=301,
- result={"task_ids": resolved_ids},
+ result={"task_ids": resolved_ids, "activities": activity_map},
tool_name="highlight_lineage",
)
-
-
-@_guarded("get_dashboard")
-def get_dashboard(dashboard_id: str) -> ToolResult:
- """Get a stored dashboard spec by id.
-
- Parameters
- ----------
- dashboard_id : str
- Dashboard identifier.
-
- Returns
- -------
- ToolResult
- ``result`` holds the dashboard spec dict, or a 404 message.
- """
- doc = get_dashboard_store().get(dashboard_id)
- if doc is None:
- return ToolResult(code=404, result=f"Dashboard not found: {dashboard_id}", tool_name="get_dashboard")
- return ToolResult(code=301, result=doc, tool_name="get_dashboard")
-
-
-@_guarded("update_dashboard")
-def update_dashboard(dashboard_id: str, spec: Dict[str, Any]) -> ToolResult:
- """Replace a stored dashboard spec (validated), preserving id and creation time.
-
- Parameters
- ----------
- dashboard_id : str
- Dashboard identifier.
- spec : dict
- Full replacement ``DashboardSpec``.
-
- Returns
- -------
- ToolResult
- ``result`` holds the saved dashboard spec dict.
- """
- store = get_dashboard_store()
- existing = store.get(dashboard_id)
- if existing is None:
- return ToolResult(code=404, result=f"Dashboard not found: {dashboard_id}", tool_name="update_dashboard")
- validated = DashboardSpec(**spec)
- validate_filter(validated.context)
- for card in validated.cards:
- if card.data is not None:
- validate_filter(card.data.filter)
- validated.dashboard_id = dashboard_id
- validated.created_at = existing.get("created_at")
- validated.updated_at = datetime.now(timezone.utc).isoformat()
- doc = validated.model_dump()
- if not store.save(doc):
- return ToolResult(code=500, result="Could not save dashboard.", tool_name="update_dashboard")
- return ToolResult(code=301, result=doc, tool_name="update_dashboard")
diff --git a/src/flowcept/agents/data_query_tools/df_query_tools.py b/src/flowcept/agents/data_query_tools/df_query_tools.py
new file mode 100644
index 00000000..963555f1
--- /dev/null
+++ b/src/flowcept/agents/data_query_tools/df_query_tools.py
@@ -0,0 +1,565 @@
+"""Plain-Python DF (DataFrame) query tools.
+
+Functions in this module operate on pandas DataFrames and do NOT import from the
+MCP framework (no ``@mcp_flowcept.tool()``). The MCP layer lives in
+``mcp_tools/df_query_mcp_tools.py``.
+"""
+
+import json
+from flowcept.agents.tool_result import ToolResult
+from flowcept.agents.llm.builders import build_llm_model
+from flowcept.agents.data_query_tools.tools_utils import query_runtime_retry
+from flowcept.commons.flowcept_logger import FlowceptLogger
+
+from flowcept.agents.data_query_tools.pandas_utils import (
+ load_saved_df,
+ safe_execute,
+ safe_json_parse,
+ normalize_output,
+ format_result_df,
+ summarize_df,
+)
+
+from flowcept.agents.prompts.df_query_prompts import (
+ build_plot_code_prompt,
+ extract_or_fix_json_code_prompt,
+ build_pandas_code_prompt,
+ build_dataframe_summarizer_prompt,
+ build_extract_or_fix_python_code_prompt,
+)
+
+EMPTY_DF_MESSAGE = "Current df is empty or null."
+
+
+def _call_llm(llm, prompt: str) -> str:
+    """Call an LLM with a string prompt and always return a plain string.
+
+    Accepts ``str`` results (``FlowceptLLM``) or ``.content`` holders (``AIMessage``); other content is str()-ed.
+    """
+    response = llm.invoke(prompt)
+    content = getattr(response, "content", response)
+    return content if isinstance(content, str) else str(content)
+
+
+def run_df_query(
+    query: str,
+    df,
+    schema,
+    value_examples,
+    custom_user_guidance,
+    llm=None,
+    plot=False,
+    context_kind: str = "tasks",
+) -> ToolResult:
+    r"""Dispatch a query against a DataFrame to the matching DF tool.
+
+    The first rule that matches wins:
+
+    1. ``df`` is ``None`` or has no rows: code 404 with ``EMPTY_DF_MESSAGE``.
+    2. ``query`` contains ``"save"``: ``save_df`` is called.
+    3. ``query`` contains ``"result = df"``: ``query`` is executed as code by ``run_df_code``.
+    4. ``plot`` is truthy: ``generate_plot_code``.
+    5. Otherwise: ``generate_result_df``.
+
+    Parameters
+    ----------
+    query : str
+        Natural language query or Python code snippet.
+    df : pandas.DataFrame
+        The DataFrame to query.
+    schema : dict
+        Schema of the DataFrame.
+    value_examples : dict
+        Example values for each field.
+    custom_user_guidance : list
+        Custom guidance strings from the user.
+    llm : callable, optional
+        LLM callable. Built from settings by the generators when None.
+    plot : bool, optional
+        If True, generate plotting code.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    ToolResult
+    """
+    if df is None or not len(df):
+        return ToolResult(code=404, result=EMPTY_DF_MESSAGE)
+    if "save" in query:
+        return save_df(df, schema, value_examples)
+    if "result = df" in query:
+        return run_df_code(user_code=query, df=df)
+
+    # Both generators take the same leading positional arguments, so pick one and call it once.
+    generator = generate_plot_code if plot else generate_result_df
+    return generator(
+        llm,
+        query,
+        schema,
+        value_examples,
+        df,
+        custom_user_guidance=custom_user_guidance,
+        context_kind=context_kind,
+    )
+
+
+def execute_df_code(user_code: str, df) -> ToolResult:
+    """Run caller-supplied pandas code on ``df``, short-circuiting when there is no data.
+
+    Parameters
+    ----------
+    user_code : str
+        Pandas snippet; it is expected to leave its output in ``result``.
+    df : pandas.DataFrame
+        Frame the snippet runs against.
+
+    Returns
+    -------
+    ToolResult
+        Code 404 with ``EMPTY_DF_MESSAGE`` for a missing/empty frame, else the ``run_df_code`` outcome.
+    """
+    has_rows = df is not None and len(df)
+    return run_df_code(user_code=user_code, df=df) if has_rows else ToolResult(code=404, result=EMPTY_DF_MESSAGE)
+
+
+def generate_plot_code(
+    llm,
+    query,
+    dynamic_schema,
+    value_examples,
+    df,
+    custom_user_guidance=None,
+    context_kind="tasks",
+) -> ToolResult:
+    """Generate DataFrame and plotting code from a natural language query using an LLM.
+
+    Parameters
+    ----------
+    llm : callable
+        LLM callable. Built from settings if None.
+    query : str
+        Natural language query.
+    dynamic_schema : dict
+        Schema of the DataFrame.
+    value_examples : dict
+        Example values.
+    df : pandas.DataFrame
+        The DataFrame.
+    custom_user_guidance : list, optional
+        Custom guidance strings. Currently not forwarded to the plot prompt builder.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    ToolResult
+    """
+    if llm is None:
+        llm = build_llm_model()
+    plot_prompt = build_plot_code_prompt(
+        query,
+        dynamic_schema,
+        value_examples,
+        list(df.columns),
+        context_kind=context_kind,
+    )
+    try:
+        response = _call_llm(llm, plot_prompt)
+    except Exception as e:
+        return ToolResult(code=400, result=str(e), extra=plot_prompt)
+
+    result_code, plot_code, description = None, None, ""
+    try:
+        parsed = safe_json_parse(response)
+        result_code = parsed["result_code"]
+        plot_code = parsed["plot_code"]
+        description = parsed.get("description", "")
+    except (ValueError, KeyError, TypeError):  # TypeError: valid JSON that is not an object (e.g. a list)
+        tool_response = extract_or_fix_json_code(llm, response)
+        if tool_response.code != 201:
+            return ToolResult(code=499, result=tool_response.result)
+        try:
+            parsed = safe_json_parse(tool_response.result)
+            result_code = parsed.get("result_code")
+            plot_code = parsed.get("plot_code")
+            description = parsed.get("description", "")
+            if not result_code or not plot_code:
+                return ToolResult(
+                    code=405,
+                    result=f"Fixed JSON missing result_code or plot_code: {parsed}",
+                    extra=plot_prompt,
+                )
+        except ValueError as e:
+            return ToolResult(
+                code=405,
+                result=f"Tried to parse this as JSON: {tool_response.result}, but got Error: {e}",
+                extra=plot_prompt,
+            )
+        except Exception as e:
+            return ToolResult(code=499, result=str(e), extra=plot_prompt)
+
+    columns = list(df.columns)
+    code_holder = [result_code]
+    retry_count = [0]
+
+    def _execute():
+        return safe_execute(df, code_holder[0])
+
+    def _fix(exc, attempt):
+        tool_result = extract_or_fix_python_code(llm, code_holder[0], columns, runtime_error=str(exc))
+        if tool_result.code != 201:
+            raise RuntimeError(f"LLM could not fix the code: {tool_result.result}")
+        code_holder[0] = tool_result.result
+        retry_count[0] += 1
+        return _execute
+
+    try:
+        result_df = query_runtime_retry(_execute, _fix, max_attempts=3)
+        result_code = code_holder[0]
+    except Exception as e:
+        return ToolResult(code=406, result=str(e), extra={"retry_attempts": retry_count[0]})
+
+    try:
+        result_df = format_result_df(result_df)
+    except Exception as e:
+        return ToolResult(code=404, result=str(e))
+
+    return ToolResult(
+        code=301,
+        result={"result_df": result_df, "plot_code": plot_code, "result_code": result_code, "description": description},
+        tool_name="generate_plot_code",
+        extra={"retry_attempts": retry_count[0]},
+    )
+
+
+def generate_result_df(
+    llm,
+    query: str,
+    dynamic_schema,
+    example_values,
+    df,
+    custom_user_guidance=None,
+    attempt_fix=True,
+    summarize=True,
+    context_kind="tasks",
+) -> ToolResult:
+    """Generate a result DataFrame from a natural language query using an LLM.
+
+    Parameters
+    ----------
+    llm : callable
+        LLM callable. Built from settings if None.
+    query : str
+        Natural language query.
+    dynamic_schema : dict
+        Schema of the DataFrame.
+    example_values : dict
+        Example values.
+    df : pandas.DataFrame
+        The DataFrame to query.
+    custom_user_guidance : list, optional
+        Custom guidance strings.
+    attempt_fix : bool, optional
+        If True, attempt to fix invalid generated code.
+    summarize : bool, optional
+        If True, summarize the result.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    ToolResult
+    """
+    _logger = FlowceptLogger()
+    if llm is None:
+        llm = build_llm_model()
+    prompt = None  # bound up front so the handler below can report it even if prompt building raises
+    try:
+        prompt = build_pandas_code_prompt(
+            query,
+            dynamic_schema,
+            example_values,
+            custom_user_guidance,
+            list(df.columns),
+            context_kind=context_kind,
+        )
+        response = _call_llm(llm, prompt)
+    except Exception as e:
+        return ToolResult(code=400, result=str(e), extra=prompt)
+
+    result_code = response
+    columns = list(df.columns)
+
+    code_holder = [result_code]
+    retry_count = [0]
+
+    def _execute():
+        return safe_execute(df, code_holder[0])
+
+    def _fix(exc, attempt):
+        if not attempt_fix:
+            raise exc
+        tool_result = extract_or_fix_python_code(llm, code_holder[0], columns, runtime_error=str(exc))
+        if tool_result.code != 201:
+            raise RuntimeError(f"LLM could not fix the code: {tool_result.result}")
+        code_holder[0] = tool_result.result
+        retry_count[0] += 1
+        return _execute
+
+    try:
+        result_df = query_runtime_retry(_execute, _fix, max_attempts=3)
+        result_code = code_holder[0]
+    except Exception as e:
+        return ToolResult(
+            code=405,
+            result=(f"Failed to execute after retries: ```python\n{code_holder[0]}```\nLast error: {e}"),
+            extra={
+                "generated_code": code_holder[0],
+                "exception": str(e),
+                "prompt": prompt,
+                "retry_attempts": retry_count[0],
+            },
+        )
+
+    try:
+        result_df = normalize_output(result_df)
+    except Exception as e:
+        return ToolResult(
+            code=504,
+            result="Failed to normalize output.",
+            extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
+        )
+
+    result_df = result_df.dropna(axis=1, how="all")
+
+    return_code = 301
+    summary, summary_error = None, None
+    if summarize:
+        try:
+            tool_result = summarize_result(
+                llm,
+                result_code,
+                result_df,
+                query,
+                dynamic_schema,
+                example_values,
+                list(df.columns),
+                context_kind=context_kind,
+            )
+            if tool_result.is_success():
+                summary = tool_result.result  # return_code keeps its 301 default
+            else:
+                return_code = 302
+                summary_error = tool_result.result
+        except Exception as e:
+            _logger.exception(e)
+            summary = ""
+            summary_error = str(e)
+            return_code = 303
+
+    try:
+        result_df_str = format_result_df(result_df)
+    except Exception as e:
+        return ToolResult(
+            code=405,
+            result="Failed to format output.",
+            extra={"generated_code": result_code, "exception": str(e), "prompt": prompt},
+        )
+
+    return ToolResult(
+        code=return_code,
+        result={
+            "result_code": result_code,
+            "result_df": result_df_str,
+            "result_df_markdown": result_df.to_markdown(index=False),
+            "summary": summary,
+            "summary_error": summary_error,
+        },
+        tool_name="generate_result_df",
+        extra={"prompt": prompt, "retry_attempts": retry_count[0]},
+    )
+
+
+def run_df_code(user_code: str, df) -> ToolResult:
+    """Execute user-provided Python code on a DataFrame and format the result.
+
+    Parameters
+    ----------
+    user_code : str
+        Python code string that operates on the DataFrame.
+    df : pandas.DataFrame
+        The input DataFrame.
+
+    Returns
+    -------
+    ToolResult
+    """
+    try:
+        result_df = safe_execute(df, user_code)
+    except Exception as e:
+        return ToolResult(code=405, result=f"Failed to run this as Python code: {user_code}. Got error {e}")
+
+    # One guard for all post-processing, so a cleanup/formatting failure comes back as a ToolResult.
+    try:
+        result_df = normalize_output(result_df).dropna(axis=1, how="all")
+        formatted_df = format_result_df(result_df)
+    except Exception as e:
+        return ToolResult(code=405, result=str(e))
+    return ToolResult(
+        code=301,
+        result={"result_code": user_code, "result_df": formatted_df},
+        tool_name="run_df_code",
+    )
+
+
+def extract_or_fix_python_code(llm, raw_text, current_fields, runtime_error: str = None) -> ToolResult:
+    """Ask the LLM to pull runnable Python out of ``raw_text``, optionally repairing a runtime failure.
+
+    Parameters
+    ----------
+    llm : callable
+        LLM used for the extraction/repair call.
+    raw_text : str
+        Text that may wrap, or consist of, Python code.
+    current_fields : list
+        Column names available on the DataFrame.
+    runtime_error : str, optional
+        Message of the exception raised by an earlier run of the code; forwarded
+        to the prompt builder so the repair can target it.
+
+    Returns
+    -------
+    ToolResult
+        Code 201 with the raw LLM reply, or code 499 with the error text if the LLM call raises.
+    """
+    fix_prompt = build_extract_or_fix_python_code_prompt(raw_text, current_fields, runtime_error=runtime_error)
+    try:
+        return ToolResult(code=201, result=_call_llm(llm, fix_prompt))
+    except Exception as err:
+        return ToolResult(code=499, result=str(err))
+
+
+def extract_or_fix_json_code(llm, raw_text) -> ToolResult:
+    """Ask the LLM to pull a JSON payload out of ``raw_text``, repairing it if needed.
+
+    Parameters
+    ----------
+    llm : callable
+        LLM used for the extraction/repair call.
+    raw_text : str
+        Text that may wrap, or consist of, JSON.
+
+    Returns
+    -------
+    ToolResult
+        Code 201 with the raw LLM reply, or code 499 with the error text if the LLM call raises.
+    """
+    fix_prompt = extract_or_fix_json_code_prompt(raw_text)
+    try:
+        return ToolResult(code=201, result=_call_llm(llm, fix_prompt))
+    except Exception as err:
+        return ToolResult(code=499, result=str(err))
+
+
+def summarize_result(
+    llm,
+    code,
+    result,
+    query: str,
+    dynamic_schema,
+    example_values,
+    current_fields,
+    context_kind="tasks",
+) -> ToolResult:
+    """Have the LLM describe a pandas result after reducing it locally with ``summarize_df``.
+
+    Parameters
+    ----------
+    llm : callable
+        LLM used for the summarization call.
+    code : str
+        Pandas code that produced ``result``.
+    result : pandas.DataFrame
+        Result to summarize.
+    query : str
+        The user's original question.
+    dynamic_schema : dict
+        Schema of the DataFrame.
+    example_values : dict
+        Example values.
+    current_fields : list
+        Column names of the queried DataFrame.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    ToolResult
+        Code 201 with the LLM reply, or code 400 with the error text if the LLM call raises.
+    """
+    reduced = summarize_df(result, code)
+    summary_prompt = build_dataframe_summarizer_prompt(
+        code,
+        reduced,
+        dynamic_schema,
+        example_values,
+        query,
+        current_fields,
+        context_kind=context_kind,
+    )
+    try:
+        return ToolResult(code=201, result=_call_llm(llm, summary_prompt))
+    except Exception as err:
+        return ToolResult(code=400, result=str(err))
+
+
+def save_df(df, schema, value_examples) -> ToolResult:
+    """Dump the schema and example values as JSON, and the DataFrame as CSV, under ``/tmp``.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Frame written to ``/tmp/current_agent_df.csv`` (without the index).
+    schema : dict
+        Written to ``/tmp/current_tasks_schema.json``.
+    value_examples : dict
+        Written to ``/tmp/value_examples.json``.
+
+    Returns
+    -------
+    ToolResult
+    """
+    json_dumps = (("/tmp/current_tasks_schema.json", schema), ("/tmp/value_examples.json", value_examples))
+    for path, payload in json_dumps:
+        with open(path, "w") as f:
+            json.dump(payload, f, indent=2)
+    df.to_csv("/tmp/current_agent_df.csv", index=False)
+    return ToolResult(code=201, result="Saved df and schema to /tmp directory")
+
+
+def query_on_saved_df(query: str, dynamic_schema_path, value_examples_path, df_path):
+    """Answer a natural language query from a DataFrame, schema and examples stored on disk.
+
+    Parameters
+    ----------
+    query : str
+        Natural language query.
+    dynamic_schema_path : str
+        Path of the JSON file holding the schema.
+    value_examples_path : str
+        Path of the JSON file holding the example values.
+    df_path : str
+        Path handed to ``load_saved_df`` to load the DataFrame.
+
+    Returns
+    -------
+    ToolResult
+    """
+    frame = load_saved_df(df_path)
+    loaded = []
+    for json_path in (dynamic_schema_path, value_examples_path):
+        with open(json_path) as fh:
+            loaded.append(json.load(fh))
+    schema, examples = loaded
+    return generate_result_df(build_llm_model(), query, schema, examples, frame, attempt_fix=False, summarize=False)
diff --git a/src/flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py b/src/flowcept/agents/data_query_tools/pandas_utils.py
similarity index 89%
rename from src/flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py
rename to src/flowcept/agents/data_query_tools/pandas_utils.py
index 86cf0f71..92463495 100644
--- a/src/flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py
+++ b/src/flowcept/agents/data_query_tools/pandas_utils.py
@@ -73,6 +73,9 @@ def normalize_output(result):
else:
raise ValueError(f"Unsupported ndarray shape: {result.shape}")
+ elif isinstance(result, dict):
+ _df = pd.DataFrame([result])
+
else:
raise TypeError(f"Unsupported result type: {type(result)}")
@@ -216,6 +219,11 @@ def clean_code(text):
"""
Extracts the first valid Python code block or line that starts with 'result =' from a model response.
+ Handles:
+ - Fenced code blocks (```python ... ```)
+ - Multi-line code with intermediate variable assignments before result = ...
+ - Single-line result = ... statements
+
Parameters
----------
text : str
@@ -231,7 +239,26 @@ def clean_code(text):
if block_match:
return block_match.group(1).strip()
- # Fallback: try to find a line that starts with "result ="
+ # Scan for consecutive Python-looking lines starting from the first identifier assignment.
+ # This handles multi-line code with intermediate variables (e.g., per_act = ...; result = ...).
+ code_lines = []
+ in_code = False
+ for line in text.strip().splitlines():
+ stripped = line.strip()
+ if not in_code and re.match(r"^[a-zA-Z_]\w*\s*=", stripped):
+ in_code = True
+ if in_code:
+ code_lines.append(line)
+ if code_lines:
+ candidate = "\n".join(code_lines).strip()
+ try:
+ compile(candidate, "", "exec")
+ if "result" in candidate:
+ return candidate
+ except SyntaxError:
+ pass
+
+ # Single-line fallback for prose+code responses where only the first assignment matters
line_match = re.search(r"(result\s*=\s*.+)", text)
if line_match:
return line_match.group(1).strip()
diff --git a/src/flowcept/agents/data_query_tools/tools_utils.py b/src/flowcept/agents/data_query_tools/tools_utils.py
new file mode 100644
index 00000000..654ced47
--- /dev/null
+++ b/src/flowcept/agents/data_query_tools/tools_utils.py
@@ -0,0 +1,50 @@
+"""Shared retry loop for query runtime errors.
+
+Covers only errors raised when *executing* a generated query — pandas runtime
+errors for the DF path and MongoDB OperationFailure for the DB path.
+
+Out of scope: JSON/code parse errors, LLM output format errors, network errors,
+auth errors, and schema validation errors.
+"""
+
+
+def query_runtime_retry(execute_fn, fix_fn, max_attempts: int = 3):
+    """Run *execute_fn*, letting *fix_fn* repair the query between failed attempts.
+
+    Parameters
+    ----------
+    execute_fn : callable
+        Parameterless callable that executes the query. Returns a value on
+        success; raises an exception on query runtime error.
+    fix_fn : callable(exc, attempt) -> new_execute_fn
+        Called with the caught exception and the zero-based attempt index; returns the
+        callable for the next attempt. Raising signals an unrecoverable error (no more retries).
+    max_attempts : int, optional
+        Total number of attempts, including the first. Default is 3.
+
+    Returns
+    -------
+    object
+        Whatever the first successful *execute_fn* call returns.
+
+    Raises
+    ------
+    ValueError
+        If *max_attempts* is less than 1.
+    Exception
+        The last *execute_fn* error; chained to the *fix_fn* error if the fixer failed.
+    """
+    if max_attempts < 1:
+        raise ValueError("max_attempts must be at least 1")
+    for attempt in range(max_attempts):
+        try:
+            return execute_fn()
+        except Exception as exc:
+            if attempt == max_attempts - 1:  # attempts exhausted: surface the last query error
+                raise
+            try:
+                execute_fn = fix_fn(exc, attempt)
+            except Exception as fix_exc:
+                if fix_exc is exc:  # fixer re-raised the query error itself; avoid a self-referential cause
+                    raise
+                raise exc from fix_exc  # same error type for callers, fixer failure kept as __cause__
diff --git a/src/flowcept/agents/gui/README.md b/src/flowcept/agents/gui/README.md
new file mode 100644
index 00000000..4e7ddedb
--- /dev/null
+++ b/src/flowcept/agents/gui/README.md
@@ -0,0 +1,11 @@
+# Deprecated GUI
+
+This module is deprecated.
+
+Flowcept UI work has moved to the web UI under `src/flowcept/webservice/`.
+That UI includes the chat interface and is the place for current and future
+user-facing interaction work.
+
+Keep this package only for legacy reference while the old Streamlit agent GUI
+is retired.
+
diff --git a/src/flowcept/agents/gui/gui_utils.py b/src/flowcept/agents/gui/gui_utils.py
index 0596abf1..838fa283 100644
--- a/src/flowcept/agents/gui/gui_utils.py
+++ b/src/flowcept/agents/gui/gui_utils.py
@@ -10,8 +10,8 @@
import streamlit as st
from flowcept.agents.gui import AI
from flowcept.agents import prompt_handler
-from flowcept.agents.agent_client import run_tool
-from flowcept.agents.agents_utils import ToolResult
+from flowcept.agents.mcp.mcp_client import run_tool
+from flowcept.agents.tool_result import ToolResult
from flowcept.agents.gui.audio_utils import _md_to_plain_text, speak
diff --git a/src/flowcept/agents/llm/__init__.py b/src/flowcept/agents/llm/__init__.py
new file mode 100644
index 00000000..c124df5d
--- /dev/null
+++ b/src/flowcept/agents/llm/__init__.py
@@ -0,0 +1 @@
+"""LLM infrastructure for the Flowcept agent subsystem."""
diff --git a/src/flowcept/agents/llm/builders.py b/src/flowcept/agents/llm/builders.py
new file mode 100644
index 00000000..80d9ddd4
--- /dev/null
+++ b/src/flowcept/agents/llm/builders.py
@@ -0,0 +1,145 @@
+"""LLM factory and message normalization utilities."""
+
+import os
+import re
+import unicodedata
+
+from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager
+from flowcept.instrumentation.flowcept_agent_task import FlowceptLLM, get_current_context_task
+from flowcept.configs import AGENT
+
+
+def build_llm_model(
+    model_name=None,
+    model_kwargs=None,
+    service_provider=None,
+    agent_id=BaseAgentContextManager.agent_id,
+    track_tools=True,
+    return_response_object=False,
+) -> FlowceptLLM:
+    """Build and return an LLM instance using agent configuration.
+
+    Parameters
+    ----------
+    model_name : str, optional
+        Fallback model identifier, used only when neither the merged model
+        kwargs nor the ``AGENT`` settings contain a ``model`` entry.
+    model_kwargs : dict, optional
+        Per-call overrides applied on top of ``AGENT["model_kwargs"]``.
+    service_provider : str, optional
+        ``sambanova``, ``azure``, ``openai`` or ``google``. When empty, the
+        ``AGENT["service_provider"]`` setting is used.
+    agent_id : str, optional
+        Agent id stored on the tracked LLM; ``None`` falls back to
+        ``BaseAgentContextManager.agent_id``.
+    track_tools : bool, optional
+        When true, wrap the provider LLM in ``FlowceptLLM``.
+    return_response_object : bool, optional
+        Forwarded to ``FlowceptLLM``.
+
+    Returns
+    -------
+    FlowceptLLM
+        An initialized LLM object configured using the ``AGENT`` settings. When
+        ``track_tools`` is false, the unwrapped provider LLM is returned.
+
+    Raises
+    ------
+    ValueError
+        If the service provider is not supported, or the ``google`` provider is
+        selected with a model name containing neither ``claude`` nor ``gemini``.
+    """
+    _model_kwargs = (AGENT.get("model_kwargs") or {}).copy()
+    if model_kwargs is not None:
+        _model_kwargs.update(model_kwargs)
+
+    if "model" not in _model_kwargs:
+        _model_kwargs["model"] = AGENT.get("model", model_name)
+
+    _service_provider = service_provider or AGENT.get("service_provider")
+
+    if _service_provider == "sambanova":
+        from langchain_community.llms.sambanova import SambaStudio
+
+        # Export the configured URL/key unless the environment already defines them.
+        # Unset settings are skipped: assigning None to os.environ raises TypeError.
+        for env_var, setting in (("SAMBASTUDIO_URL", "llm_server_url"), ("SAMBASTUDIO_API_KEY", "api_key")):
+            fallback = AGENT.get(setting)
+            if env_var not in os.environ and fallback is not None:
+                os.environ[env_var] = fallback
+
+        llm = SambaStudio(model_kwargs=_model_kwargs)
+    elif _service_provider == "azure":
+        from langchain_openai.chat_models.azure import AzureChatOpenAI
+
+        api_key = os.environ.get("AZURE_OPENAI_API_KEY", AGENT.get("api_key", None))
+        service_url = os.environ.get("AZURE_OPENAI_API_ENDPOINT", AGENT.get("llm_server_url", None))
+        llm = AzureChatOpenAI(
+            azure_deployment=_model_kwargs.get("model"), azure_endpoint=service_url, api_key=api_key, **_model_kwargs
+        )
+    elif _service_provider == "openai":
+        from langchain_openai import ChatOpenAI
+
+        api_key = os.environ.get("OPENAI_API_KEY", AGENT.get("api_key", None))
+        base_url = os.environ.get("OPENAI_BASE_URL", AGENT.get("llm_server_url") or None)
+        org = os.environ.get("OPENAI_ORG_ID", AGENT.get("organization", None))
+
+        init_kwargs = {"api_key": api_key}
+        if base_url:
+            init_kwargs["base_url"] = base_url
+        if org:
+            init_kwargs["organization"] = org
+
+        llm = ChatOpenAI(**init_kwargs, **_model_kwargs)
+    elif _service_provider == "google":
+        # ``model`` can be None when neither the kwargs nor the settings define it.
+        model_id = _model_kwargs.get("model") or ""
+        if "claude" in model_id:
+            api_key = os.environ.get("GOOGLE_API_KEY", AGENT.get("api_key", None))
+            _model_kwargs["model_id"] = _model_kwargs.pop("model")
+            _model_kwargs["google_token_auth"] = api_key
+            from flowcept.agents.llm.providers.claude_gcp import ClaudeOnGCPLLM
+
+            llm = ClaudeOnGCPLLM(**_model_kwargs)
+        elif "gemini" in model_id:
+            from flowcept.agents.llm.providers.gemini25 import Gemini25LLM
+
+            llm = Gemini25LLM(**_model_kwargs)
+        else:
+            # Fail loudly here instead of reaching the code below with ``llm`` unbound.
+            raise ValueError(
+                f"Unsupported google model {model_id!r}; expected a model name containing 'claude' or 'gemini'."
+            )
+    else:
+        raise ValueError("Currently supported providers are sambanova, openai, azure, and google.")
+
+    if track_tools:
+        llm = FlowceptLLM(llm, return_response_object=return_response_object)
+        if agent_id is None:
+            agent_id = BaseAgentContextManager.agent_id
+        llm.agent_id = agent_id
+        tool_task = get_current_context_task()
+        if tool_task:
+            llm.parent_task_id = tool_task.task_id
+    return llm
+
+
+def normalize_message(user_msg: str) -> str:
+    """Return a canonical form of a user message for matching and comparison.
+
+    Parameters
+    ----------
+    user_msg : str
+        Raw text typed by the user.
+
+    Returns
+    -------
+    str
+        Lower-cased text with NFKC normalization applied, en/em dashes replaced by
+        ``-``, whitespace runs collapsed and trailing ``?``, ``!``, ``.`` removed.
+    """
+    text = unicodedata.normalize("NFKC", user_msg.strip())
+    for dash in ("–", "—"):
+        text = text.replace(dash, "-")
+    text = re.sub(r"\s+", " ", text)
+    return re.sub(r"[?!.\s]+$", "", text).lower()
diff --git a/src/flowcept/agents/llm/providers/__init__.py b/src/flowcept/agents/llm/providers/__init__.py
new file mode 100644
index 00000000..e9ee9706
--- /dev/null
+++ b/src/flowcept/agents/llm/providers/__init__.py
@@ -0,0 +1 @@
+"""LLM provider wrappers (Claude on GCP, Gemini 2.5)."""
diff --git a/src/flowcept/agents/llms/claude_gcp.py b/src/flowcept/agents/llm/providers/claude_gcp.py
similarity index 100%
rename from src/flowcept/agents/llms/claude_gcp.py
rename to src/flowcept/agents/llm/providers/claude_gcp.py
diff --git a/src/flowcept/agents/llms/gemini25.py b/src/flowcept/agents/llm/providers/gemini25.py
similarity index 100%
rename from src/flowcept/agents/llms/gemini25.py
rename to src/flowcept/agents/llm/providers/gemini25.py
diff --git a/src/flowcept/agents/llms/__init__.py b/src/flowcept/agents/llms/__init__.py
deleted file mode 100644
index 49212d57..00000000
--- a/src/flowcept/agents/llms/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""LLMs subpackage."""
diff --git a/src/flowcept/agents/mcp/__init__.py b/src/flowcept/agents/mcp/__init__.py
new file mode 100644
index 00000000..533cb8f5
--- /dev/null
+++ b/src/flowcept/agents/mcp/__init__.py
@@ -0,0 +1 @@
+"""MCP server, client, and tool adapters for the Flowcept agent."""
diff --git a/src/flowcept/agents/flowcept_ctx_manager.py b/src/flowcept/agents/mcp/context_manager.py
similarity index 66%
rename from src/flowcept/agents/flowcept_ctx_manager.py
rename to src/flowcept/agents/mcp/context_manager.py
index 8a974496..9bfade01 100644
--- a/src/flowcept/agents/flowcept_ctx_manager.py
+++ b/src/flowcept/agents/mcp/context_manager.py
@@ -1,9 +1,27 @@
-from flowcept.agents.dynamic_schema_tracker import DynamicSchemaTracker
-from flowcept.agents.tools.in_memory_queries.pandas_agent_utils import load_saved_df
+from contextlib import asynccontextmanager
+
+from flowcept.agents.provenance_schema_manager.context_schema_manager import ContextSchemaManager
+from flowcept.agents.provenance_schema_manager.static_schema_builder import (
+ SCHEMA_CONTEXT,
+ assert_schema_documented,
+ build_schema_context,
+)
+from flowcept.agents.data_query_tools.pandas_utils import load_saved_df
from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
+from flowcept.commons.flowcept_dataclasses.workflow_object import WorkflowObject
+from flowcept.commons.flowcept_dataclasses.agent_object import AgentObject
+from flowcept.commons.flowcept_dataclasses.blob_object import BlobObject
+from flowcept.commons.task_data_preprocess import (
+ TelemetrySummary,
+ CpuSummary,
+ MemorySummary,
+ DiskSummary,
+ NetworkSummary,
+ summarize_task,
+)
from flowcept.commons.flowcept_logger import FlowceptLogger
-from flowcept.commons.vocabulary import Status
-from flowcept.configs import AGENT
+from flowcept.commons.vocabulary import PROV_AGENT
+from flowcept.configs import AGENT, AGENT_HOST, AGENT_PORT
from mcp.server.fastmcp import FastMCP
import json
@@ -15,8 +33,6 @@
from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager, BaseAppContext
-from flowcept.commons.task_data_preprocess import summarize_task
-
AGENT_DEBUG = AGENT.get("debug", False)
@@ -64,6 +80,7 @@ def reset_context(self):
self.objects_df = pd.DataFrame()
self.objects_schema = {}
self.objects_value_examples = {}
+ self.workflow_schema_cache = {}
if AGENT_DEBUG:
from flowcept.commons.flowcept_logger import FlowceptLogger
@@ -105,12 +122,43 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
def __init__(self):
self.context = FlowceptAppContext()
self.tracker_config = dict(max_examples=3, max_str_len=50)
- self.schema_tracker = DynamicSchemaTracker(**self.tracker_config)
- self.objects_schema_tracker = DynamicSchemaTracker(**self.tracker_config)
+ self.schema_manager = ContextSchemaManager(self.context, self.tracker_config)
+ self._seen_activities: dict = {}
self.msgs_counter = 0
self.context_chunk_size = 1 # Should be in the settings
super().__init__(allow_mq_disabled=True)
+ def reset_context(self):
+ """Reset MCP runtime context and workflow-scoped schema trackers."""
+ self.context.reset_context()
+ self.schema_manager.reset()
+ self._seen_activities = {}
+ self.msgs_counter = 0
+
+ @asynccontextmanager
+ async def lifespan(self, app):
+ """Start schema assertions before the MCP server begins serving requests.
+
+ Validates that all domain-class fields have attribute docstrings, then
+ populates ``SCHEMA_CONTEXT`` for use by prompt builders. Raises
+ ``SchemaDocumentationError`` loudly so the server refuses to start when
+ any field is undocumented.
+ """
+ assert_schema_documented(
+ TaskObject,
+ WorkflowObject,
+ AgentObject,
+ BlobObject,
+ TelemetrySummary,
+ CpuSummary,
+ MemorySummary,
+ DiskSummary,
+ NetworkSummary,
+ )
+ SCHEMA_CONTEXT.update(build_schema_context())
+ async with super().lifespan(app) as ctx:
+ yield ctx
+
def message_handler(self, msg_obj: Dict):
"""
Handle an incoming message and update context accordingly.
@@ -127,21 +175,28 @@ def message_handler(self, msg_obj: Dict):
"""
msg_type = msg_obj.get("type", None)
if msg_type == "workflow":
- # Preserve an explicitly loaded workflow when the agent registers its own runtime workflow.
- if msg_obj.get("name") == "flowcept_agent_workflow" and self.context.workflow_msg_obj:
- self.logger.info("Ignoring agent runtime workflow; keeping loaded workflow context.")
+ # Preserve the user-loaded workflow when the agent/chat runtime emits its own workflow.
+ # Compare workflow_ids: if we have a loaded workflow and the incoming message belongs to
+ # a different workflow, ignore it so runtime chat/agent workflows never overwrite the
+ # explicitly loaded provenance workflow.
+ loaded_wf_id = (self.context.workflow_msg_obj or {}).get("workflow_id")
+ incoming_wf_id = msg_obj.get("workflow_id")
+ if loaded_wf_id and incoming_wf_id and loaded_wf_id != incoming_wf_id:
+ self.logger.info("Ignoring runtime workflow (different workflow_id); keeping loaded workflow context.")
return True
self.context.workflow_msg_obj = msg_obj
+ if WorkflowObject.from_dict(msg_obj).workflow_is_finished():
+ self.schema_manager.persist_workflow_schema_snapshot(msg_obj.get("workflow_id"))
return True
if msg_type == "object":
self.context.objects.append(msg_obj)
- self.update_objects_schema_and_add_to_df(objects=[msg_obj])
+ self.schema_manager.update_objects_schema_and_add_to_df(objects=[msg_obj])
return True
if msg_type == "task":
task_msg = TaskObject.from_dict(msg_obj)
- if task_msg.subtype == "llm_task" and task_msg.agent_id == self.agent_id:
+ if task_msg.subtype == PROV_AGENT.AI_MODEL_INVOCATION and task_msg.agent_id == self.agent_id:
self.logger.info(f"Going to ignore our own LLM messages: {task_msg}")
return True
@@ -158,47 +213,18 @@ def message_handler(self, msg_obj: Dict):
FlowceptTask(
agent_id=self.agent_id,
generated={"msg": "Provenance Agent reset context."},
- subtype="agent_task",
+ subtype=PROV_AGENT.AGENT_TOOL,
activity_id="reset_user_context",
).send()
return True
elif task_msg.activity_id == "provenance_query":
- self.logger.info("Received a prov query message!")
- query_text = task_msg.used.get("query")
- from flowcept.agents import ToolResult
- from flowcept.agents.tools.general_tools import prompt_handler
- from flowcept.agents.agent_client import run_tool
-
- resp = run_tool(tool_name=prompt_handler, kwargs={"message": query_text})[0]
-
- try:
- error = None
- status = Status.FINISHED
- tool_result = ToolResult(**json.loads(resp))
- if tool_result.result_is_str():
- generated = {"text": tool_result.result}
- else:
- generated = tool_result.result
- except Exception as e:
- status = Status.ERROR
- error = f"Could not convert the following into a ToolResult:\n{resp}\nException: {e}"
- generated = {"text": str(resp)}
- if self._mq_dao is None:
- self.logger.warning("MQ is disabled; skipping provenance_query response message.")
- else:
- FlowceptTask(
- agent_id=self.agent_id,
- generated=generated,
- stderr=error,
- status=status,
- subtype="agent_task",
- activity_id="provenance_query_response",
- ).send()
-
+ self.logger.info(
+ "Ignoring legacy provenance_query task; explicit workflow query tools are used instead."
+ )
return True
elif (
- task_msg.subtype == "agent_task"
+ task_msg.subtype == PROV_AGENT.AGENT_TOOL
and task_msg.agent_id is not None
and task_msg.agent_id == self.agent_id
):
@@ -219,7 +245,7 @@ def message_handler(self, msg_obj: Dict):
f"Going to add to index! {(self.msgs_counter - self.context_chunk_size, self.msgs_counter)}"
)
try:
- self.update_schema_and_add_to_df(
+ self.schema_manager.update_schema_and_add_to_df(
tasks=self.context.task_summaries[
self.msgs_counter - self.context_chunk_size : self.msgs_counter
]
@@ -231,42 +257,29 @@ def message_handler(self, msg_obj: Dict):
self.logger.error(f"Could not add these tasks to buffer!\n{task_slice}")
self.logger.exception(e)
+ activity_id = msg_obj.get("activity_id")
+ workflow_id = msg_obj.get("workflow_id")
+ if (
+ activity_id
+ and workflow_id
+ and msg_obj.get("ended_at")
+ and msg_obj.get("used")
+ and msg_obj.get("generated")
+ and activity_id not in self._seen_activities.get(workflow_id, set())
+ ):
+ self.schema_manager.update_workflow_schema_cache([msg_obj])
+ self._seen_activities.setdefault(workflow_id, set()).add(activity_id)
+
# self.monitor_chunk()
return True
- def update_schema_and_add_to_df(self, tasks: List[Dict]):
- """Update the schema and add to the DataFrame in context."""
- self.schema_tracker.update_with_tasks(tasks)
- self.context.tasks_schema = self.schema_tracker.get_schema()
- self.context.value_examples = self.schema_tracker.get_example_values()
-
- _df = self._to_context_df(tasks)
- self.context.df = pd.concat([self.context.df, _df], ignore_index=True)
-
- def update_objects_schema_and_add_to_df(self, objects: List[Dict]):
- """Update the object schema and add to the object DataFrame context."""
- self.objects_schema_tracker.update_with_tasks(objects)
- self.context.objects_schema = self.objects_schema_tracker.get_schema()
- self.context.objects_value_examples = self.objects_schema_tracker.get_example_values()
-
- _df = self._to_context_df(objects)
- self.context.objects_df = pd.concat([self.context.objects_df, _df], ignore_index=True)
-
- @staticmethod
- def _to_context_df(records: List[Dict]):
- _df = pd.json_normalize(records)
- for col in _df.columns:
- if _df[col].apply(lambda v: isinstance(v, list)).any():
- _df[col] = _df[col].apply(lambda v: tuple(v) if isinstance(v, list) else v)
- return pd.DataFrame(_df)
-
def monitor_chunk(self):
"""
Perform LLM-based analysis on the current chunk of task messages and send the results.
"""
self.logger.debug(f"Going to begin LLM job! {self.msgs_counter}")
- from flowcept.agents.agent_client import run_tool
+ from flowcept.agents.mcp.mcp_client import run_tool
result = run_tool("analyze_task_chunk")
if len(result):
@@ -289,9 +302,14 @@ def monitor_chunk(self):
if "allowed_hosts" in AGENT:
from mcp.server.transport_security import TransportSecuritySettings
+ allowed_hosts = list(AGENT.get("allowed_hosts") or [])
+ for host in {AGENT_HOST, "localhost", "127.0.0.1", "::1"}:
+ for allowed_host in {host, f"{host}:*", f"{host}:{AGENT_PORT}"}:
+ if allowed_host not in allowed_hosts:
+ allowed_hosts.append(allowed_host)
agent_transport_security = TransportSecuritySettings(
enable_dns_rebinding_protection=True,
- allowed_hosts=AGENT.get("allowed_hosts"),
+ allowed_hosts=allowed_hosts,
)
mcp_flowcept = FastMCP(
@@ -313,8 +331,7 @@ def get_df_context(context_kind="tasks"):
tuple
``(df, schema, value_examples, custom_user_guidance)`` from lifespan context.
"""
- ctx = mcp_flowcept.get_context()
- lifespan_context = ctx.request_context.lifespan_context
+ lifespan_context = ctx_manager.context
if context_kind == "objects":
df = lifespan_context.objects_df
schema = lifespan_context.objects_schema
diff --git a/src/flowcept/agents/agent_client.py b/src/flowcept/agents/mcp/mcp_client.py
similarity index 100%
rename from src/flowcept/agents/agent_client.py
rename to src/flowcept/agents/mcp/mcp_client.py
diff --git a/src/flowcept/agents/mcp/mcp_prompts.py b/src/flowcept/agents/mcp/mcp_prompts.py
new file mode 100644
index 00000000..68e122ef
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_prompts.py
@@ -0,0 +1,42 @@
+"""MCP prompt registrations — all ``@mcp_flowcept.prompt()`` decorators live here.
+
+Separated from the prompt builders in ``prompts/`` so those files have no MCP imports.
+"""
+
+from flowcept.agents.mcp.context_manager import mcp_flowcept, get_df_context, EMPTY_DF_MESSAGE
+from flowcept.agents.prompts.df_query_prompts import build_pandas_code_prompt
+
+
+@mcp_flowcept.prompt(
+    name="build_df_query_prompt",
+    title="Build DataFrame Query Prompt",
+    description="Build prompt context for external LLM code generation over agent DataFrame context.",
+)
+def build_df_query_prompt(query: str, context_kind: str = "tasks") -> str:
+    """Assemble the pandas-code generation prompt that an external LLM client consumes.
+
+    Parameters
+    ----------
+    query : str
+        Natural language question that should become pandas code.
+    context_kind : str, optional
+        Which agent DataFrame context to use: "tasks" or "objects".
+
+    Returns
+    -------
+    str
+        The prompt text, or ``EMPTY_DF_MESSAGE`` when the selected DataFrame
+        context is missing or has no rows.
+    """
+    frame, schema, examples, guidance = get_df_context(context_kind=context_kind)
+    has_rows = frame is not None and len(frame) > 0
+    if not has_rows:
+        return EMPTY_DF_MESSAGE
+    return build_pandas_code_prompt(
+        query,
+        schema,
+        examples,
+        guidance,
+        list(frame.columns),
+        context_kind=context_kind,
+    )
diff --git a/src/flowcept/agents/flowcept_agent.py b/src/flowcept/agents/mcp/mcp_server.py
similarity index 55%
rename from src/flowcept/agents/flowcept_agent.py
rename to src/flowcept/agents/mcp/mcp_server.py
index 5de2481a..1fd327a1 100644
--- a/src/flowcept/agents/flowcept_agent.py
+++ b/src/flowcept/agents/mcp/mcp_server.py
@@ -1,12 +1,22 @@
+"""MCP server entry point for the Flowcept agent."""
+
import json
import os
+import socket
+import time
from threading import Thread
-from flowcept.agents import check_liveness
-from flowcept.agents.agents_utils import ToolResult
-from flowcept.agents.tools.general_tools import prompt_handler
-from flowcept.agents.agent_client import run_tool
-from flowcept.agents.flowcept_ctx_manager import mcp_flowcept, ctx_manager
+from flowcept.agents.mcp.mcp_client import run_tool
+from flowcept.agents.mcp.context_manager import mcp_flowcept, ctx_manager
+
+# Import all mcp_tools modules so their @mcp_flowcept.tool() decorators fire
+from flowcept.agents.mcp.mcp_tools.session_tools import check_liveness
+import flowcept.agents.mcp.mcp_tools.db_query_mcp_tools # noqa: F401
+import flowcept.agents.mcp.mcp_tools.dashboard_mcp_tools # noqa: F401
+import flowcept.agents.mcp.mcp_tools.df_query_mcp_tools # noqa: F401
+import flowcept.agents.mcp.mcp_tools.report_tools # noqa: F401
+import flowcept.agents.mcp.mcp_tools.schema_mcp_tools # noqa: F401
+import flowcept.agents.mcp.mcp_prompts # noqa: F401
from flowcept.commons.flowcept_logger import FlowceptLogger
from flowcept.configs import AGENT_HOST, AGENT_PORT, DUMP_BUFFER_PATH
from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager
@@ -15,20 +25,16 @@
import uvicorn
-class FlowceptAgent:
- """
- Flowcept agent server wrapper with optional offline buffer loading.
- """
+class FlowceptMCPServer:
+ """Flowcept mcp server wrapper with optional offline buffer loading."""
def __init__(self, buffer_path: str | None = None, buffer_messages: list[dict] | None = None):
- """
- Initialize a FlowceptAgent.
+ """Initialize a Flowcept MCP server.
Parameters
----------
buffer_path : str or None
- Optional path to a JSONL buffer file. When MQ is disabled, the agent
- loads this file once at startup.
+ Optional path to a JSONL buffer file.
buffer_messages : list[dict] or None
Optional list of buffer messages to load directly into the agent context.
"""
@@ -39,8 +45,7 @@ def __init__(self, buffer_path: str | None = None, buffer_messages: list[dict] |
self._server = None
def _load_buffer_messages(self, messages: list[dict]) -> int:
- """
- Load a list of message objects into the agent context.
+ """Load a list of message objects into the agent context.
Returns
-------
@@ -58,9 +63,17 @@ def _load_buffer_messages(self, messages: list[dict]) -> int:
self.logger.info(f"Loaded {count} messages from buffer list.")
return count
+ def reset_context(self):
+ """Reset the MCP agent context without restarting the HTTP server."""
+ ctx_manager.reset_context()
+
+ def load_buffer_messages(self, messages: list[dict]) -> int:
+ """Replace the active MCP context with the provided buffer messages."""
+ self.reset_context()
+ return self._load_buffer_messages(messages)
+
def _load_buffer_once(self) -> int:
- """
- Load messages from a JSONL buffer file into the agent context.
+ """Load messages from a JSONL buffer file into the agent context.
Returns
-------
@@ -91,8 +104,6 @@ def _load_buffer_once(self) -> int:
def _run_server(self):
"""Run the MCP server (blocking call)."""
try:
- # sse-starlette keeps a module-level exit Event bound to the first event loop that
- # served SSE; reset it so this server's fresh loop can serve SSE in the same process.
from sse_starlette.sse import AppStatus
AppStatus.should_exit_event = None
@@ -103,31 +114,51 @@ def _run_server(self):
self._server.run()
def start(self):
- """
- Start the agent server in a background thread.
+ """Start the agent server in a background thread.
Returns
-------
- FlowceptAgent
+ FlowceptMCPServer
The current instance.
"""
- if self.buffer_path is not None:
- if self.buffer_messages is not None:
- self._load_buffer_messages(self.buffer_messages)
- else:
- self._load_buffer_once()
-
- # Daemon thread so the hosting process can always exit (e.g., test runners);
- # long-running deployments block explicitly via wait().
+ if self.buffer_path is not None or self.buffer_messages is not None:
+ self.reset_context()
+ if self.buffer_messages is not None:
+ self._load_buffer_messages(self.buffer_messages)
+ elif self.buffer_path is not None:
+ self._load_buffer_once()
+
self._server_thread = Thread(target=self._run_server, daemon=True)
self._server_thread.start()
- self.logger.info(f"Flowcept agent server started on {AGENT_HOST}:{AGENT_PORT}")
+ self._wait_until_ready()
+ self.logger.info(f"Flowcept mcp server started on {AGENT_HOST}:{AGENT_PORT}")
return self
+    def _wait_until_ready(self, timeout_sec: float = 10.0):
+        """Block until the local MCP TCP listener accepts connections; raise ``TimeoutError`` otherwise."""
+        deadline = time.monotonic() + timeout_sec  # monotonic: unaffected by system clock adjustments
+        while time.monotonic() < deadline:
+            try:
+                with socket.create_connection((AGENT_HOST, AGENT_PORT), timeout=0.2):
+                    return
+            except OSError:
+                time.sleep(0.05)
+        raise TimeoutError(f"Flowcept MCP server did not start on {AGENT_HOST}:{AGENT_PORT}.")
+
+    def _wait_until_stopped(self, timeout_sec: float = 10.0):
+        """Block until the local MCP TCP listener refuses connections; log a warning on timeout."""
+        deadline = time.monotonic() + timeout_sec  # monotonic: unaffected by system clock adjustments
+        while time.monotonic() < deadline:
+            try:
+                with socket.create_connection((AGENT_HOST, AGENT_PORT), timeout=0.2):
+                    time.sleep(0.05)
+            except OSError:
+                return
+        self.logger.warning(f"Flowcept MCP server still appears reachable on {AGENT_HOST}:{AGENT_PORT}.")
+
def stop(self):
"""Stop the agent server and wait briefly for shutdown."""
if self._server is None and self._server_thread is not None:
- # The server object is created inside the thread; give it a moment to appear.
self._server_thread.join(timeout=1)
if self._server is not None:
self._server.should_exit = True
@@ -135,38 +166,17 @@ def stop(self):
self._server_thread.join(timeout=5)
if self._server_thread.is_alive():
self.logger.warning("Agent server thread did not stop within 5s; continuing shutdown.")
+ self._wait_until_stopped()
def wait(self):
"""Block until the server thread exits."""
if self._server_thread is not None:
self._server_thread.join()
- def query(self, message: str) -> ToolResult:
- """
- Send a prompt to the agent's main router tool and return the response.
- """
- try:
- resp = run_tool(tool_name=prompt_handler, kwargs={"message": message})[0]
- except Exception as e:
- return ToolResult(code=400, result=f"Error executing tool prompt_handler: {e}", tool_name="prompt_handler")
-
- try:
- return ToolResult(**json.loads(resp))
- except Exception as e:
- return ToolResult(
- code=499,
- result=f"Could not parse tool response as JSON: {resp}",
- extra=str(e),
- tool_name="prompt_handler",
- )
-
def main():
- """
- Start the MCP server.
- """
- agent = FlowceptAgent().start()
- # Wake up tool call
+ """Start the MCP server."""
+ agent = FlowceptMCPServer().start()
print(run_tool(check_liveness, host=AGENT_HOST, port=AGENT_PORT)[0])
agent.wait()
diff --git a/src/flowcept/agents/mcp/mcp_tools/__init__.py b/src/flowcept/agents/mcp/mcp_tools/__init__.py
new file mode 100644
index 00000000..9b02dffb
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_tools/__init__.py
@@ -0,0 +1,4 @@
+"""Thin MCP tool wrappers over data_query_tools/ cores."""
+
+from flowcept.agents.mcp.mcp_tools.report_tools import generate_workflow_card # noqa: F401
+from flowcept.agents.mcp.mcp_tools.session_tools import check_liveness # noqa: F401
diff --git a/src/flowcept/agents/mcp/mcp_tools/dashboard_mcp_tools.py b/src/flowcept/agents/mcp/mcp_tools/dashboard_mcp_tools.py
new file mode 100644
index 00000000..416cab04
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_tools/dashboard_mcp_tools.py
@@ -0,0 +1,30 @@
+"""Thin MCP wrappers for dashboard agent tools."""
+
+from typing import Any, Dict, Optional
+
+from flowcept.agents.data_query_tools import dashboard_tools
+from flowcept.agents.mcp.context_manager import mcp_flowcept
+from flowcept.agents.tool_result import ToolResult
+from flowcept.commons.vocabulary import PROV_AGENT
+from flowcept.instrumentation.flowcept_agent_task import agent_flowcept_task
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def make_chart(card_spec: Dict[str, Any], context: Optional[Dict[str, Any]] = None) -> ToolResult:
+ """Build a chart from a declarative dashboard card spec."""
+ return dashboard_tools.make_chart(card_spec=card_spec, context=context)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def get_dashboard(dashboard_id: str) -> ToolResult:
+ """Get a stored dashboard spec by id."""
+ return dashboard_tools.get_dashboard(dashboard_id=dashboard_id)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def update_dashboard(dashboard_id: str, spec: Dict[str, Any]) -> ToolResult:
+ """Replace a stored dashboard spec with a complete revised spec."""
+ return dashboard_tools.update_dashboard(dashboard_id=dashboard_id, spec=spec)
diff --git a/src/flowcept/agents/mcp/mcp_tools/db_query_mcp_tools.py b/src/flowcept/agents/mcp/mcp_tools/db_query_mcp_tools.py
new file mode 100644
index 00000000..d5203480
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_tools/db_query_mcp_tools.py
@@ -0,0 +1,80 @@
+"""Thin MCP wrappers exposing DB provenance query tools to external agent clients.
+
+One-liner delegates to :mod:`flowcept.agents.data_query_tools.db_query_tools`.
+No business logic here — all logic lives in ``data_query_tools/``.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from flowcept.agents.tool_result import ToolResult
+from flowcept.agents.mcp.context_manager import mcp_flowcept
+from flowcept.agents.data_query_tools import db_query_tools
+from flowcept.commons.vocabulary import PROV_AGENT
+from flowcept.instrumentation.flowcept_agent_task import agent_flowcept_task
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def query_tasks(
+ filter: Optional[Dict[str, Any]] = None,
+ projection: Optional[List[str]] = None,
+ limit: int = 100,
+ sort: Optional[List[Dict[str, Any]]] = None,
+) -> ToolResult:
+ """Query task provenance records in the database with a Mongo-style filter."""
+ return db_query_tools.query_tasks(filter=filter, projection=projection, limit=limit, sort=sort)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def query_workflows(filter: Optional[Dict[str, Any]] = None, limit: int = 100) -> ToolResult:
+ """Query workflow provenance records in the database with a Mongo-style filter."""
+ return db_query_tools.query_workflows(filter=filter, limit=limit)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def get_task_summary(filter: Optional[Dict[str, Any]] = None) -> ToolResult:
+ """Summarize tasks matching a filter: status counts, per-activity durations, time range."""
+ return db_query_tools.get_task_summary(filter=filter)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def list_campaigns(campaign_id: Optional[str] = None) -> ToolResult:
+ """List derived campaign summaries (campaigns group workflows and tasks)."""
+ return db_query_tools.list_campaigns(campaign_id=campaign_id)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def list_agents(filter: Optional[Dict[str, Any]] = None) -> ToolResult:
+ """List derived agent summaries (agents observed in task provenance)."""
+ return db_query_tools.list_agents(filter=filter)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def query_objects(
+ filter: Optional[Dict[str, Any]] = None,
+ projection: Optional[Any] = None,
+ limit: int = 100,
+) -> ToolResult:
+ """Query stored data-object records (ML models, datasets, blobs) with a Mongo-style filter.
+
+ Use for model parameters, dataset metadata, artifact sizes, or file types.
+ """
+ if isinstance(projection, dict):
+ projection = [k for k, v in projection.items() if v]
+ return db_query_tools.query_objects(filter=filter, projection=projection or None, limit=limit)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def highlight_lineage(
+ task_ids: Optional[List[str]] = None,
+ filter: Optional[Dict[str, Any]] = None,
+ workflow_id: Optional[str] = None,
+) -> ToolResult:
+ """Return seed tasks for UI lineage highlighting."""
+ return db_query_tools.highlight_lineage(task_ids=task_ids, filter=filter, workflow_id=workflow_id)
diff --git a/src/flowcept/agents/mcp/mcp_tools/df_query_mcp_tools.py b/src/flowcept/agents/mcp/mcp_tools/df_query_mcp_tools.py
new file mode 100644
index 00000000..5d61fec1
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_tools/df_query_mcp_tools.py
@@ -0,0 +1,137 @@
+"""Thin MCP wrappers for DF (DataFrame) query tools.
+
+One-liner delegates to :mod:`flowcept.agents.data_query_tools.df_query_tools`.
+MCP context lookup (df, schema, value_examples, custom_user_guidance) happens here.
+"""
+
+from flowcept.agents.tool_result import ToolResult
+from flowcept.agents.mcp.context_manager import mcp_flowcept, get_df_context, ctx_manager, EMPTY_DF_MESSAGE
+from flowcept.agents.data_query_tools import df_query_tools as _core
+from flowcept.commons.vocabulary import PROV_AGENT
+from flowcept.instrumentation.flowcept_agent_task import agent_flowcept_task
+
+# Workflow-record keys that get_workflow_context drops before returning the record.
+# "machine_info" is removed as a whole; get_workflow_context reads it separately to
+# build a small "hardware_summary" entry.
+_WORKFLOW_HEAVY_FIELDS = frozenset(
+    {
+        "machine_info",
+        "flowcept_settings",
+        "code_repository",
+        "conf",
+        "extra_metadata",
+        "environment_id",
+        "sys_name",
+        "interceptor_ids",
+        "adapter_id",
+        "flowcept_version",
+    }
+)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def get_workflow_context() -> ToolResult:
+    """Return the in-memory workflow record(s) currently loaded in the agent context.
+
+    The DF path stores workflow provenance in the MCP context rather than in the
+    tasks DataFrame. This tool is the DF-path counterpart to the DB-path
+    ``query_workflows`` tool: both return ``{items, count}`` with heavy
+    infrastructure fields stripped.
+
+    Returns
+    -------
+    ToolResult
+        ``result`` holds ``{"items": [...], "count": int}``.
+    """
+    wf = ctx_manager.context.workflow_msg_obj
+    if not wf:
+        return ToolResult(code=404, result="No workflow loaded in agent context.", tool_name="get_workflow_context")
+    pruned = {k: v for k, v in wf.items() if k not in _WORKFLOW_HEAVY_FIELDS}
+    # Add a lightweight hardware_summary from machine_info so hardware questions can be answered.
+    machine_info = wf.get("machine_info")
+    if isinstance(machine_info, dict):
+        # Only the first dict-valued entry of machine_info is summarized.
+        node_data = next((entry for entry in machine_info.values() if isinstance(entry, dict)), None)
+        if node_data is not None:
+            hw: dict = {}
+            if "platform" in node_data:
+                hw["platform"] = node_data["platform"]
+            cpu = node_data.get("cpu")
+            # Guard: a null or non-mapping "cpu" entry is skipped instead of raising
+            # TypeError from the membership test / subscript below.
+            if isinstance(cpu, dict):
+                hw["cpu"] = {k: cpu[k] for k in ("brand_raw", "arch", "count") if k in cpu}
+            if hw:
+                pruned["hardware_summary"] = hw
+    return ToolResult(code=301, result={"items": [pruned], "count": 1}, tool_name="get_workflow_context")
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def run_df_query(query: str, llm=None, plot: bool = False, context_kind: str = "tasks") -> ToolResult:
+    r"""Run a natural language query against the current context DataFrame.
+
+    This tool retrieves the active DataFrame, schema, and example values
+    from the MCP Flowcept context and uses an LLM to process the query.
+
+    Parameters
+    ----------
+    query : str
+        Natural language query or Python code snippet.
+    llm : callable, optional
+        LLM callable. Built from settings if None.
+    plot : bool, optional
+        If True, generate plotting code.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    ToolResult
+    """
+    # Resolve the MCP-side context first; the core tool receives everything explicitly.
+    frame, frame_schema, examples, guidance = get_df_context(context_kind=context_kind)
+    core_kwargs = {
+        "query": query,
+        "df": frame,
+        "schema": frame_schema,
+        "value_examples": examples,
+        "custom_user_guidance": guidance,
+        "llm": llm,
+        "plot": plot,
+        "context_kind": context_kind,
+    }
+    return _core.run_df_query(**core_kwargs)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def execute_generated_df_code(user_code: str, context_kind: str = "tasks") -> ToolResult:
+    """Execute externally generated pandas code against the current agent DataFrame.
+
+    Parameters
+    ----------
+    user_code : str
+        Pandas code expected to assign output to ``result``.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    ToolResult
+    """
+    # Only the DataFrame is needed here; the other context items are ignored.
+    frame, _schema, _examples, _guidance = get_df_context(context_kind=context_kind)
+    has_rows = frame is not None and len(frame) > 0
+    if not has_rows:
+        return ToolResult(code=404, result=EMPTY_DF_MESSAGE)
+    return _core.execute_df_code(user_code=user_code, df=frame)
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def extract_or_fix_python_code(raw_text: str, runtime_error: str = None, context_kind: str = "tasks") -> ToolResult:
+    """Extract or repair pandas code using the current agent DataFrame columns."""
+    from flowcept.agents.llm.builders import build_llm_model
+
+    frame, _schema, _examples, _guidance = get_df_context(context_kind=context_kind)
+    has_rows = frame is not None and len(frame) > 0
+    if not has_rows:
+        return ToolResult(code=404, result=EMPTY_DF_MESSAGE)
+    # The model is built only once a non-empty DataFrame is confirmed.
+    model = build_llm_model(track_tools=False)
+    column_names = list(frame.columns)
+    return _core.extract_or_fix_python_code(model, raw_text, column_names, runtime_error=runtime_error)
diff --git a/src/flowcept/agents/mcp/mcp_tools/report_tools.py b/src/flowcept/agents/mcp/mcp_tools/report_tools.py
new file mode 100644
index 00000000..c0a3ffb1
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_tools/report_tools.py
@@ -0,0 +1,59 @@
+"""MCP tool for generating workflow provenance cards.
+
+Split from ``general_tools.py`` — thin wrapper around ``Flowcept.generate_report``.
+"""
+
+from flowcept import Flowcept
+from flowcept.agents.tool_result import ToolResult
+from flowcept.agents.mcp.context_manager import mcp_flowcept
+from flowcept.commons.vocabulary import PROV_AGENT
+from flowcept.instrumentation.flowcept_agent_task import agent_flowcept_task
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def generate_workflow_card(
+    workflow_id: str = None,
+    campaign_id: str = None,
+    input_jsonl_path: str = None,
+) -> ToolResult:
+    """Generate and return a markdown workflow card as text.
+
+    Exactly one of ``workflow_id``, ``campaign_id``, or ``input_jsonl_path`` must be provided.
+
+    Parameters
+    ----------
+    workflow_id : str, optional
+        Query by workflow identifier.
+    campaign_id : str, optional
+        Query by campaign identifier (produces a campaign-level card).
+    input_jsonl_path : str, optional
+        Path to a Flowcept JSONL buffer file used as input instead of the DB.
+
+    Returns
+    -------
+    ToolResult
+        ``code=301`` with markdown text in ``result["markdown"]`` on success,
+        or an error payload on failure.
+    """
+    # NOTE(review): the docstring says "exactly one" selector, but the guard below only
+    # rejects the case where none is given; confirm whether combinations are intended.
+    try:
+        if not (workflow_id or campaign_id or input_jsonl_path):
+            return ToolResult(code=400, result="One of workflow_id, campaign_id, or input_jsonl_path is required.")
+
+        report = Flowcept.generate_report(
+            report_type="workflow_card",
+            format="markdown",
+            workflow_id=workflow_id,
+            campaign_id=campaign_id,
+            input_jsonl_path=input_jsonl_path,
+        )
+        payload = {
+            "workflow_id": workflow_id,
+            "campaign_id": campaign_id,
+            "markdown": report["markdown"],
+        }
+        return ToolResult(code=301, result=payload)
+    except Exception as e:
+        # Any failure, including a report without a "markdown" key, is returned as code 499.
+        return ToolResult(code=499, result=str(e))
diff --git a/src/flowcept/agents/mcp/mcp_tools/schema_mcp_tools.py b/src/flowcept/agents/mcp/mcp_tools/schema_mcp_tools.py
new file mode 100644
index 00000000..90ebae7d
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_tools/schema_mcp_tools.py
@@ -0,0 +1,28 @@
+"""MCP tools for workflow-scoped schema context."""
+
+from typing import Optional
+
+from flowcept.agents.mcp.context_manager import ctx_manager, mcp_flowcept
+from flowcept.agents.prompts.db_query_prompts import build_db_schema_context
+from flowcept.agents.tool_result import ToolResult
+from flowcept.commons.vocabulary import PROV_AGENT
+from flowcept.instrumentation.flowcept_agent_task import agent_flowcept_task
+
+
+@mcp_flowcept.tool()
+@agent_flowcept_task(subtype=PROV_AGENT.AGENT_TOOL)
+def get_workflow_schema_context(workflow_id: Optional[str] = None) -> ToolResult:
+    """Return workflow-scoped dynamic schema context for DB and runtime queries."""
+    snapshot = ctx_manager.schema_manager.get_workflow_schema_snapshot(workflow_id)
+    if snapshot:
+        # The same snapshot is returned raw ("schema") and rendered as prompt text.
+        rendered = build_db_schema_context(
+            dynamic_schema=snapshot.get("dynamic_schema"),
+            example_values=snapshot.get("value_examples"),
+            current_fields=snapshot.get("current_fields"),
+        )
+        payload = {"workflow_id": workflow_id, "schema": snapshot, "prompt_context": rendered}
+        return ToolResult(code=301, result=payload, tool_name="get_workflow_schema_context")
+    return ToolResult(code=404, result="No workflow schema context is available.")
diff --git a/src/flowcept/agents/mcp/mcp_tools/session_tools.py b/src/flowcept/agents/mcp/mcp_tools/session_tools.py
new file mode 100644
index 00000000..79091896
--- /dev/null
+++ b/src/flowcept/agents/mcp/mcp_tools/session_tools.py
@@ -0,0 +1,150 @@
+"""Session-level MCP tools: liveness, LLM check, guidance recording, and context reset."""
+
+import json
+from typing import List
+
+from flowcept.agents.tool_result import ToolResult
+from flowcept.agents.llm.builders import build_llm_model
+from flowcept.agents.mcp.context_manager import ctx_manager, mcp_flowcept
+
+
+def _with_message_type(message: dict) -> dict:
+    """Infer the Flowcept ``type`` of a message that does not carry one.
+
+    A message with a truthy ``type`` is returned as-is (same object). Otherwise a
+    shallow copy is returned, with ``type`` set to "object", "task", or "workflow"
+    depending on which identifier key is present; the checks run in that order.
+    If no identifier matches, the copy is returned without a new ``type``.
+    """
+    if message.get("type"):
+        return message
+    if message.get("object_id"):
+        inferred = "object"
+    elif message.get("task_id") or message.get("activity_id"):
+        inferred = "task"
+    elif message.get("workflow_id"):
+        inferred = "workflow"
+    else:
+        inferred = None
+    annotated = dict(message)
+    if inferred is not None:
+        annotated["type"] = inferred
+    return annotated
+
+
+@mcp_flowcept.tool()
+def get_latest(n: int = None) -> str:
+    """Return the most recent task(s) from the task buffer.
+
+    Parameters
+    ----------
+    n : int, optional
+        Number of most recent tasks to return. If None, return only the latest.
+
+    Returns
+    -------
+    str
+        JSON-encoded task(s): a single task object when ``n`` is None, otherwise
+        a list holding up to ``n`` of the most recent tasks in buffer order.
+    """
+    tasks = ctx_manager.context.tasks
+    if not tasks:
+        return "No tasks available."
+    if n is None:
+        return json.dumps(tasks[-1])
+    if n <= 0:
+        # tasks[-0:] would select the whole buffer; a non-positive count selects nothing.
+        return json.dumps([])
+    # Slice rather than index: tasks[-n] is the single n-th newest task and raises
+    # IndexError when n exceeds the buffer size, while tasks[-n:] is the last n tasks.
+    return json.dumps(list(tasks[-n:]))
+
+
+@mcp_flowcept.tool()
+def check_liveness() -> str:
+    """Confirm the agent is alive and responding.
+
+    Returns
+    -------
+    str
+        Liveness status string.
+    """
+    # The reply embeds the MCP server's configured name.
+    status = f"I'm {mcp_flowcept.name} and I'm ready!"
+    return status
+
+
+@mcp_flowcept.tool()
+def check_llm() -> str:
+    """Check connectivity and response from the LLM backend.
+
+    Returns
+    -------
+    str
+        LLM response.
+    """
+    # Build a model and send one fixed probe message; its reply is returned as-is.
+    return build_llm_model()("Hello?")
+
+
+@mcp_flowcept.tool()
+def record_guidance(message: str) -> ToolResult:
+    """Record a custom guidance message in agent memory.
+
+    Parameters
+    ----------
+    message : str
+        Guidance text to record.
+
+    Returns
+    -------
+    ToolResult
+    """
+    # Drop every "@record" marker; surrounding whitespace is left untouched.
+    guidance_text = message.replace("@record", "")
+    ctx_manager.context.custom_guidance.append(guidance_text)
+    return ToolResult(code=201, result=f"Ok. I recorded in my memory: {guidance_text}")
+
+
+@mcp_flowcept.tool()
+def show_records() -> ToolResult:
+    """List all recorded user guidance.
+
+    Returns
+    -------
+    ToolResult
+    """
+    try:
+        entries: List = ctx_manager.context.custom_guidance
+        if entries:
+            # One bullet line per recorded entry, in insertion order.
+            bullets = "\n".join(f" - {msg}" for msg in entries)
+            message = "This is the list of custom guidance I have in my memory:\n" + bullets
+        else:
+            message = "There is no recorded user guidance."
+        return ToolResult(code=201, result=message)
+    except Exception as e:
+        return ToolResult(code=499, result=str(e))
+
+
+@mcp_flowcept.tool()
+def reset_records() -> ToolResult:
+    """Reset all recorded user guidance.
+
+    Returns
+    -------
+    ToolResult
+    """
+    try:
+        # Rebind the attribute to a fresh list rather than clearing the old one in place.
+        ctx_manager.context.custom_guidance = list()
+        outcome = ToolResult(code=201, result="Custom guidance reset.")
+    except Exception as e:
+        outcome = ToolResult(code=499, result=str(e))
+    return outcome
+
+
+@mcp_flowcept.tool()
+def reset_context() -> ToolResult:
+    """Reset all agent context.
+
+    Returns
+    -------
+    ToolResult
+    """
+    try:
+        ctx_manager.reset_context()
+        outcome = ToolResult(code=201, result="Context reset.")
+    except Exception as e:
+        # Failures are reported to the caller as code 499 instead of propagating.
+        outcome = ToolResult(code=499, result=str(e))
+    return outcome
+
+
+@mcp_flowcept.tool()
+def load_buffer_messages(messages: List[dict]) -> ToolResult:
+    """Replace active MCP context with provided Flowcept buffer messages."""
+    try:
+        # The context is cleared first, so a failure part-way through leaves only
+        # the messages handled up to that point.
+        ctx_manager.reset_context()
+        for raw_message in messages:
+            typed_message = _with_message_type(raw_message)
+            ctx_manager.message_handler(typed_message)
+        loaded = len(messages)
+        return ToolResult(code=201, result={"count": loaded})
+    except Exception as e:
+        return ToolResult(code=499, result=str(e))
diff --git a/src/flowcept/agents/prompts/README.md b/src/flowcept/agents/prompts/README.md
new file mode 100644
index 00000000..12d060ae
--- /dev/null
+++ b/src/flowcept/agents/prompts/README.md
@@ -0,0 +1,27 @@
+# Agents Prompts
+
+This directory contains all prompt builder functions for the Flowcept agent subsystem.
+
+## Files
+
+| File | Purpose |
+|---|---|
+| `base_prompts.py` | `BASE_ROLE`, `build_single_task_prompt`, `build_multitask_prompt` — schema-aware analysis prompts using `SCHEMA_CONTEXT` |
+| `db_query_prompts.py` | `build_db_filter_prompt` — builds the prompt that asks an LLM for a Mongo-style filter JSON; `build_db_schema_context` — shared schema context for DB-backed prompts |
+| `in_memory_task_query_prompts.py` | Prompt builders for in-memory task DataFrame queries (`generate_pandas_code_prompt`, `generate_plot_code_prompt`, etc.) |
+| `in_memory_workflow_query_prompts.py` | Prompt builders for querying the active workflow message object |
+| `general_prompts.py` | Routing and small-talk prompts; `ROUTING_PROMPT`, `SMALL_TALK_PROMPT` |
+| `chat_prompts.py` | `build_chat_system_prompt` — system prompt builder for the webservice chat endpoint |
+
+## Design Rules
+
+1. **No MCP imports** — prompt files must never import `mcp_flowcept` or `FastMCP`.
+ - The `@mcp_flowcept.prompt()` registrations live in `prompts/mcp_prompts.py`.
+
+2. **Schema from SCHEMA_CONTEXT** — prompt builders that need field names or types must
+   use `SCHEMA_CONTEXT` from `provenance_schema_manager/static_schema_builder.py`, not hardcoded strings.
+   `SCHEMA_CONTEXT` is a module-level dict that is populated at MCP server startup.
+
+3. **Naming convention** — all public builder functions are named `build_*_prompt`.
+
+4. **No side effects** — functions are pure builders; they never call LLMs or make DB queries.
diff --git a/src/flowcept/agents/prompts/base_prompts.py b/src/flowcept/agents/prompts/base_prompts.py
new file mode 100644
index 00000000..6ebb3026
--- /dev/null
+++ b/src/flowcept/agents/prompts/base_prompts.py
@@ -0,0 +1,107 @@
+# flake8: noqa: E501
+"""Base prompt builders using SCHEMA_CONTEXT for schema-aware task analysis.
+
+These replace the hardcoded schema strings in ``general_prompts.py`` with
+live schema tables derived from ``SCHEMA_CONTEXT`` (populated at MCP server startup).
+"""
+
+from flowcept.agents.provenance_schema_manager.static_schema_builder import SCHEMA_CONTEXT
+
+# Opening role sentence placed at the start of every prompt assembled by _build_prompt.
+BASE_ROLE = (
+    "You are a helpful assistant analyzing provenance data from a large-scale workflow composed of multiple tasks."
+)
+
+
+def _build_schema_table() -> str:
+    """Render SCHEMA_CONTEXT as a markdown table of task fields.
+
+    Task fields come first, then telemetry-summary fields prefixed with
+    ``telemetry_summary.``. When SCHEMA_CONTEXT is empty, a single placeholder
+    row is emitted after the header.
+    """
+    table = ["| Field | Type | Description |", "|---|---|---|"]
+    table.extend(
+        f"| `{entry['name']}` | {entry['type']} | {entry['description']} |"
+        for entry in SCHEMA_CONTEXT.get("task_fields", [])
+    )
+    table.extend(
+        f"| `telemetry_summary.{entry['name']}` | {entry['type']} | {entry['description']} |"
+        for entry in SCHEMA_CONTEXT.get("telemetry_summary_fields", [])
+    )
+    if not SCHEMA_CONTEXT:
+        table.append("| *(schema not yet loaded)* | | |")
+    return "\n".join(table)
+
+
+def _build_data_schema_prompt() -> str:
+    """Describe the layout of a task object, followed by the known-fields table."""
+    overview = (
+        "A task object has its provenance: input data is stored in the 'used' field (column prefix `used.`), "
+        "output in the 'generated' field (column prefix `generated.`). "
+        "Tasks sharing the same 'workflow_id' belong to the same workflow execution trace. "
+        "Pay attention to the 'tags' field, as it may indicate critical tasks. "
+        "The 'telemetry_summary' field reports CPU, disk, memory, and network usage, along with 'duration_sec'. "
+        "Task placement is stored in the 'hostname' field.\n\n"
+    )
+    heading = "### Known task fields\n\n"
+    # The table is rebuilt on every call, so it reflects SCHEMA_CONTEXT at call time.
+    return overview + heading + _build_schema_table()
+
+
+# Analysis guidance that _build_prompt appends right after the caller-supplied job text.
+_ANALYSIS_CORE = (
+    "Correlations involving 'used' vs 'generated' data are especially important. "
+    "So are relationships between (used or generated) data and resource metrics. "
+    "Highlight outliers or critical information and give actionable insights or recommendations."
+)
+
+
+def _build_prompt(role_suffix: str, job: str, data_label: str, data) -> str:
+    """Assemble an analysis prompt: role, schema description, job text, then the data.
+
+    Parameters
+    ----------
+    role_suffix : str
+        Text appended directly after ``BASE_ROLE``.
+    job : str
+        Task-specific instructions; ``_ANALYSIS_CORE`` is appended after them.
+    data_label : str
+        Heading printed above the data block.
+    data : Any
+        Task object(s) to embed. Strings are embedded verbatim; anything else is
+        serialized as JSON so the content matches the ``json`` code fence.
+
+    Returns
+    -------
+    str
+        Formatted prompt.
+    """
+    import json
+
+    if isinstance(data, str):
+        payload = data
+    else:
+        try:
+            # default=str keeps non-JSON values (datetimes, ids, ...) readable.
+            payload = json.dumps(data, default=str, ensure_ascii=False)
+        except (TypeError, ValueError):
+            # Fall back to the plain text form, e.g. for unsupported key types
+            # or circular references.
+            payload = str(data)
+    return (
+        f"{BASE_ROLE}{role_suffix}\n\n"
+        f"{_build_data_schema_prompt()}\n\n"
+        f"{job} {_ANALYSIS_CORE}\n\n"
+        f"{data_label}:\n```json\n{payload}\n```"
+    )
+
+
+def build_single_task_prompt(task_obj: dict) -> str:
+    """Create the analysis prompt for one task object.
+
+    Parameters
+    ----------
+    task_obj : dict
+        Task object embedded in the prompt as the data to analyze.
+
+    Returns
+    -------
+    str
+        Prompt text produced by ``_build_prompt``.
+    """
+    focus_note = " You are focusing now on a particular task object."
+    instructions = (
+        "Your job is to analyze this single task. Find any anomalies, relationships, or correlations between "
+        "input, output, resource usage metrics, task duration, and task placement. "
+        "Explain what this task may be doing, using the data provided."
+    )
+    return _build_prompt(role_suffix=focus_note, job=instructions, data_label="Task object", data=task_obj)
+
+
+def build_multitask_prompt(task_objs: list) -> str:
+    """Create the analysis prompt for a list of task objects.
+
+    Parameters
+    ----------
+    task_objs : list
+        Task objects embedded in the prompt as the data to analyze.
+
+    Returns
+    -------
+    str
+        Prompt text produced by ``_build_prompt``.
+    """
+    instructions = (
+        "Your job is to analyze a list of task objects to identify patterns across tasks, anomalies, "
+        "relationships, or correlations between inputs, outputs, resource usage, duration, and task placement. "
+        "Try to infer the purpose of the workflow. "
+        "Use the data provided to justify your analysis."
+    )
+    # No role suffix here: the base role sentence is used on its own.
+    return _build_prompt(role_suffix="", job=instructions, data_label="Task objects", data=task_objs)
diff --git a/src/flowcept/agents/prompts/chat_prompts.py b/src/flowcept/agents/prompts/chat_prompts.py
index 95ef8ecd..d822b59e 100644
--- a/src/flowcept/agents/prompts/chat_prompts.py
+++ b/src/flowcept/agents/prompts/chat_prompts.py
@@ -1,21 +1,153 @@
-"""System prompt for the webservice provenance chat."""
+"""System prompt builder for the webservice provenance chat."""
-CHAT_SYSTEM_PROMPT = """You are the Flowcept provenance assistant, embedded in Flowcept's web UI.
-Flowcept captures workflow provenance: campaigns group workflows; workflows contain tasks;
-tasks record used (inputs), generated (outputs), status, timings, telemetry, and host info;
-binary artifacts (datasets, ML models) are stored as versioned objects.
+from __future__ import annotations
-Key task fields: task_id, workflow_id, campaign_id, activity_id (function name), status
-(FINISHED/ERROR/RUNNING), started_at, ended_at, used.*, generated.*, telemetry_at_start/end
-(cpu, memory, disk, network, process, gpu), hostname, agent_id, tags.
-Key workflow fields: workflow_id, name, campaign_id, user, utc_timestamp.
+import json
+from typing import Any, Dict, Optional
-You have tools to query this data. Rules:
+# Field names that _build_schema_section surfaces in the chat system prompt.
+# They filter the entries found in the schema context and, when the context
+# cannot supply all three groups, are listed directly in sorted order.
+_TASK_KEY_FIELDS = {
+    "task_id",
+    "activity_id",
+    "workflow_id",
+    "campaign_id",
+    "agent_id",
+    "status",
+    "started_at",
+    "ended_at",
+    "used",
+    "generated",
+    "hostname",
+    "tags",
+    "parent_task_id",
+    "telemetry_at_start",
+    "telemetry_at_end",
+}
+# Workflow-level fields named in the prompt.
+_WORKFLOW_KEY_FIELDS = {"workflow_id", "name", "campaign_id", "user", "utc_timestamp"}
+# Data-object fields named in the prompt; matched against the context's "blob_fields".
+_BLOB_KEY_FIELDS = {"object_id", "object_type", "task_id", "workflow_id", "tags", "version"}
+
+
+def _build_schema_section() -> str:
+    """Return the "Key ... fields" lines for the chat system prompt.
+
+    Field descriptions are taken from the schema context when it yields at least
+    one key field for each of tasks, workflows, and objects. Otherwise the
+    key-field sets themselves are listed, sorted by name, without descriptions.
+    """
+    try:
+        from flowcept.agents.provenance_schema_manager.static_schema_builder import (
+            SCHEMA_CONTEXT,
+            build_schema_context,
+        )
+
+        # Use the populated module-level context; build one on demand if it is empty.
+        ctx = SCHEMA_CONTEXT or build_schema_context()
+    except Exception:
+        ctx = {}
+
+    def _describe(entries, wanted):
+        rendered = []
+        for entry in entries:
+            if entry["name"] not in wanted:
+                continue
+            label = f"`{entry['name']}`"
+            note = entry.get("description", "")
+            if note:
+                label += f" ({note})"
+            rendered.append(label)
+        return ", ".join(rendered) if rendered else None
+
+    def _names_only(wanted):
+        return ", ".join(f"`{name}`" for name in sorted(wanted))
+
+    task_line = _describe(ctx.get("task_fields", []), _TASK_KEY_FIELDS)
+    wf_line = _describe(ctx.get("workflow_fields", []), _WORKFLOW_KEY_FIELDS)
+    blob_line = _describe(ctx.get("blob_fields", []), _BLOB_KEY_FIELDS)
+
+    if not (task_line and wf_line and blob_line):
+        # fallback when SCHEMA_CONTEXT is not yet populated — derived from the key-field sets
+        task_line = _names_only(_TASK_KEY_FIELDS)
+        wf_line = _names_only(_WORKFLOW_KEY_FIELDS)
+        blob_line = _names_only(_BLOB_KEY_FIELDS)
+
+    return f"Key task fields: {task_line}.\nKey workflow fields: {wf_line}.\nKey object fields: {blob_line}."
+
+
+def build_chat_system_prompt(context: Optional[Dict[str, Any]] = None) -> str:
+ """Build the system prompt for the webservice provenance chat."""
+ context = dict(context or {})
+ workflow_schema_context = context.pop("workflow_schema_context", None)
+ schema_section = _build_schema_section()
+ prompt = (
+ "You are the Flowcept provenance assistant, embedded in Flowcept's web UI.\n"
+ "Flowcept captures workflow provenance: campaigns group workflows; workflows contain tasks;\n"
+ "tasks record used (inputs), generated (outputs), status, timings, telemetry, and host info;\n"
+ "data objects (versioned binary artifacts) are stored separately with an object_type label.\n\n"
+ + schema_section
+ + "\n\n"
+ )
+ prompt += """You have tools to query this data. Rules:
- Use the tools to answer data questions; never invent values. Quote real numbers from results.
- Filters are Mongo-style; allowed operators: $and $or $nor $not $exists $eq $ne $gt $gte $lt
- $lte $in $nin $regex.
-- When the user context includes workflow_id/campaign_id, scope your queries with it.
+ $lte $in $nin $regex. Never use $options — for case-insensitive regex use the inline flag:
+ {"field": {"$regex": "(?i)pattern"}}.
+- When the user context includes workflow_id/campaign_id, ALWAYS scope your queries with it.
+- For campaigns: ALWAYS call list_campaigns to get campaign details including the human-readable
+ campaign name. Never answer a campaign question from context alone — the context only has IDs.
+- For workflows: when reporting any workflow result, ALWAYS include both the `workflow_id`
+ raw value and the `name` field value explicitly, using their field labels. For a single
+ result write: "workflow_id: , name: ". For multiple results use a markdown
+ table with `workflow_id` and `name` as columns. Never omit either field.
+- When answering about workflow activities, lineage, or execution order, use only activity_id
+ values returned by provenance tools. MCP/chat tool names are not workflow activities unless
+ they explicitly appear as activity_id values in the returned provenance records.
+- For agents: list_agents returns {agent_id (UUID), name (human-readable), activities,
+ task_count}. ALWAYS refer to agents by their `name` field, not by agent_id UUID.
+
+ Two patterns — pick based on whether the question names a SPECIFIC item:
+
+ PATTERN A — Specific named value in task data (the user references a concrete task_id
+ or an identifier/value that appears in a task's used.* or generated.* fields): e.g.
+ "what inputs did the task that used consume?", "which agent submitted the
+ task that processed ?", "what produced ?".
+ Use EXACTLY 3 tool calls — no shortcuts:
+ (1) Call get_task_summary scoped to the workflow_id to discover activity names.
+ (2) Call query_tasks scoped to the workflow_id. Do NOT filter by the specific value —
+ you do not know which used.* or generated.* field stores it. Include
+ projection=["task_id","activity_id","used","generated","agent_id","status"].
+ Inspect BOTH used.* and generated.* fields. If the value appears as generated
+ by one task and used by another, the generated-side task is the upstream
+ producer/submitter of that work item, while the used-side task consumed it.
+ (3) Call list_agents — MANDATORY for attribution. query_tasks returns raw agent_id UUIDs;
+ only list_agents maps them to human-readable agent names and shows which activities
+ each agent ran. Required even if step 2 task data answers the data part.
+ Write your final answer ONLY after all 3 calls complete. The stop-early rule does not
+ apply here — all 3 calls are always required for any Pattern A question.
+
+ PATTERN B — General attribution (no specific value named): e.g. "which agent submitted
+ the work items?", "which agent ran activity X?", "which agent and task submitted the
+ records?". The word "task" in the question does NOT require calling query_tasks —
+ list_agents shows which activities each agent ran.
+ Call list_agents only. Answer directly; do NOT call query_tasks.
+ If the user asks who "submitted work items for" an activity, answer with the
+ upstream agent activity that created/submitted work for that activity. Do not require
+ the target activity itself to have an agent_id; target activity execution and upstream
+ submission are different provenance roles.
+
- Prefer get_task_summary for aggregate questions (counts, durations) over fetching all tasks.
+ When reporting task counts, your response MUST include each activity_id and its task count.
+ Reporting only "X tasks total" without the per-activity list is INCOMPLETE. Always format
+ as: "Activity A: N tasks, Activity B: M tasks, … Total: X tasks."
+- For data lineage and data flow questions ("complete lineage", "data lineage of",
+ "how did X influence Y?", "trace the lineage", "influence subsequent"):
+ Do NOT call highlight_lineage — it is a UI widget action only.
+ Do NOT call query_tasks — task-level details are not needed for lineage questions.
+ Use EXACTLY 2 tool calls — no more, no fewer:
+ (1) get_task_summary — to see all activities and their counts in the workflow.
+ (2) list_agents — to see which agent ran which activities.
+ Even if the question mentions "the best" or "the worst" task: do NOT search for a
+ specific task. All tasks of the same activity type share the same upstream lineage.
+ Write your final answer ONLY after BOTH calls complete. Do NOT call any additional
+ tools after these 2 calls — get_task_summary and list_agents are sufficient.
+ Describe the workflow using `activity_ids` / `activity_counts` returned by get_task_summary
+ and the agent/activity mapping returned by list_agents. For "complete lineage" or
+ execution-order questions, include every activity_id in `activity_ids`. Do not add activity
+ names that are not in those results.
+- highlight_lineage is ONLY for explicit UI highlight requests ("highlight in the graph",
+ "show lineage in the UI", "visually dim unrelated nodes in the graph").
+- When enumerating discrete parameter values (numeric values, category labels, IDs, etc.):
+ ALWAYS list ALL values explicitly rather than giving a range.
- When asked for a chart/plot, call make_chart with a declarative chart spec:
{"chart_id": "", "type": "chart", "title": "...",
"data": {"source": "tasks", "filter": {...}, "group_by": "",
@@ -26,8 +158,15 @@
two sentences.
- To modify the user's dashboard (only when asked), call get_dashboard, then update_dashboard
with the complete revised spec; explain what changed.
-- When the user asks to highlight, trace, show, or visualise the lineage/ancestors/descendants
- of a task, ALWAYS call highlight_lineage. Pass task_ids directly when given, or use filter to
- find the seed tasks first. The UI will visually dim all unrelated nodes in the Dataflow graph.
- Be concise. Use markdown tables for tabular answers. State filters you used.
+- IMPORTANT: after you receive tool results sufficient to answer the question, write your
+ FINAL ANSWER immediately — UNLESS you are in Pattern A (query_tasks + list_agents) or a
+ lineage question (get_task_summary + list_agents), in which case BOTH calls are required
+ before writing your answer. Do NOT call more tools beyond the required set unless the
+ result was empty or returned an error code.
"""
+ if workflow_schema_context:
+ prompt += f"\nWorkflow-specific observed schema context:\n{workflow_schema_context}\n"
+ if context:
+ prompt += f"\nCurrent user context (scope queries with it): {json.dumps(context)}"
+ return prompt
diff --git a/src/flowcept/agents/prompts/db_query_prompts.py b/src/flowcept/agents/prompts/db_query_prompts.py
new file mode 100644
index 00000000..e0662700
--- /dev/null
+++ b/src/flowcept/agents/prompts/db_query_prompts.py
@@ -0,0 +1,103 @@
+# flake8: noqa: E501
+"""Prompt builders for database provenance queries.
+
+All functions are plain Python — no MCP framework imports.
+"""
+
+from flowcept.agents.provenance_schema_manager.static_schema_builder import SCHEMA_CONTEXT
+from flowcept.agents.prompts.schema_prompt_context import (
+ build_allowed_fields_prompt,
+ build_task_structure_prompt,
+)
+
+# Mongo-style operators that build_db_filter_prompt lists (sorted) as the only
+# operators the LLM may use in a generated filter.
+ALLOWED_FILTER_OPERATORS = frozenset(
+    {
+        "$and",
+        "$or",
+        "$nor",
+        "$not",
+        "$exists",
+        "$eq",
+        "$ne",
+        "$gt",
+        "$gte",
+        "$lt",
+        "$lte",
+        "$in",
+        "$nin",
+        "$regex",
+    }
+)
+
+
+def _build_task_field_list() -> str:
+    """Render the task field names in SCHEMA_CONTEXT as a bullet list.
+
+    Telemetry-summary fields follow the task fields, prefixed with
+    ``telemetry_summary.``. A placeholder line is returned when no field is known.
+    """
+    names = []
+    for entry in SCHEMA_CONTEXT.get("task_fields", []):
+        names.append(f"`{entry['name']}`")
+    for entry in SCHEMA_CONTEXT.get("telemetry_summary_fields", []):
+        names.append(f"`telemetry_summary.{entry['name']}`")
+    if not names:
+        return " *(schema not yet loaded)*"
+    return "\n".join(f" - {name}" for name in names)
+
+
+def build_db_schema_context(
+    dynamic_schema: dict = None,
+    example_values: dict = None,
+    current_fields: list[str] = None,
+) -> str:
+    """Build the schema section shared by database-backed query prompts.
+
+    Without ``current_fields`` the section is the static field list taken from
+    SCHEMA_CONTEXT. With ``current_fields`` it is the allowed-fields prompt,
+    followed by the task-structure prompt when ``dynamic_schema`` is given.
+    """
+    if not current_fields:
+        return "## Valid field names\n" + _build_task_field_list()
+    sections = [build_allowed_fields_prompt(current_fields, target_name="database task records")]
+    if dynamic_schema is not None:
+        sections.append(
+            build_task_structure_prompt(
+                dynamic_schema=dynamic_schema,
+                example_values=example_values or {},
+                current_fields=current_fields,
+                record_description="Each database task record represents one task.",
+            )
+        )
+    return "".join(sections)
+
+
+def build_db_filter_prompt(
+    query: str,
+    collection: str = "tasks",
+    dynamic_schema: dict = None,
+    example_values: dict = None,
+    current_fields: list[str] = None,
+) -> str:
+    """Compose the prompt asking an LLM for a Mongo-style filter JSON.
+
+    Parameters
+    ----------
+    query : str
+        Natural language question to translate into a filter.
+    collection : str, optional
+        Target collection name ("tasks" or "workflows").
+    dynamic_schema : dict, optional
+        Forwarded to ``build_db_schema_context``.
+    example_values : dict, optional
+        Forwarded to ``build_db_schema_context``.
+    current_fields : list[str], optional
+        Forwarded to ``build_db_schema_context``.
+
+    Returns
+    -------
+    str
+        Formatted prompt.
+    """
+    # Sorting keeps the operator list stable across runs.
+    operator_list = ", ".join(sorted(ALLOWED_FILTER_OPERATORS))
+    schema_context = build_db_schema_context(
+        dynamic_schema=dynamic_schema,
+        example_values=example_values,
+        current_fields=current_fields,
+    )
+    return f"""You are an expert in MongoDB query construction for workflow provenance data.
+The user wants to query the ``{collection}`` collection.
+
+## Valid filter operators
+Only these operators are allowed:
+{operator_list}
+
+{schema_context}
+
+## Rules
+- Use only field names from the list above.
+- Use only operators from the allowlist.
+- Do NOT invent field names or operators.
+- Return only valid JSON — no markdown, no explanations.
+- For missing information, return an empty filter: {{}}
+- Date/time fields use Unix timestamps (seconds since epoch).
+
+## Output format
+Return a single JSON object (the filter). Example:
+{{"activity_id": "process_data", "telemetry_summary.duration_sec": {{"$gt": 60}}}}
+
+User query:
+{query}
+"""
diff --git a/src/flowcept/agents/prompts/df_query_prompts.py b/src/flowcept/agents/prompts/df_query_prompts.py
new file mode 100644
index 00000000..a861c71c
--- /dev/null
+++ b/src/flowcept/agents/prompts/df_query_prompts.py
@@ -0,0 +1,441 @@
+# flake8: noqa: E501
+"""Prompt builders for DF (DataFrame) chat query path.
+
+All functions are plain Python — no MCP framework decorators.
+The ``@mcp_flowcept.prompt()`` registration lives in ``prompts/mcp_prompts.py``.
+"""
+
+from flowcept.agents.prompts.schema_prompt_context import (
+ build_allowed_fields_prompt,
+ build_example_values_prompt,
+ build_task_structure_prompt,
+)
+
+
+def get_df_form(context_kind="tasks"):
+    """Return the sentence telling the LLM what `df` holds: object metadata for "objects", task objects otherwise."""
+    # Only the record noun differs between the two contexts; the surrounding sentence is shared.
+    flattened_source = "object metadata messages" if context_kind == "objects" else "task objects"
+    return f"The user has a pandas DataFrame called `df`, created from flattened {flattened_source} using `pd.json_normalize`."
+
+
+def build_current_df_columns_prompt(current_fields) -> str:
+    """Return the allowed-fields prompt for `df` followed by the "info not available" fallback rule."""
+    allowed_fields_section = build_allowed_fields_prompt(current_fields, target_name="df")
+    # Appended as one more bullet so the model has an explicit answer when the fields are insufficient.
+    fallback_rule = '- If the query cannot be answered using ALLOWED_FIELDS, return exactly: result = "info not available"\n'
+    return allowed_fields_section + fallback_rule
+
+
+def get_example_values_prompt(example_values):
+    """Return the example-values prompt section; thin wrapper around ``build_example_values_prompt``."""
+    return build_example_values_prompt(example_values)
+
+
+def get_object_schema_prompt(example_values, current_fields):
+    """Return the object-context structure text plus the example-values section (`current_fields` is not used)."""
+    object_structure_section = """
+    ## DATAFRAME STRUCTURE
+
+    Each row in `df` represents one workflow object metadata message.
+
+    Important object fields:
+    - `object_type`: semantic object category, such as input_file, dataset, artifact, or ml_model.
+    - `type`: Flowcept message type. For object rows this is usually "object"; do not use it as the object category.
+    - `file_path`: object path when available.
+    - `workflow_id`: workflow associated with the object.
+    - `custom_metadata.*`: user-defined metadata (e.g. model_profile.params, n_samples, split_ratio).
+
+    ALWAYS CHECK THE ALLOWED_FIELDS list before proceeding.
+    ---
+    """
+    return object_structure_section + get_example_values_prompt(example_values)
+
+
+def get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind="tasks"):
+    """Return the schema prompt section matching ``context_kind``.
+
+    "objects" delegates to ``get_object_schema_prompt`` (``dynamic_schema`` is not used on
+    that path); any other value builds the task structure prompt.
+    """
+    # Task rows are the default; only the exact string "objects" switches to the object prompt.
+    if context_kind != "objects":
+        task_kwargs = dict(dynamic_schema=dynamic_schema, example_values=example_values, current_fields=current_fields)
+        return build_task_structure_prompt(record_description="Each row in `df` represents a single task.", **task_kwargs)
+    return get_object_schema_prompt(example_values, current_fields)
+
+
+def build_plot_code_prompt(query, dynamic_schema, example_values, current_fields, context_kind="tasks") -> str:
+    """Build a prompt for Streamlit chart code generation.
+
+    Parameters
+    ----------
+    query : str
+        Natural language query, placed under "User request:".
+    dynamic_schema : dict
+        DataFrame schema, forwarded to ``get_df_schema_prompt``.
+    example_values : dict
+        Example values, forwarded to ``get_df_schema_prompt``.
+    current_fields : list
+        Current DataFrame columns, forwarded to ``get_df_schema_prompt``.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    str
+        Prompt requesting a raw JSON object with result_code, plot_code and description.
+    """
+    return f"""
+    You are a Streamlit chart expert.
+    {get_df_form(context_kind)}
+
+    {get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind=context_kind)}
+
+    ### 3. Guidelines
+
+    - When plotting from a grouped or aggregated result, set an appropriate column (like activity_id, started_at, etc.) as the index before plotting to ensure x-axis labels are correct.
+    - When aggregating by "activity_id", remember to include .set_index('activity_id') in your response.
+    - Prefer bar charts (`st.bar_chart`) when the x-axis has ≤10 discrete categories (e.g., category labels, discrete parameter values). Use line charts only for continuous/time-series data.
+
+    ### 4. Output Format
+
+    You must write Python code using Streamlit (st) to visualize the requested data.
+
+    - Always assume `df` is already defined.
+    - First, assign the query result to a variable called `result` using pandas.
+    - Then, write the plotting code based on `result`.
+    - Return a Python dictionary with three fields:
+      - `"result_code"`: the pandas code that assigns `result`
+      - `"plot_code"`: the code that creates the Streamlit plot
+      - `"description"`: a one-sentence natural-language caption. It MUST include:
+          (1) the chart type (e.g., "bar chart", "line chart"),
+          (2) the exact field names from result_code verbatim (e.g., "generated.output_field", "used.input_param"),
+          (3) the grouping/index column name,
+          (4) if discrete categories are involved, list them explicitly.
+    ---
+
+    ### 5. Few-Shot Examples
+
+    ```python
+    # Q: Plot the number of tasks by activity
+    {{
+      "result_code": "result = df['activity_id'].value_counts().rename_axis('activity_id').reset_index(name='count')",
+      "plot_code": "st.bar_chart(result.set_index('activity_id'))",
+      "description": "A bar chart of task count by activity_id."
+    }}
+
+    # Q: Show a line chart of task duration per task start time
+    {{
+      "result_code": "result = df[['started_at', 'telemetry_summary.duration_sec']].dropna().set_index('started_at')",
+      "plot_code": "st.line_chart(result)",
+      "description": "A line chart of telemetry_summary.duration_sec over started_at."
+    }}
+    ```
+    Your response must be ONLY a raw JSON object (no markdown fences, no prose), in this exact format:
+    {{"result_code": "", "plot_code": "", "description": ""}}
+
+    User request:
+    {query}
+    """
+
+
+JOB = "You will generate a pandas dataframe code to solve the query."
+ROLE = """You are an expert in scientific and engineering workflow provenance data analysis with a deep knowledge of data lineage tracing, workflow management, and computing systems.
+ You are analyzing provenance data from a complex workflow consisting of numerous tasks."""
+OBJECT_ROLE = """You are an expert in scientific and engineering workflow provenance data analysis with a deep knowledge of data lineage tracing, workflow management, and computing systems.
+ You are analyzing object metadata records from a workflow provenance buffer."""
+QUERY_GUIDELINES = """
+
+ ### 3. Query Guidelines
+
+ - Use `df` as the base DataFrame.
+ - Use `activity_id` to filter by task type (valid values = schema keys).
+ - ONLY IF the ALLOWED_FIELDS list allow, use `used.` for parameters (inputs) and `generated.` for outputs (metrics).
+ - Use `telemetry_summary.duration_sec` for performance-related questions.
+ - Use `hostname` when user mentions *where* a task ran.
+ - Use `agent_id` when the user refers to agents (non-null means task was agent-run).
+
+ ### 4. Hard Constraints (obey strictly, YOUR LIFE DEPENDS ON THEM. DO NOT HALLUCINATE!!!)
+
+ - Always return code in the form `result = df[][[...]]` or `result = df.loc[, [...]]`
+ -**THERE ARE NOT INDIVIDUAL FIELDS NAMED `used` OR `generated`, they are ONLY are prefixes to the field names.**
+ - If the query needs fields that begin with `used.` or `generated.`, your generated query needs to iterate over the df.columns to select the used or generated fields only, such as (adapt when needed): `[col for col in df.columns if col.startswith('generated.')]` or `[col for col in df.columns if col.startswith('used.')]`
+ **THERE ABSOLUTELY ARE NO FIELDS NAMED `used` or `generated`. DO NOT, NEVER use the string 'used' or 'generated' in your generated code!!!**
+ **THE COLUMN 'used' DOES NOT EXIST**
+ **THE COLUMN 'generated' DOES NOT EXIST**
+ - **When filtering by `activity_id`, only select columns that belong to that activity's schema.**
+ - Always observing the ALLOWED_FIELDS list, use only `used.` and `generated.` fields listed in the schema for that `activity_id`.
+ - Explicitly list the selected columns — **never return all columns**
+ - **Only include telemetry columns if used in the query logic.**
+ -THERE IS NOT A FIELD NAMED `telemetry_summary.start_time` or `telemetry_summary.end_time`. Use `started_at` and `ended_at` instead.
+ -THE GENERATED FIELDS ARE LABELED AS SUCH: `generated.()` NOT `generated_output`.
+ -THERE IS NOT A FIELD NAMED `execution_id` or `used.execution_id`.
+ -DO NOT USE `nlargest` or `nsmallest` in the query code, use `sort_values` instead.
+ -WHEN user requests about workflow time, get its latest task's `ended_at` and its earliest task's `started_at` and compute the difference.
+ -WHEN user requests duration per task, utilize `telemetry_summary.duration_sec`.
+
+ If the query asks you to report which values appear in one or more columns, then:
+ For each relevant column, select that column from df, call .dropna(), then .unique() or .value_counts().
+
+ - **CRITICAL — list-valued columns**: NEVER call `.unique()` or `.value_counts()` directly on list-valued columns.
+ Always call `.explode()` first to flatten the lists into individual rows, then aggregate.
+
+ - **Do not include metadata columns unless explicitly required by the user query.**
+
+ - **For filter+aggregate queries** (e.g., "average X for items where Y > Z"): return a DataFrame showing every row that passed the filter (with its key identification columns like item_id or entity_id and the filtered field), not just a scalar aggregate. Include the aggregate as a new column or let the summary describe it.
+ - **For compound queries asking multiple questions in one sentence**: return a single DataFrame that captures all parts. NEVER return a Python list, tuple, or mixed-type collection. Instead build a structured DataFrame.
+ - **To count output fields per activity**: use `gen_cols = [c for c in df.columns if c.startswith('generated.')]` to get generated columns, then use `df.groupby('activity_id')[gen_cols].apply(lambda g: int(g.notna().sum().sum()))` to count the total number of non-null generated field values per activity (this accounts for how many tasks of each activity ran, so a task type that ran 5 times will rank higher than one that ran once even if each has the same number of fields).
+ - **For filter+aggregate queries**: ALWAYS include the primary identifier column(s) for the activity (e.g., any config, item, or entity ID from the schema) in the result DataFrame, so the reader can identify each row without relying on task_id.
+"""
+
+OBJECT_QUERY_GUIDELINES = """
+ ### 3. Query Guidelines
+
+ - Use `df` as the base DataFrame.
+ - Use `object_type` for object category questions.
+ - Use `file_path` for file path questions.
+ - Use `custom_metadata.*` fields for model/dataset metadata (check ALLOWED_FIELDS for available sub-fields).
+ - Use `workflow_id` when the query asks for workflow-specific objects.
+ - The column `type` is the Flowcept message type, not the object category.
+ - Explicitly list selected columns unless the user asks for all columns.
+"""
+
+FEW_SHOTS = """
+ ### 5. Few-Shot Examples
+
+ # Q: How many tasks were processed?
+ result = len(df)
+
+ # Q: How many tasks for each activity?
+ result = df['activity_id'].value_counts()
+
+"""
+
+OBJECT_FEW_SHOTS = """
+ ### 5. Few-Shot Examples
+
+ # Q: How many objects are available?
+ result = len(df)
+
+ # Q: List all distinct object types
+ result = df['object_type'].dropna().unique()
+
+"""
+
+OUTPUT_FORMATTING = """
+ 6. Final Instructions
+ Return only valid pandas code assigned to the variable result.
+
+ Your response must be only the raw Python code in the format:
+ result = ...
+
+ For simple queries: one line is preferred.
+ For compound queries that require intermediate variables: use multiple lines (e.g., define gen_cols, per_act, etc., then assign result on the last line).
+
+ Do not include: Explanations, Markdown formatting, Triple backticks, Comments, or Any text before or after the code block.
+ The output cannot have any markdown, no ```python or ``` at all.
+
+ THE LAST LINE OF YOUR CODE MUST BE: result = ...
+
+ Strictly follow the constraints above.
+"""
+
+
+def build_pandas_code_prompt(
+    query: str, dynamic_schema, example_values, custom_user_guidances, current_fields, context_kind="tasks"
+) -> str:
+    """Build a pandas code generation prompt from a natural language query.
+
+    Parameters
+    ----------
+    query : str
+        Natural language query, appended last under "User Query:".
+    dynamic_schema : dict
+        DataFrame schema.
+    example_values : dict
+        Example values.
+    custom_user_guidances : list, optional
+        Guidance strings rendered as bullets; ignored unless it is a non-empty list.
+    current_fields : list
+        Current DataFrame columns.
+    context_kind : str, optional
+        "tasks" or "objects"; selects the role, guidelines and few-shot constants.
+
+    Returns
+    -------
+    str
+        Formatted prompt.
+    """
+    custom_user_guidance_prompt = ""
+    if isinstance(custom_user_guidances, list) and custom_user_guidances:
+        concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
+        # Trailing newlines keep the last bullet and the dashed separator on their own lines.
+        custom_user_guidance_prompt = (
+            "You MUST consider the following guidance from the user:\n"
+            f"{concatenated_guidance}\n"
+            "------------------------------------------------------\n"
+        )
+
+    curr_cols = build_current_df_columns_prompt(current_fields)
+    role = OBJECT_ROLE if context_kind == "objects" else ROLE
+    query_guidelines = OBJECT_QUERY_GUIDELINES if context_kind == "objects" else QUERY_GUIDELINES
+    few_shots = OBJECT_FEW_SHOTS if context_kind == "objects" else FEW_SHOTS
+    return (
+        f"{role}\n"
+        f"{JOB}\n"
+        f"{get_df_form(context_kind)}\n"
+        f"{curr_cols}"
+        f"{get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind=context_kind)}"
+        f"{query_guidelines}"
+        f"{few_shots}"
+        f"{custom_user_guidance_prompt}"
+        f"{OUTPUT_FORMATTING}"
+        "User Query:\n"
+        f"{query}"
+    )
+
+
+def build_dataframe_summarizer_prompt(
+    code, reduced_df, dynamic_schema, example_values, query, current_fields, context_kind="tasks"
+) -> str:
+    """Build a prompt that asks the LLM to summarize a query result DataFrame.
+
+    Parameters
+    ----------
+    code : str
+        The pandas code that produced the result.
+    reduced_df : pandas.DataFrame
+        Reduced/sampled result; an "image" column, when present, is dropped before rendering.
+    dynamic_schema : dict
+        DataFrame schema.
+    example_values : dict
+        Example values.
+    query : str
+        The original user query.
+    current_fields : list
+        Current DataFrame columns.
+    context_kind : str, optional
+        "tasks" or "objects".
+
+    Returns
+    -------
+    str
+        Formatted summarization prompt.
+    """
+    job = "You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query."
+    # Series/scalar results have no `.columns`; fall back to an empty tuple instead of raising AttributeError.
+    if "image" in getattr(reduced_df, "columns", ()):
+        reduced_df = reduced_df.drop(columns=["image"])
+
+    return f"""
+    {job}
+
+    Given:
+
+    **User Query**:
+    {query}
+
+    **Query_Code**:
+    {code}
+
+    **Reduced DataFrame `df` contents** (rows sampled from full result):
+    {reduced_df}
+
+    **Original df (before reduction) had this schema**:
+    {get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind=context_kind)}
+
+    Your task is to produce a concise English answer to the user query.
+
+    Mandatory requirements:
+    1. Mirror the user's exact vocabulary. If the query says "best", write "best" (not "highest" or "top").
+       If the query says "worst", write "worst" (not "lowest").
+    2. For queries that find an extremal result (best, worst, highest, lowest, max, min, first, last):
+       - Name the full set that was searched (e.g., "across all tasks of that activity_id" or "among all records returned").
+       - Describe the method: "found by sorting on [column name verbatim] in [ascending/descending] order".
+    3. For queries that filter by a condition:
+       - Explicitly enumerate every item that passed the filter with its relevant field values
+         (e.g., "item_a (field=value_a), item_b (field=value_b), and item_c (field=value_c)").
+       - Then state the aggregate result.
+    4. Always include column names verbatim using dot-notation (e.g., "generated.metric_a", "used.param_a").
+       When code uses wildcards like "generated.*", look up the actual field names from the schema
+       and enumerate key specific fields. Use the word "including" when listing output field names.
+
+    In the end, conclude by giving your concise answer as follows: **Response**:
+
+    Note that the user should not know that this is a reduced dataframe.
+    Keep your response focused and complete.
+    """
+
+
+def extract_or_fix_json_code_prompt(raw_text) -> str:
+    """Build a prompt asking the LLM to extract (and repair, if needed) JSON from raw text.
+
+    Parameters
+    ----------
+    raw_text : str
+        Raw message possibly containing JSON; interpolated under "User message:".
+
+    Returns
+    -------
+    str
+        Prompt instructing the model to reply with valid, parseable JSON only.
+    """
+    return f"""
+    You are a JSON extractor and fixer.
+    You are given a raw message that may include explanations, markdown fences, or partial JSON.
+
+    Your task:
+    1. Check if the message contains a JSON object or array.
+    2. If it does, extract and fix the JSON if needed.
+    3. Ensure all keys and string values are properly quoted.
+    4. Return only valid, parseable JSON — no markdown, no explanations.
+
+    THE OUTPUT MUST BE A VALID JSON ONLY. DO NOT SAY ANYTHING ELSE.
+
+    User message:
+    {raw_text}
+    """
+
+
+def build_extract_or_fix_python_code_prompt(raw_text, current_fields, runtime_error: str = None) -> str:
+    """Build the prompt used to extract, and if necessary repair, pandas code found in raw text.
+
+    Parameters
+    ----------
+    raw_text : str
+        Raw message that may contain the DataFrame code; placed under "User message:".
+    current_fields : list
+        Column names rendered verbatim as the ``ALLOWED_FIELDS`` list in the prompt.
+    runtime_error : str, optional
+        Error text from an earlier execution attempt. A truthy value adds a section
+        telling the LLM it MUST fix that error; otherwise the section is left empty.
+
+    Returns
+    -------
+    str
+        Formatted prompt.
+    """
+    # Start with no error section; fill it in only when an error message was actually supplied.
+    error_section = ""
+    if runtime_error:
+        error_section = f"\n    The code previously raised this runtime error — you MUST fix it:\n    {runtime_error}\n"
+
+    return f"""
+    You are a Pandas DataFrame code extractor and fixer.
+    You are given a raw user message that may include explanations, markdown fences, or partial DataFrame code that queries a DataFrame `df`.
+{error_section}
+    Your task:
+    1. Check if the message contains a valid DataFrame code.
+    2. If it does, extract the code.
+    3. If there are any syntax errors, fix them.
+    4. Carefully analyze the list of columns in the query. The query must only use fields in this list:
+       ALLOWED_FIELDS = {current_fields}.
+       If there are fields not in this list, replace the fields to match according to the ALLOWED_FIELDS list.
+    5. Return only the corrected DataFrame query code — no explanations, no comments, no markdown.
+
+    ONCE AGAIN, ONLY PRODUCE THE PYTHON CODE. DO NOT SAY ANYTHING ELSE!
+
+    User message:
+    {raw_text}
+    """
diff --git a/src/flowcept/agents/prompts/general_prompts.py b/src/flowcept/agents/prompts/general_prompts.py
deleted file mode 100644
index 53e0afe5..00000000
--- a/src/flowcept/agents/prompts/general_prompts.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# flake8: noqa: E501
-# flake8: noqa: D103
-
-from mcp.server.fastmcp.prompts import base
-
-BASE_ROLE = (
- "You are a helpful assistant analyzing provenance data from a large-scale workflow composed of multiple tasks."
-)
-
-DATA_SCHEMA_PROMPT = (
- "A task object has its provenance: input data is stored in the 'used' field, output in the 'generated' field. "
- "Tasks sharing the same 'workflow_id' belong to the same workflow execution trace. "
- "Pay attention to the 'tags' field, as it may indicate critical tasks. "
- "The 'telemetry_summary' field reports CPU, disk, memory, and network usage, along with 'duration_sec'. "
- "Task placement is stored in the 'hostname' field."
-)
-
-QUESTION_PROMPT = "I am particularly more interested in the following question: %QUESTION%."
-
-SMALL_TALK_PROMPT = "Act as a Workflow Provenance Specialist. I would like to interact with you, but please be concise and brief. This is my message:\n"
-
-ROUTING_PROMPT = (
- "You are a routing assistant for a provenance AI agent. "
- "Given the following user message, classify it into one of the following routes:\n"
- "- small_talk: if it's casual conversation or some random word (e.g., 'hausdn', 'a', hello, how are you, what can you do, what's your name)\n"
- "- in_context_query: if the user is querying the provenance data questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe. I expect that most of the interactions will fall in this category.\n"
- "- plot: if user is requesting plots (e.g., plot, chart, visualize)\n"
- # "- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
- # "- historical_prov_query: if the user wants to query historical provenance data\n"
- "- in_chat_query: if the user appears to be asking about something that has said recently in this chat.\n"
- "- in_context_query: if you don't know.\n"
- "Respond with only the route label."
- "User message is below:\n "
-)
-
-
-def get_question_prompt(question: str):
- """Generates a user prompt with the given question filled in."""
- return base.UserMessage(QUESTION_PROMPT.replace("%QUESTION%", question))
-
-
-SINGLE_TASK_PROMPT = {
- "role": f"{BASE_ROLE} You are focusing now on a particular task object which I will provide below.",
- "data_schema": DATA_SCHEMA_PROMPT,
- "job": (
- "Your job is to analyze this single task. Find any anomalies, relationships, or correlations between input,"
- " output, resource usage metrics, task duration, and task placement. "
- "Correlations involving 'used' vs 'generated' data are especially important. "
- "So are relationships between (used or generated) data and resource metrics. "
- "Highlight outliers or critical information and give actionable insights or recommendations. "
- "Explain what this task may be doing, using the data provided."
- ),
-}
-
-MULTITASK_PROMPTS = {
- "role": BASE_ROLE,
- "data_schema": DATA_SCHEMA_PROMPT,
- "job": (
- "Your job is to analyze a list of task objects to identify patterns across tasks, anomalies, relationships,"
- " or correlations between inputs, outputs, resource usage, duration, and task placement. "
- "Correlations involving 'used' vs 'generated' data are especially important. "
- "So are relationships between (used or generated) data and resource metrics. "
- "Try to infer the purpose of the workflow. "
- "Highlight outliers or critical tasks and give actionable insights or recommendations. "
- "Use the data provided to justify your analysis."
- ),
-}
-
-BASE_SINGLETASK_PROMPT = [base.UserMessage(SINGLE_TASK_PROMPT[k]) for k in ("role", "data_schema", "job")]
-BASE_MULTITASK_PROMPT = [base.UserMessage(MULTITASK_PROMPTS[k]) for k in ("role", "data_schema", "job")]
diff --git a/src/flowcept/agents/prompts/in_memory_query_prompts.py b/src/flowcept/agents/prompts/in_memory_query_prompts.py
deleted file mode 100644
index c8cf2d9b..00000000
--- a/src/flowcept/agents/prompts/in_memory_query_prompts.py
+++ /dev/null
@@ -1,544 +0,0 @@
-# flake8: noqa: E501
-# flake8: noqa: D103
-from flowcept.agents.flowcept_ctx_manager import EMPTY_DF_MESSAGE, get_df_context, mcp_flowcept
-
-
-def generate_common_task_fields(current_fields):
- # TODO: make this better
- common_task_fields = """
- | Column | Data Type | Description |
- |-------------------------------|-------------|
- """
- common_task_fields += (
- "| `workflow_id` | string | Workflow the task belongs to. Use this field when the query is asking about workflow execution |\n"
- if "workflow_id" in current_fields
- else ""
- )
- common_task_fields += (
- "| `task_id` | string | Task identifier. |\n" if "task_id" in current_fields else ""
- )
- common_task_fields += (
- "| `parent_task_id` | string | A task may be directly linked to others. Use this field when the query asks for a task informed by (or associated with or linked to) other task. |\n"
- if "parent_task_id" in current_fields
- else ""
- )
- common_task_fields += (
- "| `activity_id` | string | Type of task (e.g., 'choose_option'). Use this for \"task type\" queries. One activity_id is linked to multiple task_ids. |\n"
- if "activity_id" in current_fields
- else ""
- )
- common_task_fields += (
- "| `campaign_id` | string | A group of workflows. |\n"
- if "campaign_id" in current_fields
- else ""
- )
- common_task_fields += (
- "| `hostname` | string | Compute node name. |\n" if "hostname" in current_fields else ""
- )
- common_task_fields += (
- "| `agent_id` | string | Set if executed by an agent. |\n"
- if "agent_id" in current_fields
- else ""
- )
- common_task_fields += (
- "| `started_at` | datetime64[ns, UTC] | Start time of a task. Always use this field when the query has any temporal reference related to the workflow execution, such as 'get the first 10 workflow executions' or 'the last workflow execution'. |\n"
- if "started_at" in current_fields
- else ""
- )
- common_task_fields += (
- "| `ended_at` | datetime64[ns, UTC] | End time of a task. |\n"
- if "ended_at" in current_fields
- else ""
- )
- common_task_fields += (
- "| `subtype` | string | Subtype of a task. |\n" if "subtype" in current_fields else ""
- )
- common_task_fields += (
- "| `tags` | List[str] | List of descriptive tags. |\n"
- if "tags" in current_fields
- else ""
- )
- common_task_fields += (
- "| `image` | blob | Raw binary data related to an image. |\n"
- if "image" in current_fields
- else ""
- )
- common_task_fields += (
- "| `telemetry_summary.duration_sec` | float | Task duration (seconds). |\n"
- if "telemetry_summary.duration_sec" in current_fields
- else ""
- )
- common_task_fields += (
- "| `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start. |\n"
- if "telemetry_summary.cpu.percent_all_diff" in current_fields
- else ""
- )
- common_task_fields += (
- "| `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time (seconds) between task start and end times. |\n"
- if "telemetry_summary.cpu.user_time_diff" in current_fields
- else ""
- )
- common_task_fields += (
- "| `telemetry_summary.cpu.system_time_diff` | float | Difference in CPU system (kernel) time (seconds) used during the task execution. |\n"
- if "telemetry_summary.cpu.system_time_diff" in current_fields
- else ""
- )
- common_task_fields += (
- "| `telemetry_summary.cpu.idle_time_diff` | float | Difference in CPU idle time (seconds) during task end and start. |\n"
- if "telemetry_summary.cpu.idle_time_diff" in current_fields
- else ""
- )
-
- common_task_fields += "\n For any queries involving CPU, use fields that begin with telemetry_summary.cpu"
-
- return common_task_fields
-
-
-def get_df_form(context_kind="tasks"):
- if context_kind == "objects":
- return "The user has a pandas DataFrame called `df`, created from flattened object metadata messages using `pd.json_normalize`."
- return "The user has a pandas DataFrame called `df`, created from flattened task objects using `pd.json_normalize`."
-
-
-CURRENT_DF_COLUMNS_PROMPT = """
-### ABSOLUTE FIELD CONSTRAINT -- THIS IS CRITICAL
-
-The following list is the ONLY valid field names in df. Treat this as the schema:
-
-ALLOWED_FIELDS = [COLS]
-
-You MUST treat this list as authoritative.
-
-- You may only use fields names that appear EXACTLY (string match) in ALLOWED_FIELDS.
-- You are NOT allowed to create new field names by:
- - adding or removing prefixes like "used." or "generated."
- - combining words
- - guessing.
-- If a field name is not in ALLOWED_FIELDS, you MUST NOT use it.
-- If the query cannot be answered using ALLOWED_FIELDS, return exactly: result = "info not available"
-"""
-
-
-def get_example_values_prompt(example_values):
- values_prompt = f"""
- Now, this other dictionary below provides type (t), up to 3 example values (v), and, for lists, shape (s) and element type (et) for each field.
- Field names do not include `used.` or `generated.` They represent the unprefixed form shared across roles. String values may be truncated if they exceed the length limit.
- ```python
- {example_values}
- ```
- """
- return values_prompt
-
-
-def get_object_schema_prompt(example_values, current_fields):
- schema_prompt = """
- ## DATAFRAME STRUCTURE
-
- Each row in `df` represents one workflow object metadata message.
-
- Important object fields:
- - `object_type`: semantic object category, such as input_file, dataset, artifact, or ml_model.
- - `type`: Flowcept message type. For object rows this is usually "object"; do not use it as the object category.
- - `object_size_bytes`: object payload size in bytes.
- - `file_path`: object path when available.
- - `workflow_id`: workflow associated with the object.
-
- ALWAYS CHECK THE ALLOWED_FIELDS list before proceeding.
- ---
- """
- return schema_prompt + get_example_values_prompt(example_values)
-
-
-def get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind="tasks"):
- if context_kind == "objects":
- return get_object_schema_prompt(example_values, current_fields)
-
- schema_prompt = f"""
- ## DATAFRAME STRUCTURE
-
- Each row in `df` represents a single task.
-
- ### 1. Structured task fields:
-
- - **in**: input parameters (columns starting with `used.`)
- - **out**: output metrics/results (columns starting with `generated.`)
-
- The schema for these fields is defined in the dictionary below.
- It maps each activity ID to its inputs (i) and outputs (o), using flattened field names that include `used.` or `generated.` prefixes to indicate the role the field played in the task. These names match the columns in the dataframe `df`.
-
- {dynamic_schema}
- Use this schema and fields to understand what inputs and outputs are valid for each activity.
-
- IMPORTANT: The user might say used for outputs or generated for inputs, which might confuse you. Do not get tricked by the user.
- Ignore the natural-language words "used" and "generated".
- - The English phrase "used in the calculation" does NOT mean you must use a `used.` column.
- - The English word "generated" in the question does NOT force you to use a `generated.` column either.
-
- ALWAYS CHECK THE ALLOWED_FIELDS list before proceeding. THIS IS CRITICAL.
-
- ### 2. Additional fields for tasks:
-
- {generate_common_task_fields(current_fields)}
- ---
- """
-
- values_prompt = get_example_values_prompt(example_values)
- # values_prompt = ""
- prompt = schema_prompt + values_prompt
- return prompt
-
-
-def generate_plot_code_prompt(query, dynamic_schema, example_values, current_fields, context_kind="tasks") -> str:
- PLOT_PROMPT = f"""
- You are a Streamlit chart expert.
- {get_df_form(context_kind)}
-
- {get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind=context_kind)}
-
- ### 3. Guidelines
-
- - When plotting from a grouped or aggregated result, set an appropriate column (like activity_id, started_at, etc.) as the index before plotting to ensure x-axis labels are correct.
- - When aggregating by "activity_id", remember to include .set_index('activity_id') in your response.
-
- ### 4. Output Format
-
- You must write Python code using Streamlit (st) to visualize the requested data.
-
- - Always assume `df` is already defined.
- - First, assign the query result to a variable called `result` using pandas.
- - Then, write the plotting code based on `result`.
- - Return a Python dictionary with two fields:
- - `"result_code"`: the pandas code that assigns `result`
- - `"plot_code"`: the code that creates the Streamlit plot
- ---
-
- ### 5. Few-Shot Examples
-
- ```python
- # Q: Plot the number of tasks by activity
- {{
- "result_code": "result = df['activity_id'].value_counts().reset_index().rename(columns={{'index': 'activity_id', 'activity_id': 'count'}})",
- "plot_code": "st.bar_chart(result.set_index('activity_id'))"
- }}
-
- # Q: Show a line chart of task duration per task start time
- {{
- "result_code": "result = df[['started_at', 'telemetry_summary.duration_sec']].dropna().set_index('started_at')",
- "plot_code": "st.line_chart(result)"
- }}
-
- # Q: Plot average scores for simulate_layer tasks
- {{
- "result_code": "result = df[df['activity_id'] == 'simulate_layer'][['generated.scores']].copy()\nresult['avg_score'] = result['generated.scores'].apply(lambda x: sum(eval(str(x))) / len(eval(str(x))) if x else 0)",
- "plot_code": "st.bar_chart(result['avg_score'])"
- }}
-
- # Q: Plot histogram of planned controls count for choose_option
- {{
- "result_code": "result = df[df['activity_id'] == 'choose_option'][['used.planned_controls']].copy()\nresult['n_controls'] = result['used.planned_controls'].apply(lambda x: len(eval(str(x))) if x else 0)",
- "plot_code": "import matplotlib.pyplot as plt\nplt.hist(result['n_controls'])\nst.pyplot(plt)"
- }}
-
- Your response must be only the raw Python code in the format:
- result = ...
- Except for the `result` variable, YOU MUST NEVER CREATE ANY OTHER VARIABLE. NEVER!
-
- User request:
- {query}
-
-
-
- """
- return PLOT_PROMPT
-
-
-JOB = "You will generate a pandas dataframe code to solve the query."
-ROLE = """You are an expert in HPC workflow provenance data analysis with a deep knowledge of data lineage tracing, workflow management, and computing systems.
- You are analyzing provenance data from a complex workflow consisting of numerous tasks."""
-OBJECT_ROLE = """You are an expert in HPC workflow provenance data analysis with a deep knowledge of data lineage tracing, workflow management, and computing systems.
- You are analyzing object metadata records from a workflow provenance buffer."""
-QUERY_GUIDELINES = """
-
- ### 3. Query Guidelines
-
- - Use `df` as the base DataFrame.
- - Use `activity_id` to filter by task type (valid values = schema keys).
- - ONLY IF the ALLOWED_FIELDS list allow, use `used.` for parameters (inputs) and `generated.` for outputs (metrics).
- - Use `telemetry_summary.duration_sec` for performance-related questions.
- - Use `hostname` when user mentions *where* a task ran.
- - Use `agent_id` when the user refers to agents (non-null means task was agent-run).
-
- ### 4. Hard Constraints (obey strictly, YOUR LIFE DEPENDS ON THEM. DO NOT HALLUCINATE!!!)
-
- - Always return code in the form `result = df[][[...]]` or `result = df.loc[, [...]]`
- -**THERE ARE NOT INDIVIDUAL FIELDS NAMED `used` OR `generated`, they are ONLY are prefixes to the field names.**
- - If the query needs fields that begin with `used.` or `generated.`, your generated query needs to iterate over the df.columns to select the used or generated fields only, such as (adapt when needed): `[col for col in df.columns if col.startswith('generated.')]` or `[col for col in df.columns if col.startswith('used.')]`
- **THERE ABSOLUTELY ARE NO FIELDS NAMED `used` or `generated`. DO NOT, NEVER use the string 'used' or 'generated' in your generated code!!!**
- **THE COLUMN 'used' DOES NOT EXIST**
- **THE COLUMN 'generated' DOES NOT EXIST**
- - **When filtering by `activity_id`, only select columns that belong to that activity’s schema.**
- - Always observing the ALLOWED_FIELDS list, use only `used.` and `generated.` fields listed in the schema for that `activity_id`.
- - Explicitly list the selected columns — **never return all columns**
- - **Only include telemetry columns if used in the query logic.**
- -THERE IS NOT A FIELD NAMED `telemetry_summary.start_time` or `telemetry_summary.end_time` or `used.start_time` or `used.end_time`. Use `started_at` and `ended_at` instead when you want to find the duration of a task, activity, or workflow execution.
- -THE GENERATED FIELDS ARE LABELED AS SUCH: `generated.()` NOT `generated_output`. Any reference to `generated_output` is incorrect and should be replaced with `generated.` prefix.
- -THERE IS NOT A FIELD NAMED `execution_id` or `used.execution_id`. Look at the QUERY to decide what correct _id field to use. Any mentions of workflow use `workflow_id`. Any mentions of task use `task_id`. Any mentions of activity use `activity_id`.
- -DO NOT USE `nlargest` or `nsmallest` in the query code, use `sort_values` instead.
- -An activity with a value in the `generated.` column created that value. Whereas an activity that has a value in the `used.` column used that value from another activity. IF THE `used.` and `generated.` fields share the same letter after the dot, that means that the activity associated with the `generated.` was created by another activity and the one with `used.` used that SAME value that was created by the activity with that same value in the `generated.` field.
- -WHEN user requests about workflow time (e.g., total time or duration" or elapsed time or total execution time or elapsed time or makespan about workflow executions or asking about workflows that took longer than a certain threshold or other workflow-related timing question of one or many workflow executions (each is identified by `workflow_id`), get its latest task's `ended_at` and its earliest task's `started_at`and compute the difference between them, like this (adapt when needed): `df.groupby('workflow_id').apply(lambda x: (x['ended_at'].max() - x['started_at'].min()).total_seconds())`
- -WHEN user requests duration or execution time per task or for individual tasks, utilize `telemetry_summary.duration_sec`.
- -WHEN user requests execution time per activity within workflows compute durations using the difference between the last `ended_at` and the first `started_at` grouping by activitiy_id, workflow_id rather than using `telemetry_summary.duration_sec`.
-
- -The first (or the earliest) workflow execution is the one that has the task with earliest `started_at`, so you need to sort the DataFrame based on `started_at` to get the associated workflow_id.
- -The last (or the latest or the most recent) workflow execution is the one that has the task with the latest `ended_at`, so you need to sort the DataFrame based on `ended_at` to get the associated workflow_id.
- - Use this to select the tasks in the first workflow (or in the earliest workflow): df[df.workflow_id == df.loc[df.started_at.idxmin(), 'workflow_id']]
- - Use this to select the tasks in the last workflow (or in the latest workflow or in the most recent workflow or the workflow that started or ended most recently): df[df.workflow_id == df.loc[df.ended_at.idxmax(), 'workflow_id']]
- -WHEN the user requests the "first workflow" (or earliest workflow), you must identify the workflow by using workflow_id of the task with the earliest started_at. DO NOT use the min workflow_id.
- -WHEN the user requests the "last workflow" (or latest workflow or most recent workflow), you must identify the workflow by using workflow_id of the task with the latest `ended_at`. DO NOT use the max workflow_id.
- -Do not use df['workflow_id'].max() or df['workflow_id'].min() to find the first or last workflow execution.
-
- -To select the first (or earliest) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"started_at": 'min'}}).sort_values(by='started_at', ascending=True).head(N)['workflow_id']` - utilize `started_at` to sort!
- -To select the last (or latest or most recent) N workflow executions, use or adapt the following: `df.groupby('workflow_id', as_index=False).agg({{"ended_at": 'max'}}).sort_values(by='ended_at', ascending=False).head(N)['workflow_id']` - utilize `ended_at` to sort!
-
- -If the user does not ask for a specific workflow run, do not use `workflow_id` in your query.
- -To select the first or earliest or initial tasks, use or adapt the following: `df.sort_values(by='started_at', ascending=True)`
- -To select the last or final or most recent tasks, use or adapt the following: `df.sort_values(by='ended_at', ascending=False)`
-
- -If user explicitly asks to display or show all columns or fields, do not project on any particular field or column. Just show all of them.
-
- -WHEN the user requests a "summary" of activities, you must incorporate relevant summary statistics such as min, max, and mean, into the code you generate.
- -Do NOT use df[0] or df[integer value] or df[df[].idxmax()] or df[df[].idxmin()] because these are obviously not valid Pandas Code!
- -**Do NOT use any of those: df[df['started_at'].idxmax()], df[df['started_at'].idxmin()], df[df['ended_at'].idxmin()], df[df['ended_at'].idxmax()]. Those are not valid Pandas Code.**
- - When the query mentions "each task", or "each activity", or "each workflow", make sure you show (project) the correct id column in the results (i.e., respectively: `task_id`, `activity_id`, `workflow_id`) to identify those in the results.
- - Use df[.field_name] == True or df[.field_name] == False when user queries boolean fields, where is either used or generated, depending on the field name. Make sure field_name is a valid field in the DataFrame.
-
- If the query asks you to report which values appear in one or more columns
- (for example “which X were used”, “list all Y”, “what X and Y were generated”), then:
-
- For each relevant column, select that column from df.
- Call .dropna() on that column to remove missing values.
- After dropping NaNs, apply .unique(), .value_counts(), or any other aggregation as needed.
- Select that column.
- Call .dropna() on it.
- Then call .unique(), .value_counts(), or any other aggregation.
-
- - **CRITICAL — list-valued columns**: Some columns store Python lists as cell values
- (identifiable in the schema by element type `et` or shape `s`, e.g. `used.plant_ids`).
- NEVER call `.unique()` or `.value_counts()` directly on these — it raises “unhashable type: list”.
- Always call `.explode()` first to flatten the lists into individual rows, then aggregate:
- result = df['used.plant_ids'].dropna().explode().unique()
-
- - **Do not include metadata columns unless explicitly required by the user query.**
-"""
-
-FEW_SHOTS = """
- ### 5. Few-Shot Examples
-
- # Q: How many tasks were processed?
- result = len(df))
-
- # Q: How many tasks for each activity?
- result = df['activity_id'].value_counts()
-
-"""
-OBJECT_QUERY_GUIDELINES = """
- ### 3. Query Guidelines
-
- - Use `df` as the base DataFrame.
- - Use `object_type` for object category questions.
- - Use `object_size_bytes` for object size questions.
- - Use `file_path` for file path questions.
- - Use `workflow_id` when the query asks for workflow-specific objects.
- - The column `type` is the Flowcept message type, not the object category.
- - Explicitly list selected columns unless the user asks for all columns.
-"""
-OBJECT_FEW_SHOTS = """
- ### 5. Few-Shot Examples
-
- # Q: How many objects are available?
- result = len(df)
-
- # Q: List all input files larger than 100 MB
- result = df[(df['object_type'] == 'input_file') & (df['object_size_bytes'] > 100 * 1000 * 1000)][['workflow_id', 'file_path', 'object_size_bytes']]
-
-"""
-# # Q: What is the average loss across all tasks?
-# result = df['generated.loss'].mean()
-#
-# # Q: select the 'choose_option' tasks executed by the agent, and show the planned controls, generated option, scores, explanations
-# result = df[(df['activity_id'] == 'choose_option') & (df['agent_id'].notna())][
-# ['used.planned_controls', 'generated.option', 'used.scores.scores', 'generated.explanation']].copy()
-#
-# # Q: Show duration and generated scores for 'simulate_layer' tasks
-# result = df[df['activity_id'] == 'simulate_layer'][['telemetry_summary.duration_sec', 'generated.scores']]
-
-OUTPUT_FORMATTING = """
- 6. Final Instructions
- Return only valid pandas code assigned to the variable result.
-
- Your response must be only the raw Python code in the format:
- result = ...
-
- Do not include: Explanations, Markdown formatting, Triple backticks, Comments, or Any text before or after the code block.
- The output cannot have any markdown, no ```python or ``` at all.
-
- THE OUTPUT MUST BE ONE LINE OF VALID PYTHON CODE ONLY, DO NOT SAY ANYTHING ELSE.
-
- Strictly follow the constraints above.
-"""
-
-
-def generate_pandas_code_prompt(
- query: str, dynamic_schema, example_values, custom_user_guidances, current_fields, context_kind="tasks"
-):
- if custom_user_guidances is not None and isinstance(custom_user_guidances, list) and len(custom_user_guidances):
- concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
- custom_user_guidance_prompt = (
- f"You MUST consider the following guidance from the user:\n"
- f"{concatenated_guidance}"
- "------------------------------------------------------"
- )
- else:
- custom_user_guidance_prompt = ""
-
- curr_cols = CURRENT_DF_COLUMNS_PROMPT.replace("[COLS]", str(current_fields))
- role = OBJECT_ROLE if context_kind == "objects" else ROLE
- query_guidelines = OBJECT_QUERY_GUIDELINES if context_kind == "objects" else QUERY_GUIDELINES
- few_shots = OBJECT_FEW_SHOTS if context_kind == "objects" else FEW_SHOTS
- prompt = (
- f"{role}"
- f"{JOB}"
- f"{get_df_form(context_kind)}"
- f"{curr_cols}"
- f"{get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind=context_kind)}"
- f"{query_guidelines}"
- f"{few_shots}"
- f"{custom_user_guidance_prompt}"
- f"{OUTPUT_FORMATTING}"
- "User Query:"
- f"{query}"
- )
- return prompt
-
-
-@mcp_flowcept.prompt(
- name="build_df_query_prompt",
- title="Build DataFrame Query Prompt",
- description="Build prompt context for external LLM code generation over agent DataFrame context.",
-)
-def build_df_query_prompt(query: str, context_kind: str = "tasks") -> str:
- """
- Build the internal pandas-code generation prompt for external LLM orchestration.
-
- Parameters
- ----------
- query : str
- Natural language question to translate into pandas code.
-
- Returns
- -------
- str
- Prompt text to guide external LLM code generation.
- Returns an explanatory message when there is no active DataFrame context.
- """
- df, schema, value_examples, custom_user_guidance = get_df_context(context_kind=context_kind)
- if df is None or not len(df):
- return EMPTY_DF_MESSAGE
-
- current_fields = list(df.columns)
- prompt = generate_pandas_code_prompt(
- query,
- schema,
- value_examples,
- custom_user_guidance,
- current_fields,
- context_kind=context_kind,
- )
- return prompt
-
-
-def dataframe_summarizer_context(
- code, reduced_df, dynamic_schema, example_values, query, current_fields, context_kind="tasks"
-) -> str:
- job = "You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query."
-
- if "image" in reduced_df.columns:
- reduced_df = reduced_df.drop(columns=["image"])
-
- prompt = f"""
- {job}
-
- Given:
-
- **User Query**:
- {query}
-
- **Query_Code**:
- {code}
-
- **Reduced DataFrame `df` contents** (rows sampled from full result):
- {reduced_df}
-
- **Original df (before reduction) had this schema:
- {get_df_schema_prompt(dynamic_schema, example_values, current_fields, context_kind=context_kind)}
-
- Your task is to find a concise and direct answer as an English sentence to the user query.
-
- Only if the answer to the query is complex, provide more explanation by:
- 1. Analyzing the DataFrame values and columns for any meaningful or notable information.
- 2. Comparing the query_code with the data content to understand what the result represents. THIS IS A REDUCED DATAFRAME, the original dataframe, used to answer the query, may be much bigger. IT IS ALREADY KNOWN! Do not need to restate this.
- 3. If it makes sense, provide information beyond the recorded provenance, but state it clearly that you are inferring it.
-
- In the end, conclude by giving your concise answer as follows: **Response**:
-
- Note that the user should not know that this is a reduced dataframe.
- Keep your response short and focused.
-
- """
-
- return prompt
-
-
-def extract_or_fix_json_code_prompt(raw_text) -> str:
- prompt = f"""
- You are a JSON extractor and fixer.
- You are given a raw message that may include explanations, markdown fences, or partial JSON.
-
- Your task:
- 1. Check if the message contains a JSON object or array.
- 2. If it does, extract and fix the JSON if needed.
- 3. Ensure all keys and string values are properly quoted.
- 4. Return only valid, parseable JSON — no markdown, no explanations.
-
- THE OUTPUT MUST BE A VALID JSON ONLY. DO NOT SAY ANYTHING ELSE.
-
- User message:
- {raw_text}
- """
- return prompt
-
-
-def extract_or_fix_python_code_prompt(raw_text, current_fields):
- prompt = f"""
- You are a Pandas DataFrame code extractor and fixer. Pandas is a well-known data science Python library for querying datasets.
- You are given a raw user message that may include explanations, markdown fences, or partial DataFrame code that queries a DataFrame `df`.
-
- Your task:
- 1. Check if the message contains a valid DataFrame code.
- 2. If it does, extract the code.
- 3. If there are any syntax errors, fix them.
- 4. Carefully analyze the list of columns in the query. The query must only use fields in this list:
- ALLOWED_FIELDS = {current_fields}.
- If there are fields not in this list, replace the fields to match according to the ALLOWED_FIELDS list.
- 5. Return only the corrected DataFrame query code — no explanations, no comments, no markdown.
-
- The output must be valid Python code, and must not include any other text.
- Your output can only contain fields in the ALLOWED_FIELDS list.
- This output will be parsed by another program.
-
- ONCE AGAIN, ONLY PRODUCE THE PYTHON CODE. DO NOT SAY ANYTHING ELSE!
-
- User message:
- {raw_text}
- """
- return prompt
diff --git a/src/flowcept/agents/prompts/schema_prompt_context.py b/src/flowcept/agents/prompts/schema_prompt_context.py
new file mode 100644
index 00000000..bc7ea8aa
--- /dev/null
+++ b/src/flowcept/agents/prompts/schema_prompt_context.py
@@ -0,0 +1,88 @@
+# flake8: noqa: E501
+"""Shared schema prompt context for DB and runtime in-memory query paths."""
+
+from typing import Any
+
+from flowcept.agents.provenance_schema_manager.static_schema_builder import SCHEMA_CONTEXT
+
+
+def build_allowed_fields_prompt(current_fields: list[str], target_name: str = "records") -> str:
+ """Build the allowed-field constraint block, embedding ``current_fields`` as ALLOWED_FIELDS and ``target_name`` as the record set they belong to."""
+ return f"""
+### ABSOLUTE FIELD CONSTRAINT -- THIS IS CRITICAL
+
+The following list is the ONLY valid field names in {target_name}. Treat this as the schema:
+
+ALLOWED_FIELDS = {current_fields}
+
+You MUST treat this list as authoritative.
+
+- You may only use field names that appear EXACTLY (string match) in ALLOWED_FIELDS.
+- You are NOT allowed to create new field names by:
+ - adding or removing prefixes like "used." or "generated."
+ - combining words
+ - guessing.
+- If a field name is not in ALLOWED_FIELDS, you MUST NOT use it.
+"""
+
+
+def build_example_values_prompt(example_values: dict[str, Any]) -> str:
+ """Build the prompt block that embeds ``example_values`` in a python code fence, preceded by a legend for its t/v/s/et keys."""
+ return f"""
+Now, this dictionary provides type (t), up to 3 example values (v), and, for lists, shape (s) and element type (et) for each field.
+Field names do not include `used.` or `generated.`. They represent the unprefixed form shared across roles. String values may be truncated if they exceed the length limit.
+```python
+{example_values}
+```
+"""
+
+
+def build_task_static_field_table(current_fields: list[str]) -> str:
+    """Render the documented static task and telemetry-summary fields present in ``current_fields`` as a markdown table."""
+    table = [
+        " | Column | Data Type | Description |",
+        " |-------------------------------|-----------|-------------|",
+    ]
+    # Each pair is a SCHEMA_CONTEXT section plus the prefix its field names carry in ``current_fields``.
+    for section, prefix in (("task_fields", ""), ("telemetry_summary_fields", "telemetry_summary.")):
+        for spec in SCHEMA_CONTEXT.get(section, []):
+            column = f"{prefix}{spec['name']}"
+            if column not in current_fields:
+                continue
+            table.append(f" | `{column:<30}` | {spec['type']:<9} | {spec['description']} |")
+    if any(name.startswith("telemetry_summary.cpu") for name in current_fields):
+        table.append(" \n For any queries involving CPU, use fields that begin with telemetry_summary.cpu")
+    return "\n".join(table)
+
+
+def build_task_structure_prompt(
+ dynamic_schema: dict[str, Any],
+ example_values: dict[str, Any],
+ current_fields: list[str],
+ record_description: str,
+) -> str:
+ """Build the task-record prompt section from ``record_description``, the activity ``dynamic_schema``, the static field table, and the example values."""
+ return f"""
+## TASK RECORD STRUCTURE
+
+{record_description}
+
+### 1. Structured task fields:
+
+- **in**: input parameters (fields starting with `used.`)
+- **out**: output metrics/results (fields starting with `generated.`)
+
+The schema below maps each activity ID to its inputs (i) and outputs (o), using flattened field names with `used.` or `generated.` prefixes. These names must match the allowed fields exactly.
+
+{dynamic_schema}
+
+Use this schema to understand what inputs and outputs are valid for each activity.
+
+IMPORTANT: The user might use natural-language words such as "used" or "generated" loosely. Do not infer field names from those words. Always check ALLOWED_FIELDS and the activity schema.
+
+### 2. Additional documented task fields:
+
+{build_task_static_field_table(current_fields)}
+---
+{build_example_values_prompt(example_values)}
+"""
diff --git a/src/flowcept/agents/prompts/workflow_query_prompts.py b/src/flowcept/agents/prompts/workflow_query_prompts.py
deleted file mode 100644
index ac22a90d..00000000
--- a/src/flowcept/agents/prompts/workflow_query_prompts.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# flake8: noqa: E501
-"""Prompt builders for querying the active workflow message object."""
-
-from __future__ import annotations
-
-import json
-from typing import Any
-
-from flowcept.agents.flowcept_ctx_manager import mcp_flowcept
-
-
-EMPTY_WORKFLOW_MESSAGE = "Current workflow_msg_obj is empty or null."
-
-
-def _flatten_paths(value: Any, prefix: str = "") -> list[str]:
- """Return dot paths for nested dict/list values."""
- if isinstance(value, dict):
- paths = []
- for key, child in value.items():
- child_prefix = f"{prefix}.{key}" if prefix else str(key)
- paths.append(child_prefix)
- paths.extend(_flatten_paths(child, child_prefix))
- return paths
- if isinstance(value, list):
- paths = []
- for idx, child in enumerate(value[:3]):
- child_prefix = f"{prefix}.{idx}" if prefix else str(idx)
- paths.append(child_prefix)
- paths.extend(_flatten_paths(child, child_prefix))
- return paths
- return []
-
-
-def _example_values(workflow_msg_obj: dict, paths: list[str], limit: int = 60) -> dict:
- examples = {}
- for path in paths[:limit]:
- value = _resolve_path(workflow_msg_obj, path)
- if isinstance(value, (dict, list)):
- continue
- examples[path] = value
- return examples
-
-
-def _resolve_path(value: Any, path: str) -> Any:
- current = value
- for part in path.split("."):
- if isinstance(current, dict):
- if part not in current:
- raise KeyError(path)
- current = current[part]
- elif isinstance(current, list):
- current = current[int(part)]
- else:
- raise KeyError(path)
- return current
-
-
-def generate_workflow_query_prompt(query: str, workflow_msg_obj: dict, custom_user_guidance=None) -> str:
- """Build an LLM prompt that maps a free-text workflow question to field paths."""
- paths = _flatten_paths(workflow_msg_obj)
- examples = _example_values(workflow_msg_obj, paths)
- guidance = ""
- if custom_user_guidance:
- guidance = "\n".join(f"- {msg}" for msg in custom_user_guidance)
- guidance = f"\nUser guidance:\n{guidance}\n"
-
- return f"""
-You are an expert in workflow provenance metadata.
-The user has a JSON workflow message object called `workflow_msg_obj`.
-Your job is to translate a free-text question into a strict JSON query spec.
-
-AUTHORITATIVE FIELD PATHS:
-{json.dumps(paths, indent=2, default=str)}
-
-EXAMPLE SCALAR VALUES:
-{json.dumps(examples, indent=2, default=str)}
-{guidance}
-
-Rules:
-- Use only field paths from AUTHORITATIVE FIELD PATHS.
-- Never invent fields or values.
-- If the requested information is absent, include it under `missing`.
-- For workflow description questions, use only an explicit description-like field if present. If none exists, mark it missing.
-- Return only JSON. No markdown, no explanation.
-
-Output format:
-{{"field_paths": ["path.one", "path.two"], "missing": ["human-readable missing item"], "answer_style": "short"}}
-
-Examples:
-Q: what's the workflow name?
-{{"field_paths": ["name"], "missing": [], "answer_style": "short"}}
-
-Q: what was the settings path?
-{{"field_paths": ["conf.settings_path"], "missing": [], "answer_style": "short"}}
-
-Q: what's the workflow description?
-{{"field_paths": [], "missing": ["workflow description"], "answer_style": "short"}}
-
-Q: what hardware was used?
-{{"field_paths": ["machine_info"], "missing": [], "answer_style": "summary"}}
-
-User query:
-{query}
-"""
-
-
-@mcp_flowcept.prompt(
- name="build_workflow_query_prompt",
- title="Build Workflow Query Prompt",
- description="Build prompt context for external LLM workflow-message field selection.",
-)
-def build_workflow_query_prompt(query: str) -> str:
- """Build prompt context for external LLM workflow-message field selection."""
- ctx = mcp_flowcept.get_context()
- workflow_msg_obj = ctx.request_context.lifespan_context.workflow_msg_obj
- if not workflow_msg_obj:
- return EMPTY_WORKFLOW_MESSAGE
- custom_user_guidance = ctx.request_context.lifespan_context.custom_guidance
- return generate_workflow_query_prompt(query, workflow_msg_obj, custom_user_guidance)
diff --git a/src/flowcept/agents/provenance_schema_manager/__init__.py b/src/flowcept/agents/provenance_schema_manager/__init__.py
new file mode 100644
index 00000000..30f2a5d9
--- /dev/null
+++ b/src/flowcept/agents/provenance_schema_manager/__init__.py
@@ -0,0 +1 @@
+"""Schema builders and trackers for Flowcept agent provenance prompts."""
diff --git a/src/flowcept/agents/provenance_schema_manager/context_schema_manager.py b/src/flowcept/agents/provenance_schema_manager/context_schema_manager.py
new file mode 100644
index 00000000..b215d46a
--- /dev/null
+++ b/src/flowcept/agents/provenance_schema_manager/context_schema_manager.py
@@ -0,0 +1,122 @@
+"""Schema management for the MCP agent context.
+
+Owns the per-task, per-object, and per-workflow DynamicSchemaTracker instances
+and all methods that update them or the corresponding DataFrames in context.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, List
+
+import pandas as pd
+
+from flowcept.agents.provenance_schema_manager.dynamic_schema_tracker import DynamicSchemaTracker
+from flowcept.commons.flowcept_logger import FlowceptLogger
+
+
+def _to_context_df(records: List[Dict]) -> pd.DataFrame:
+    """Flatten record dicts with ``pd.json_normalize`` and turn every list-valued cell into a tuple."""
+    frame = pd.json_normalize(records)
+    for name in frame.columns:
+        if any(isinstance(cell, list) for cell in frame[name]):
+            frame[name] = frame[name].apply(lambda cell: tuple(cell) if isinstance(cell, list) else cell)
+    return pd.DataFrame(frame)
+
+
+class ContextSchemaManager:
+    """Owns the DynamicSchemaTracker instances and applies schema/DataFrame updates to the MCP agent context.
+
+    Parameters
+    ----------
+    context :
+        The live ``FlowceptAppContext`` whose schema/df fields are updated in place.
+    tracker_config :
+        Keyword args forwarded to every ``DynamicSchemaTracker`` constructor
+        (e.g. ``max_examples``, ``max_str_len``).
+    """
+
+    def __init__(self, context, tracker_config: Dict):
+        self.logger = FlowceptLogger()
+        self._context = context
+        self._tracker_config = tracker_config
+        self._reset_trackers()  # creates the task/object trackers and the empty per-workflow map
+
+    def reset(self):
+        """Reset all trackers to a clean state (called when agent context is reset)."""
+        self._reset_trackers()
+
+    def _reset_trackers(self):  # replaces every tracker with a fresh one built from the stored config
+        self.schema_tracker = DynamicSchemaTracker(**self._tracker_config)
+        self.objects_schema_tracker = DynamicSchemaTracker(**self._tracker_config)
+        self.workflow_schema_trackers: Dict = {}
+
+    def update_schema_and_add_to_df(self, tasks: List[Dict]):
+        """Feed ``tasks`` to the task tracker, publish its schema/examples on the context, and append the rows to ``context.df``."""
+        self.schema_tracker.update_with_tasks(tasks)
+        self._context.tasks_schema = self.schema_tracker.get_schema()
+        self._context.value_examples = self.schema_tracker.get_example_values()
+
+        _df = _to_context_df(tasks)
+        self._context.df = pd.concat([self._context.df, _df], ignore_index=True)
+
+    def update_objects_schema_and_add_to_df(self, objects: List[Dict]):
+        """Feed ``objects`` to the object tracker, publish its schema/examples, and append the rows to ``context.objects_df``."""
+        self.objects_schema_tracker.update_with_tasks(objects)
+        self._context.objects_schema = self.objects_schema_tracker.get_schema()
+        self._context.objects_value_examples = self.objects_schema_tracker.get_example_values()
+
+        _df = _to_context_df(objects)
+        self._context.objects_df = pd.concat([self._context.objects_df, _df], ignore_index=True)
+
+    def update_workflow_schema_cache(self, tasks: List[Dict]):
+        """Update per-workflow dynamic schema snapshots from a batch of task records."""
+        by_workflow: Dict[str, List[Dict]] = {}
+        for task in tasks:
+            workflow_id = task.get("workflow_id")
+            if workflow_id:  # tasks without a workflow_id are not cached
+                by_workflow.setdefault(workflow_id, []).append(task)
+
+        for workflow_id, workflow_tasks in by_workflow.items():
+            tracker = self.workflow_schema_trackers.get(workflow_id)
+            if tracker is None:  # build a tracker only on first sight; setdefault would construct one on every call
+                tracker = DynamicSchemaTracker(**self._tracker_config)
+                self.workflow_schema_trackers[workflow_id] = tracker
+            tracker.update_with_tasks(workflow_tasks)
+            _df = _to_context_df(workflow_tasks)
+            existing = self._context.workflow_schema_cache.get(workflow_id, {}).get("current_fields", [])
+            current_fields = sorted(set(existing) | set(_df.columns))  # union with fields seen in earlier batches
+            self._context.workflow_schema_cache[workflow_id] = {
+                "dynamic_schema": tracker.get_schema(),
+                "value_examples": tracker.get_example_values(),
+                "current_fields": current_fields,
+            }
+
+    def get_workflow_schema_snapshot(self, workflow_id: str):
+        """Return the cached snapshot for ``workflow_id``; on a miss try the DB, caching a truthy result. None if the id is empty or lookup fails."""
+        if not workflow_id:
+            return None
+        if workflow_id in self._context.workflow_schema_cache:
+            return self._context.workflow_schema_cache[workflow_id]
+        try:
+            from flowcept.flowcept_api.db_api import DBAPI
+
+            snapshot = DBAPI().get_workflow_domain_data_schema(workflow_id)
+        except Exception as e:
+            self.logger.exception(e)  # best effort: a failed DB lookup is logged and treated as a miss
+            snapshot = None
+        if snapshot:
+            self._context.workflow_schema_cache[workflow_id] = snapshot
+        return snapshot
+
+    def persist_workflow_schema_snapshot(self, workflow_id: str) -> bool:
+        """Save the workflow's schema snapshot through ``DBAPI``; return False when there is no snapshot or saving raises."""
+        snapshot = self.get_workflow_schema_snapshot(workflow_id)
+        if not snapshot:
+            return False
+        try:
+            from flowcept.flowcept_api.db_api import DBAPI
+
+            return DBAPI().save_workflow_domain_data_schema(workflow_id, snapshot)
+        except Exception as e:
+            self.logger.exception(e)  # logged, then reported to the caller as a failed save
+            return False
diff --git a/src/flowcept/agents/dynamic_schema_tracker.py b/src/flowcept/agents/provenance_schema_manager/dynamic_schema_tracker.py
similarity index 100%
rename from src/flowcept/agents/dynamic_schema_tracker.py
rename to src/flowcept/agents/provenance_schema_manager/dynamic_schema_tracker.py
diff --git a/src/flowcept/agents/provenance_schema_manager/static_schema_builder.py b/src/flowcept/agents/provenance_schema_manager/static_schema_builder.py
new file mode 100644
index 00000000..e9eee3e3
--- /dev/null
+++ b/src/flowcept/agents/provenance_schema_manager/static_schema_builder.py
@@ -0,0 +1,189 @@
+"""Schema introspection utility for building prompt context from class attribute docstrings.
+
+Called once at MCP server startup. Never imported by producer-path code.
+"""
+
+import ast
+import inspect
+import textwrap
+from typing import Any
+
+
+class SchemaDocumentationError(Exception):
+    """Signals that a checked domain class has public annotated fields without attribute docstrings."""
+
+
+def get_attribute_docstrings(cls: type) -> dict[str, str]:
+    """Extract attribute docstrings from a class via AST parsing.
+
+    Reads the source of ``cls`` and walks its class body looking for annotated
+    assignments (``AnnAssign``) immediately followed by a string literal
+    (``Expr(Constant(str))``), which is the Python attribute-docstring convention.
+
+    Parameters
+    ----------
+    cls : type
+        The class to introspect.
+
+    Returns
+    -------
+    dict[str, str]
+        Mapping of field name to its stripped docstring. Fields without a following
+        string literal are not included. Empty when the source of ``cls`` cannot be
+        retrieved or parsed, or when no matching class definition is found in it.
+    """
+    try:
+        source = textwrap.dedent(inspect.getsource(cls))
+        tree = ast.parse(source)
+    except (OSError, TypeError, SyntaxError):
+        # SyntaxError covers IndentationError too, so any unparsable source yields {} instead of raising.
+        return {}
+
+    class_def = next(
+        (n for n in ast.walk(tree) if isinstance(n, ast.ClassDef) and n.name == cls.__name__),
+        None,
+    )
+    if class_def is None:
+        return {}
+
+    docs: dict[str, str] = {}
+    body = class_def.body
+    # Pair every statement with its successor; the last statement has none and is skipped by zip.
+    for node, follower in zip(body, body[1:]):
+        if not (isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)):
+            continue
+        if (
+            isinstance(follower, ast.Expr)
+            and isinstance(follower.value, ast.Constant)
+            and isinstance(follower.value.value, str)
+        ):
+            docs[node.target.id] = follower.value.value.strip()
+    return docs
+
+
+def assert_schema_documented(*classes: type) -> None:
+    """Fail fast when any public annotated field of the given classes lacks an attribute docstring.
+
+    Intended for MCP server startup: a ``SchemaDocumentationError`` here means a
+    field declaration is missing its docstring, so the fix is to document the field.
+    Names starting with an underscore are ignored, as are classes with no public
+    annotations.
+
+    Parameters
+    ----------
+    *classes : type
+        Domain classes to check (e.g. TaskObject, TelemetrySummary).
+
+    Raises
+    ------
+    SchemaDocumentationError
+        Listing, per class, every field found without an attribute docstring.
+    """
+    problems: list[str] = []
+    for klass in classes:
+        public_fields = [f for f in getattr(klass, "__annotations__", {}) if not f.startswith("_")]
+        if public_fields:
+            documented = get_attribute_docstrings(klass)
+            undocumented = [f for f in public_fields if f not in documented]
+            if undocumented:
+                problems.append(f" {klass.__qualname__}: {undocumented}")
+    if not problems:
+        return
+
+    listing = "\n".join(problems)
+    raise SchemaDocumentationError(
+        "MCP server cannot start — the following fields are missing attribute docstrings.\n"
+        "Add a triple-quoted string immediately after each field declaration:\n\n"
+        f"{listing}"
+        "\n\nExample:\n"
+        " my_field: str = None\n"
+        ' """Description of my_field."""\n'
+    )
+
+
+def _build_field_table(cls: type, subclasses: dict[str, type] | None = None) -> list[dict[str, Any]]:
+    """Describe the public annotated fields of ``cls`` as ``{name, type, description}`` rows.
+
+    Parameters
+    ----------
+    cls : type
+        The class to describe.
+    subclasses : dict[str, type], optional
+        Maps a field name of ``cls`` to the class whose own fields should replace it
+        (e.g. ``{"cpu": CpuSummary}``).
+
+    Returns
+    -------
+    list[dict]
+        One row per field, in annotation order. A field listed in ``subclasses``
+        yields one row per nested field instead, named ``<field>.<nested field>``
+        (e.g. ``cpu.percent_all_diff``). Missing docstrings give an empty description.
+    """
+
+    def _rows(klass: type, prefix: str = "") -> list[dict[str, Any]]:
+        """Build one row per public annotation declared on ``klass``."""
+        described = get_attribute_docstrings(klass)
+        return [
+            {
+                "name": f"{prefix}{field}",
+                "type": getattr(hint, "__name__", str(hint)),
+                "description": described.get(field, ""),
+            }
+            for field, hint in getattr(klass, "__annotations__", {}).items()
+            if not field.startswith("_")
+        ]
+
+    expanded = subclasses or {}
+    table: list[dict[str, Any]] = []
+    for row in _rows(cls):
+        if row["name"] in expanded:
+            table.extend(_rows(expanded[row["name"]], prefix=f"{row['name']}."))
+        else:
+            table.append(row)
+    return table
+
+
+def build_schema_context() -> dict[str, list[dict[str, Any]]]:
+    """Assemble the static field tables for Flowcept's domain classes.
+
+    Meant to run once, at MCP server startup; prompt builders consume the result,
+    which is kept at module level as ``SCHEMA_CONTEXT``.
+
+    Returns
+    -------
+    dict
+        Keys: ``task_fields``, ``workflow_fields``, ``agent_fields``,
+        ``blob_fields``, ``telemetry_summary_fields``.
+        Each value is a list of ``{name, type, description}`` dicts.
+    """
+    from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
+    from flowcept.commons.flowcept_dataclasses.workflow_object import WorkflowObject
+    from flowcept.commons.flowcept_dataclasses.agent_object import AgentObject
+    from flowcept.commons.flowcept_dataclasses.blob_object import BlobObject
+    from flowcept.commons.task_data_preprocess import (
+        TelemetrySummary,
+        CpuSummary,
+        MemorySummary,
+        DiskSummary,
+        NetworkSummary,
+    )
+
+    nested_telemetry = {"cpu": CpuSummary, "memory": MemorySummary, "disk": DiskSummary, "network": NetworkSummary}
+    # (result key, domain class, nested classes expanded into dotted rows or None)
+    targets = [
+        ("task_fields", TaskObject, None),
+        ("workflow_fields", WorkflowObject, None),
+        ("agent_fields", AgentObject, None),
+        ("blob_fields", BlobObject, None),
+        ("telemetry_summary_fields", TelemetrySummary, nested_telemetry),
+    ]
+
+    context: dict[str, list[dict[str, Any]]] = {}
+    for key, klass, nested in targets:
+        context[key] = _build_field_table(klass, subclasses=nested)
+    return context
+
+
+# Populated at MCP server startup via mcp_server.py lifespan; an empty dict until then.
+# Do not access before assert_schema_documented() has been called.
+SCHEMA_CONTEXT: dict[str, list[dict[str, Any]]] = {}
diff --git a/src/flowcept/agents/tool_result.py b/src/flowcept/agents/tool_result.py
new file mode 100644
index 00000000..b1f06c2c
--- /dev/null
+++ b/src/flowcept/agents/tool_result.py
@@ -0,0 +1,41 @@
+"""Shared ToolResult wrapper for MCP tools and webservice chat tools."""
+
+from typing import Union, Dict
+from pydantic import BaseModel
+
+
+class ToolResult(BaseModel):
+    """Standardized wrapper for tool outputs; ``code`` is optional and every predicate is False while it is unset.
+
+    Conventions
+    -----------
+    - 2xx: success (string result)
+    - 3xx: success (dict result)
+    - 4xx: error (string message)
+    - 5xx: error (dict result)
+    """
+
+    code: int | None = None
+    result: Union[str, Dict] = None  # NOTE(review): the None default is not covered by the annotation
+    extra: Dict | str | None = None
+    tool_name: str | None = None
+
+    def result_is_str(self) -> bool:
+        """Return True if ``code`` is 2xx or 4xx, the ranges that carry a string (False when ``code`` is unset)."""
+        return self.is_success_string() or self.is_error_string()
+
+    def is_success(self) -> bool:
+        """Return True if the result is a 2xx or 3xx success (False when ``code`` is unset)."""
+        return self.is_success_string() or self.is_success_dict()
+
+    def is_success_string(self) -> bool:
+        """Return True if the result is a 2xx success string (False when ``code`` is unset)."""
+        return self.code is not None and 200 <= self.code < 300
+
+    def is_error_string(self) -> bool:
+        """Return True if the result is a 4xx error string (False when ``code`` is unset)."""
+        return self.code is not None and 400 <= self.code < 500
+
+    def is_success_dict(self) -> bool:
+        """Return True if the result is a 3xx success dict (False when ``code`` is unset)."""
+        return self.code is not None and 300 <= self.code < 400
diff --git a/src/flowcept/agents/tools/__init__.py b/src/flowcept/agents/tools/__init__.py
deleted file mode 100644
index e8e337d3..00000000
--- a/src/flowcept/agents/tools/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Agent Tools Package."""
diff --git a/src/flowcept/agents/tools/db_prov_tools.py b/src/flowcept/agents/tools/db_prov_tools.py
deleted file mode 100644
index e81f8bb9..00000000
--- a/src/flowcept/agents/tools/db_prov_tools.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""MCP adapters exposing the shared provenance tool core to external agent clients.
-
-Thin ``@mcp.tool`` wrappers around :mod:`flowcept.agents.tools.prov_tools`, giving MCP
-clients (Claude Code, Codex, etc.) real DB-backed provenance querying — the same tool
-core used by the webservice chat.
-"""
-
-from typing import Any, Dict, List, Optional
-
-from flowcept.agents.agents_utils import ToolResult
-from flowcept.agents.flowcept_ctx_manager import mcp_flowcept
-from flowcept.agents.tools import prov_tools
-
-
-@mcp_flowcept.tool()
-def query_provenance_tasks(
- filter: Optional[Dict[str, Any]] = None,
- projection: Optional[List[str]] = None,
- limit: int = 100,
- sort: Optional[List[Dict[str, Any]]] = None,
-) -> ToolResult:
- """Query task provenance records in the database with a Mongo-style filter."""
- return prov_tools.query_tasks(filter=filter, projection=projection, limit=limit, sort=sort)
-
-
-@mcp_flowcept.tool()
-def query_provenance_workflows(filter: Optional[Dict[str, Any]] = None, limit: int = 100) -> ToolResult:
- """Query workflow provenance records in the database with a Mongo-style filter."""
- return prov_tools.query_workflows(filter=filter, limit=limit)
-
-
-@mcp_flowcept.tool()
-def get_provenance_task_summary(filter: Optional[Dict[str, Any]] = None) -> ToolResult:
- """Summarize tasks matching a filter: status counts, per-activity durations, time range."""
- return prov_tools.get_task_summary(filter=filter)
-
-
-@mcp_flowcept.tool()
-def list_provenance_campaigns() -> ToolResult:
- """List derived campaign summaries (campaigns group workflows and tasks)."""
- return prov_tools.list_campaigns()
-
-
-@mcp_flowcept.tool()
-def list_provenance_agents() -> ToolResult:
- """List derived agent summaries (agents observed in task provenance)."""
- return prov_tools.list_agents()
diff --git a/src/flowcept/agents/tools/general_tools.py b/src/flowcept/agents/tools/general_tools.py
deleted file mode 100644
index f5c88797..00000000
--- a/src/flowcept/agents/tools/general_tools.py
+++ /dev/null
@@ -1,255 +0,0 @@
-import json
-from typing import List
-
-from flowcept import Flowcept
-from flowcept.agents.agents_utils import build_llm_model, ToolResult, normalize_message
-from flowcept.agents.flowcept_ctx_manager import mcp_flowcept
-from flowcept.agents.prompts.general_prompts import ROUTING_PROMPT, SMALL_TALK_PROMPT
-
-from flowcept.agents.tools.in_memory_queries.in_memory_queries_tools import run_df_query
-from flowcept.agents.tools.workflow_query_tools import run_workflow_query
-
-
-def _external_llm_enabled() -> bool:
- """Return True when agent is configured to use an external LLM orchestrator."""
- from flowcept.configs import AGENT
-
- return bool(AGENT.get("external_llm", False))
-
-
-@mcp_flowcept.tool()
-def get_latest(n: int = None) -> str:
- """
- Return the most recent task(s) from the task buffer.
-
- Parameters
- ----------
- n : int, optional
- Number of most recent tasks to return. If None, return only the latest.
-
- Returns
- -------
- str
- JSON-encoded task(s).
- """
- ctx = mcp_flowcept.get_context()
- tasks = ctx.request_context.lifespan_context.tasks
- if not tasks:
- return "No tasks available."
- if n is None:
- return json.dumps(tasks[-1])
- return json.dumps(tasks[-n])
-
-
-@mcp_flowcept.tool()
-def check_liveness() -> str:
- """
- Confirm the agent is alive and responding.
-
- Returns
- -------
- str
- Liveness status string.
- """
- return f"I'm {mcp_flowcept.name} and I'm ready!"
-
-
-@mcp_flowcept.tool()
-def check_llm() -> str:
- """
- Check connectivity and response from the LLM backend.
-
- Returns
- -------
- str
- LLM response, formatted with MCP metadata.
- """
- llm = build_llm_model()
- response = llm("Hello?")
- return response
-
-
-@mcp_flowcept.tool()
-def record_guidance(message: str) -> ToolResult:
- """
- Record guidance tool.
- """
- ctx = mcp_flowcept.get_context()
- message = message.replace("@record", "")
- custom_guidance: List = ctx.request_context.lifespan_context.custom_guidance
- custom_guidance.append(message)
-
- return ToolResult(code=201, result=f"Ok. I recorded in my memory: {message}")
-
-
-@mcp_flowcept.tool()
-def show_records() -> ToolResult:
- """
- Lists all recorded user guidance.
- """
- try:
- ctx = mcp_flowcept.get_context()
- custom_guidance: List = ctx.request_context.lifespan_context.custom_guidance
- if not custom_guidance:
- message = "There is no recorded user guidance."
- else:
- message = "This is the list of custom guidance I have in my memory:\n"
- message += "\n".join(f" - {msg}" for msg in custom_guidance)
-
- return ToolResult(code=201, result=message)
- except Exception as e:
- return ToolResult(code=499, result=str(e))
-
-
-@mcp_flowcept.tool()
-def reset_records() -> ToolResult:
- """
- Resets all recorded user guidance.
- """
- try:
- ctx = mcp_flowcept.get_context()
- ctx.request_context.lifespan_context.custom_guidance = []
- return ToolResult(code=201, result="Custom guidance reset.")
- except Exception as e:
- return ToolResult(code=499, result=str(e))
-
-
-@mcp_flowcept.tool()
-def reset_context() -> ToolResult:
- """
- Resets all context.
- """
- try:
- ctx = mcp_flowcept.get_context()
- ctx.request_context.lifespan_context.reset_context()
- return ToolResult(code=201, result="Context reset.")
- except Exception as e:
- return ToolResult(code=499, result=str(e))
-
-
-@mcp_flowcept.tool()
-def generate_workflow_card(
- workflow_id: str | None = None,
- campaign_id: str | None = None,
- input_jsonl_path: str | None = None,
-) -> ToolResult:
- """
- Generate and return a markdown workflow card as text.
-
- Exactly one of ``workflow_id``, ``campaign_id``, or ``input_jsonl_path`` must be provided.
-
- Parameters
- ----------
- workflow_id : str | None
- Query by workflow identifier.
- campaign_id : str | None
- Query by campaign identifier (produces a campaign-level card).
- input_jsonl_path : str | None
- Path to a Flowcept JSONL buffer file used as input instead of the DB.
-
- Returns
- -------
- ToolResult
- ``code=301`` with markdown text in ``result["markdown"]`` on success,
- or an error payload on failure.
- """
- try:
- if not any([workflow_id, campaign_id, input_jsonl_path]):
- return ToolResult(code=400, result="One of workflow_id, campaign_id, or input_jsonl_path is required.")
-
- stats = Flowcept.generate_report(
- report_type="workflow_card",
- format="markdown",
- workflow_id=workflow_id,
- campaign_id=campaign_id,
- input_jsonl_path=input_jsonl_path,
- )
- return ToolResult(
- code=301,
- result={
- "workflow_id": workflow_id,
- "campaign_id": campaign_id,
- "markdown": stats["markdown"],
- },
- )
- except Exception as e:
- return ToolResult(code=499, result=str(e))
-
-
-@mcp_flowcept.tool()
-def prompt_handler(message: str) -> ToolResult:
- """
- Routes a user message using an LLM to classify its intent.
-
- Parameters
- ----------
- message : str
- User's natural language input.
-
- Returns
- -------
- TextContent
- The AI response or routing feedback.
- """
- workflow_query_prefix = "w:"
- task_query_prefix = "t:"
- object_query_prefix = "o:"
- normalized_message = message.strip().lower()
- if message.strip().lower().startswith(workflow_query_prefix):
- query = message.split(":", 1)[1].strip()
- return run_workflow_query(query=query)
- if normalized_message.startswith(task_query_prefix):
- query = message.split(":", 1)[1].strip()
- return run_df_query(query=query, llm=None, plot=False, context_kind="tasks")
- if normalized_message.startswith(object_query_prefix):
- query = message.split(":", 1)[1].strip()
- return run_df_query(query=query, llm=None, plot=False, context_kind="objects")
-
- df_key_words = ["df", "save", "result = df"]
- for key in df_key_words:
- if key in message:
- return run_df_query(query=message, llm=None, plot=False)
-
- if "reset context" in message:
- return reset_context()
- if "@record" in message:
- return record_guidance(message)
- if "@show records" in message:
- return show_records()
- if "@reset records" in message:
- return reset_records()
-
- if _external_llm_enabled():
- return ToolResult(
- code=201,
- result=(
- "external_llm mode is enabled. Internal LLM routing is disabled. "
- "Use explicit commands such as 'save', 'result = df ...', "
- "'t: ', 'o: