From b70828053edb886145f48d8fa1218e690dead397 Mon Sep 17 00:00:00 2001 From: madina1203 Date: Sun, 26 Apr 2026 20:10:10 +0200 Subject: [PATCH] Add how to run evaluation.py (LangSmith api key is needed) --- README.md | 4 ++++ app/core/tests/evaluation.py | 2 +- docs/examples/langsmith-evaluation.md | 20 ++++++++++++++++++++ docs/user-guide/configuration.md | 2 ++ 4 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 docs/examples/langsmith-evaluation.md diff --git a/README.md b/README.md index a589a7d..3128c0c 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,10 @@ python -m app.core.main -c "Which lab extracts show inhibition above 50% against MetaboT saves all result sets to CSV files in a temporary folder and returns the file path. When results are small, they are also displayed inline; for large result sets, only the file path is returned to avoid exceeding the LLM context window. +### LangSmith automated evaluation (benchmark) + +This repository includes a LangSmith-based automated evaluation script at `app/core/tests/evaluation.py`. To run it locally you need a LangSmith API key (`LANGCHAIN_API_KEY` or `LANGSMITH_API_KEY`) and an LLM provider key (e.g. `OPENAI_API_KEY`). See [docs/examples/langsmith-evaluation.md](docs/examples/langsmith-evaluation.md). + ### Streamlit web app The repository also includes a Streamlit interface: diff --git a/app/core/tests/evaluation.py b/app/core/tests/evaluation.py index 2e06f56..ccd171e 100644 --- a/app/core/tests/evaluation.py +++ b/app/core/tests/evaluation.py @@ -24,7 +24,7 @@ openai_key = os.getenv("OPENAI_API_KEY") if not api_key: - raise ValueError("Missing LANGCHAIN_API_KEY. Please copy .env.template to .env and add your key.") + raise ValueError("Missing LANGCHAIN_API_KEY (or LANGSMITH_API_KEY). Please add it to your repo-root .env (see docs/user-guide/configuration.md).") if not openai_key: raise ValueError("Missing OPENAI_API_KEY. 
Please copy .env.template to .env and add your key.") diff --git a/docs/examples/langsmith-evaluation.md b/docs/examples/langsmith-evaluation.md new file mode 100644 index 0000000..c6a489c --- /dev/null +++ b/docs/examples/langsmith-evaluation.md @@ -0,0 +1,20 @@ +# LangSmith automated evaluation + +To run the automated evaluation, you need a **LangSmith API key**. Create an account and generate an API key by following the LangSmith docs: [Create an account and API key](https://docs.langchain.com/langsmith/create-account-api-key). + +Add the key to your repo-root `.env` (do not commit it), along with your LLM provider key: + +```env +LANGCHAIN_API_KEY=your_langsmith_key # or LANGSMITH_API_KEY +LANGCHAIN_PROJECT=MetaboT_Test_Run +OPENAI_API_KEY=your_openai_key +``` + +Then run (from the repository root): + +```bash +python -m app.core.tests.evaluation +``` + +This starts the benchmark evaluation (running MetaboT on ~50 questions). LLM-based evaluators compare the generated answers with the reference answers. A link to the evaluation run will appear once the evaluation starts, and you can also find it later in your LangSmith account. + diff --git a/docs/user-guide/configuration.md b/docs/user-guide/configuration.md index 85afe7b..f225e74 100644 --- a/docs/user-guide/configuration.md +++ b/docs/user-guide/configuration.md @@ -142,6 +142,8 @@ LANGCHAIN_ENDPOINT=https://api.smith.langchain.com If no tracing key is present, MetaboT disables tracing automatically. +To run the repository's LangSmith-based automated benchmark evaluation, see [docs/examples/langsmith-evaluation.md](../examples/langsmith-evaluation.md). + ## SPARQL and Schema Configuration `app/config/sparql.ini` contains helper queries used to inspect or work with the graph schema. These are especially important when MetaboT needs to construct schema-aware prompts from an endpoint rather than from assumptions.