Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ Run a predefined standard question:
python -m app.core.main -q 1
```

The numbers for `-q` index into `standard_questions` in [app/core/questions.py](app/core/questions.py) (module `app.core.questions`).

Run a custom question:

```bash
Expand All @@ -151,8 +153,18 @@ Override the endpoint at runtime:
python -m app.core.main -c "Which lab extracts show inhibition above 50% against Leishmania donovani?" --endpoint https://your-endpoint.example/sparql
```

Attach a local input file to your question with `-f` (the file is copied into the session so the `FILE_ANALYZER` tool can use it):

```bash
python -m app.core.main -c "Summarize the annotations in this file" -f path/to/your_file.csv
```

MetaboT saves all result sets to CSV files in a temporary folder and returns the file path. When results are small, they are also displayed inline; for large result sets, only the file path is returned to avoid exceeding the LLM context window.

### LangSmith automated evaluation (benchmark)

This repository includes a LangSmith-based automated evaluation script at `app/core/tests/evaluation.py`. To run it locally you need a LangSmith API key (`LANGCHAIN_API_KEY` or `LANGSMITH_API_KEY`) and an LLM provider key (e.g. `OPENAI_API_KEY`). See [docs/examples/langsmith-evaluation.md](docs/examples/langsmith-evaluation.md).

### Streamlit web app

The repository also includes a Streamlit interface:
Expand Down
117 changes: 108 additions & 9 deletions app/core/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os
import shutil
import argparse
from typing import Optional
from typing import Optional, List
from dotenv import load_dotenv
from langsmith import Client
from pathlib import Path
from app.core.session import create_user_session, initialize_session_context
from app.core.workflow.langraph_workflow import create_workflow, process_workflow
from app.core.utils import IntRange, setup_logger
import configparser
Expand Down Expand Up @@ -31,6 +33,14 @@
"mistral": "MISTRAL_API_KEY"
}

class SessionFilePreparationError(ValueError):
"""Raised when CLI input files cannot be staged into the session directory."""

def __init__(self, source_path: Path, message: str):
super().__init__(message)
self.source_path = source_path



def get_api_key(provider: str) -> Optional[str]:
"""
Expand Down Expand Up @@ -99,7 +109,8 @@ def llm_creation(api_key=None, params_file=None):
Reads the parameters from the configuration file (default is params.ini) and initializes the language models.

Args:
api_key (str, optional): The API key for the OpenAI API.
api_key (str, optional): OpenAI API key; if set, used only for OpenAI-backed config sections.
Other providers always use keys from the environment (see API_KEY_MAPPING).
params_file (str, optional): Path to an alternate configuration file.

Returns:
Expand Down Expand Up @@ -129,8 +140,10 @@ def llm_creation(api_key=None, params_file=None):
elif section.startswith("ovh"):
provider = "ovh"

if api_key is None:
api_key = get_api_key(provider)
# CLI `--api-key` is OpenAI-only; do not reuse it for other providers or across iterations.
section_api_key = (
api_key if (api_key is not None and provider == "openai") else get_api_key(provider)
)

model_params = {
"temperature": float(temperature),
Expand All @@ -143,12 +156,12 @@ def llm_creation(api_key=None, params_file=None):
base_url = config[section]["base_url"]
if provider == "deepseek":
model_params["openai_api_base"] = base_url
model_params["openai_api_key"] = api_key
model_params["openai_api_key"] = section_api_key
else:
model_params["base_url"] = base_url
model_params["api_key"] = api_key
model_params["api_key"] = section_api_key
else:
model_params["openai_api_key"] = api_key
model_params["openai_api_key"] = section_api_key

llm = ChatOpenAI(**model_params)
models[section] = llm
Expand Down Expand Up @@ -198,6 +211,72 @@ def langsmith_setup() -> Optional[Client]:
return None


def _prepare_session_files(session_id: str, file_paths: List[str]) -> Path:
"""
Copy user-supplied local files into the session's input directory so that
the FILE_ANALYZER tool can discover them at runtime.

Args:
session_id: Active session identifier.
file_paths: List of local file paths provided via the CLI.

Returns:
Path to the session input directory.

Raises:
SessionFilePreparationError: If any supplied path cannot be staged safely.
"""
input_dir = create_user_session(session_id, input_dir=True)
staged_destinations: dict[Path, Path] = {}

for raw_path in file_paths:
src = Path(raw_path).expanduser().resolve(strict=False)
if not src.exists():
raise SessionFilePreparationError(src, f"File not found: {src}")
if not src.is_file():
raise SessionFilePreparationError(src, f"Input path is not a file: {src}")

dest = input_dir / src.name
previous_src = staged_destinations.get(dest)
if previous_src is not None:
if previous_src == src:
raise SessionFilePreparationError(
src,
f"Input file was provided more than once: {src}",
)
raise SessionFilePreparationError(
src,
(
f"Cannot stage '{src}' because it would overwrite '{previous_src}' in "
f"the session input directory. Rename one of the files or choose a different path."
),
)

if src == dest.resolve(strict=False):
raise SessionFilePreparationError(
src,
f"Input file is already staged in the session directory: {src}",
)

if dest.exists():
raise SessionFilePreparationError(
src,
f"Cannot stage '{src}' because destination '{dest}' already exists.",
)

try:
shutil.copy2(str(src), str(dest))
except (shutil.SameFileError, OSError) as exc:
raise SessionFilePreparationError(
src,
f"Failed to stage '{src}' into '{dest}': {exc}",
) from exc

staged_destinations[dest] = src
logger.info(f"Copied '{src}' -> '{dest}'")

return input_dir

def main():
"""Main function to run the workflow."""
# Define command line arguments
Expand All @@ -207,6 +286,10 @@ def main():
help=f"Choose a standard question number from 1 to {len(standard_questions)}.")
parser.add_argument('-c', '--custom', type=str,
help="Provide a custom question.")
parser.add_argument(
'-f', '--file', type=str, nargs='+',
help="One or more local file paths to make available for the FILE_ANALYZER tool.",
)
parser.add_argument('-e', '--evaluation', action='store_true',
help="Enable evaluation mode")
parser.add_argument('--api-key', type=str,
Expand All @@ -224,21 +307,37 @@ def main():
print("You must provide either a standard question number or a custom question.")
return

# Create a user session (mirrors the Streamlit session lifecycle) and
# reconfigure the logger so subsequent CLI logs land in the session file.
session_id = create_user_session()
initialize_session_context(session_id)
global logger
logger = setup_logger(__name__)

if args.file:
try:
_prepare_session_files(session_id, args.file)
except SessionFilePreparationError as exc:
logger.error(str(exc))
print(f"Error: {exc}")
return

# Initialize LangSmith if available
langsmith_client = langsmith_setup()
langsmith_setup()

# Get endpoint URL from arguments or environment
endpoint_url = (
args.endpoint
or os.environ.get("KG_ENDPOINT_URL")
or "https://enpkg.commons-lab.org/graphdb/repositories/ENPKG"
)
models = llm_creation()
models = llm_creation(api_key=args.api_key)
Comment thread
qodo-code-review[bot] marked this conversation as resolved.

try:
# Create and process workflow
workflow = create_workflow(
models=models,
session_id=session_id,
endpoint_url=endpoint_url,
evaluation=False,
api_key=args.api_key
Expand Down
Loading