-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv_agent.py
More file actions
70 lines (59 loc) · 4.05 KB
/
csv_agent.py
File metadata and controls
70 lines (59 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# ── Imports ───────────────────────────────────────────────────────────────────
import os
from langchain_experimental.agents import create_csv_agent # builds a ReAct CSV agent
from langchain_openai import ChatOpenAI # OpenAI-compatible LLM wrapper (used for DeepSeek)
from dotenv import load_dotenv # reads .env file into os.environ
# Previous import (Groq-hosted models — now decommissioned):
# from langchain_groq import ChatGroq
# ── Environment Setup ─────────────────────────────────────────────────────────
# Load DEEPSEEK_API_KEY (and any other vars) from a .env file in the project root.
load_dotenv()
# None if the variable is absent from both .env and the process environment —
# in that case the ChatOpenAI client below will fail when the first request is made.
deepseek_api_key: str | None = os.getenv("DEEPSEEK_API_KEY")
# Previous (Groq):
# groq_api_key = os.getenv("GROQ_API_KEY")
# ── Model Selection ───────────────────────────────────────────────────────────
# deepseek-chat = DeepSeek-V3 in standard (non-thinking) mode.
# No <think> tokens — cleaner output for the LangChain ReAct agent parser.
# Accessed via DeepSeek's own OpenAI-compatible API at https://api.deepseek.com
model: str = "deepseek-chat"
# Alternative: deepseek-reasoner — thinking mode (produces <think> tokens, stronger reasoning)
# model = "deepseek-reasoner"
# Previous models on Groq (all decommissioned):
# model = "deepseek-r1-distill-qwen-32b"
# model = "deepseek-r1-distill-llama-70b"
# model = "llama-3.1-8b-instant"
# ── LLM Initialisation — Previous (Groq / ChatGroq) ──────────────────────────
# llm = ChatGroq(
# model_name=model,
# api_key=groq_api_key,
# temperature=0.5
# )
# ── LLM Initialisation — DeepSeek via OpenAI-compatible API ──────────────────
# DeepSeek's API is OpenAI-compatible, so ChatOpenAI works by simply pointing
# base_url at DeepSeek's endpoint. No other changes needed.
# temperature=0.5 keeps answers focused while allowing some analytical flexibility.
# Module-level singleton: constructed once at import time and shared by every
# agent built via build_csv_agent below.
llm: ChatOpenAI = ChatOpenAI(
model=model, # "deepseek-chat"
api_key=deepseek_api_key, # DeepSeek API key from .env
base_url="https://api.deepseek.com", # DeepSeek's OpenAI-compatible endpoint
temperature=0.5 # 0 = fully deterministic, 1 = more creative
)
# ── Agent Factory ─────────────────────────────────────────────────────────────
def build_csv_agent(csv_file_name):
    """Create a LangChain CSV ReAct agent bound to *csv_file_name*.

    The agent writes and runs pandas code against the CSV file to answer
    the user's question. Intended to be built once per file (the caller
    caches the instance — e.g. via @st.cache_resource in Chatbot.py) and
    reused for every subsequent query.
    """
    # handle_parsing_errors=True makes the executor retry automatically when
    # the LLM emits output the ReAct parser cannot understand.
    executor_opts = {"handle_parsing_errors": True}
    return create_csv_agent(
        llm,
        csv_file_name,
        # The agent executes model-generated Python in a REPL tool; this flag
        # is an explicit opt-in to that risk and is required for it to work.
        allow_dangerous_code=True,
        # Print the full ReAct chain (thoughts, actions, tool calls) to the terminal.
        verbose=True,
        agent_executor_kwargs=executor_opts,
    )
# ── Query Runner ──────────────────────────────────────────────────────────────
def csv_agent_invoker(agent, user_query):
    """Run *user_query* through a pre-built *agent* and return the plain-text answer.

    Kept separate from build_csv_agent so a single agent instance can serve
    many messages without being rebuilt each time.
    """
    response = agent.invoke(user_query)
    return response["output"]