-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcsv_agent.py
More file actions
70 lines (59 loc) · 4.05 KB
/
csv_agent.py
File metadata and controls
70 lines (59 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# ── Imports ───────────────────────────────────────────────────────────────────
import os
from langchain_experimental.agents import create_csv_agent # builds a ReAct CSV agent
from langchain_openai import ChatOpenAI # OpenAI-compatible LLM wrapper (used for DeepSeek)
from dotenv import load_dotenv # reads .env file into os.environ
# Previous import (Groq-hosted models — now decommissioned):
# from langchain_groq import ChatGroq
# ── Environment Setup ─────────────────────────────────────────────────────────
# Load DEEPSEEK_API_KEY (and any other vars) from a .env file in the project root.
load_dotenv()
# None if the variable is absent from both .env and the process environment —
# in that case the ChatOpenAI client below will fail when the first request is made.
deepseek_api_key: str | None = os.getenv("DEEPSEEK_API_KEY")
# Previous (Groq):
# groq_api_key = os.getenv("GROQ_API_KEY")
# ── Model Selection ───────────────────────────────────────────────────────────
# deepseek-chat = DeepSeek-V3 in standard (non-thinking) mode.
# No <think> tokens — cleaner output for the LangChain ReAct agent parser.
# Accessed via DeepSeek's own OpenAI-compatible API at https://api.deepseek.com
model: str = "deepseek-chat"
# Alternative: deepseek-reasoner — thinking mode (produces <think> tokens, stronger reasoning)
# model = "deepseek-reasoner"
# Previous models on Groq (all decommissioned):
# model = "deepseek-r1-distill-qwen-32b"
# model = "deepseek-r1-distill-llama-70b"
# model = "llama-3.1-8b-instant"
# ── LLM Initialisation — Previous (Groq / ChatGroq) ──────────────────────────
# llm = ChatGroq(
# model_name=model,
# api_key=groq_api_key,
# temperature=0.5
# )
# ── LLM Initialisation — DeepSeek via OpenAI-compatible API ──────────────────
# DeepSeek's API is OpenAI-compatible, so ChatOpenAI works by simply pointing
# base_url at DeepSeek's endpoint. No other changes needed.
# temperature=0.5 keeps answers focused while allowing some analytical flexibility.
# Module-level singleton: constructed once at import time and shared by every
# agent built via build_csv_agent below.
llm: ChatOpenAI = ChatOpenAI(
model=model, # "deepseek-chat"
api_key=deepseek_api_key, # DeepSeek API key from .env
base_url="https://api.deepseek.com", # DeepSeek's OpenAI-compatible endpoint
temperature=0.5 # 0 = fully deterministic, 1 = more creative
)
# ── Agent Factory ─────────────────────────────────────────────────────────────
def build_csv_agent(csv_file_name):
    """Create a LangChain CSV ReAct agent bound to *csv_file_name*.

    The agent writes and runs pandas code against the CSV file to answer
    the user's question. Intended to be built once per file (the caller
    caches the instance — e.g. via @st.cache_resource in Chatbot.py) and
    reused for every subsequent query.
    """
    # handle_parsing_errors=True makes the executor retry automatically when
    # the LLM emits output the ReAct parser cannot understand.
    executor_opts = {"handle_parsing_errors": True}
    return create_csv_agent(
        llm,
        csv_file_name,
        # The agent executes model-generated Python in a REPL tool; this flag
        # is an explicit opt-in to that risk and is required for it to work.
        allow_dangerous_code=True,
        # Print the full ReAct chain (thoughts, actions, tool calls) to the terminal.
        verbose=True,
        agent_executor_kwargs=executor_opts,
    )
# ── Query Runner ──────────────────────────────────────────────────────────────
def csv_agent_invoker(agent, user_query):
    """Run *user_query* through a pre-built *agent* and return the plain-text answer.

    Kept separate from build_csv_agent so a single agent instance can serve
    many messages without being rebuilt each time.
    """
    response = agent.invoke(user_query)
    return response["output"]