From a58b552ddfdd909ccce856837e5fe16908604be7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Mar 2026 20:14:20 +0000 Subject: [PATCH 1/4] Initial plan From 33e1b68a3748b9701d921d6487f4a0cf1116f930 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 6 Mar 2026 20:20:21 +0000 Subject: [PATCH 2/4] Fix CI failures: add new chatbot modules and fix flake8 F824 error in api.py Co-authored-by: joshuvavinith <146979257+joshuvavinith@users.noreply.github.com> --- .github/workflows/ci.yml | 56 +++++ .gitignore | 27 +++ Dockerfile | 29 +++ ai_chatbot.py | 436 +++++++++++++++++++++++++++------------ api.py | 147 +++++++++++++ requirements.txt | 19 +- test_chatbot.py | 218 ++++++++++++++++++++ web_demo.py | 121 +++++++++++ 8 files changed, 923 insertions(+), 130 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 api.py create mode 100644 test_chatbot.py create mode 100644 web_demo.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..75d7c6a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,56 @@ +name: CI + +on: + push: + branches: ["main", "master", "copilot/**"] + pull_request: + branches: ["main", "master"] + +# Restrict default GITHUB_TOKEN permissions to read-only +permissions: + contents: read + +jobs: + lint-and-test: + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Lint with flake8 + run: | + pip install flake8 + # Stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # Treat all other issues as warnings (non-blocking) + flake8 . --count --exit-zero --max-line-length=100 --statistics + + - name: Run tests + run: pytest test_chatbot.py -v + + docker-build: + runs-on: ubuntu-latest + needs: lint-and-test + permissions: + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Build Docker image (web mode) + run: docker build --build-arg MODE=web -t ai-chatbot:web . + + - name: Build Docker image (api mode) + run: docker build --build-arg MODE=api -t ai-chatbot:api . diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..688ab4d --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# Python cache +__pycache__/ +*.py[cod] + +# Virtual environment +venv/ +.venv/ + +# Jupyter/IPython +.ipynb_checkpoints/ + +# System files +.DS_Store +Thumbs.db + +# IDE settings +.vscode/ +.idea/ + +# Environment / secrets +.env + +# Pytest cache +.pytest_cache/ + +# Temporary / generated files +converted_dialog.csv diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..6e714b6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.12-slim + +# Build arguments to select the runtime mode: +# web → run the Streamlit web demo (default) +# api → run the FastAPI REST backend +ARG MODE=web + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy source +COPY . . + +# Expose ports +# 8501 → Streamlit +# 8000 → FastAPI / uvicorn +EXPOSE 8501 8000 + +ENV MODE=${MODE} + +CMD ["sh", "-c", "\ + if [ \"$MODE\" = 'api' ]; then \ + uvicorn api:app --host 0.0.0.0 --port 8000; \ + else \ + streamlit run web_demo.py --server.port 8501 --server.address 0.0.0.0; \ + fi"] diff --git a/ai_chatbot.py b/ai_chatbot.py index cd0becc..baf17bd 100644 --- a/ai_chatbot.py +++ b/ai_chatbot.py @@ -1,167 +1,347 @@ -import os -import tkinter as tk -from tkinter import scrolledtext +""" +AI ChatBot — core module. + +Supports two backends, selected automatically: + 1. LLM backend – uses the OpenAI Chat Completions API when an + OPENAI_API_KEY environment variable (or .env file) is present. + 2. Pattern-matching backend – offline fallback using a CSV dialog dataset. + +The public surface area is intentionally small so that web_demo.py and +api.py can both import from this module without pulling in Tkinter. +""" + import csv +import os import random -import kagglehub +from typing import Optional + +# Load .env variables if python-dotenv is available +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +# --------------------------------------------------------------------------- +# Pattern-matching backend +# --------------------------------------------------------------------------- -# Simple chatbot class using pattern matching class SimpleBot: - def __init__(self): - self.responses = {} - self.default_responses = [ + """Offline chatbot backed by a CSV dialog dataset.""" + + def __init__(self) -> None: + self.responses: dict[str, list[str]] = {} + self.default_responses: list[str] = [ "I'm not sure I understand. Could you rephrase that?", "Interesting question! I'm still learning.", "I don't have an answer for that yet.", - "Could you tell me more about that?" + "Could you tell me more about that?", ] - - def train(self, dialog_file): + + def train(self, dialog_file: str) -> None: try: with open(dialog_file, encoding="utf-8") as file: reader = csv.reader(file) - next(reader) # Skip header row - current_dialog = None - question = None - + next(reader) # skip header + question: Optional[str] = None + for row in reader: - if len(row) >= 3: # dialog_id, line_id, text - dialog_id = row[0] + if len(row) >= 3: line_id = row[1] text = row[2] - - if line_id == '1': # This is a question/prompt + + if line_id == "1": question = text.lower() - elif line_id == '2' and question: # This is a response - if question not in self.responses: - self.responses[question] = [] - self.responses[question].append(text) + elif line_id == "2" and question: + self.responses.setdefault(question, []).append(text) question = None + print(f"Trained with {len(self.responses)} dialog patterns") - except Exception as e: - print(f"Error loading training data: {e}") - - def get_response(self, message): - message = message.lower() - - # Check for exact matches - if message in self.responses: - return random.choice(self.responses[message]) - - # Check for partial matches - for pattern, responses in self.responses.items(): - if pattern in message or message in pattern: - return random.choice(responses) - - # Return default response if no match + except Exception as exc: + print(f"Error loading training data: {exc}") + + def get_response(self, message: str, history: Optional[list] = None) -> str: + """Return a pattern-matched reply. *history* is accepted but unused.""" + key = message.lower() + + if key in self.responses: + return random.choice(self.responses[key]) + + for pattern, replies in self.responses.items(): + if pattern in key or key in pattern: + return random.choice(replies) + return random.choice(self.default_responses) -# Initialize chatbot -chatbot = SimpleBot() -# Add method to parse Kaggle's dialogs.txt format -def parse_kaggle_dialogs(file_path): +# --------------------------------------------------------------------------- +# LLM backend (OpenAI) +# --------------------------------------------------------------------------- + +class LLMBot: + """Chatbot backed by the OpenAI Chat Completions API.""" + + SYSTEM_PROMPT = ( + "You are a helpful, friendly, and concise AI assistant. " + "Answer clearly and stay on topic." + ) + + def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo") -> None: + from openai import OpenAI # deferred import so SimpleBot works without openai + self.model = model + self.client = OpenAI(api_key=api_key or os.environ.get("OPENAI_API_KEY")) + + def get_response(self, message: str, history: Optional[list] = None) -> str: + """Call the OpenAI API and return the assistant reply. + + *history* is a list of ``{"role": ..., "content": ...}`` dicts + representing the conversation so far (not including the current message). + """ + messages = [{"role": "system", "content": self.SYSTEM_PROMPT}] + if history: + messages.extend(history) + messages.append({"role": "user", "content": message}) + + completion = self.client.chat.completions.create( + model=self.model, + messages=messages, + max_tokens=512, + temperature=0.7, + ) + return completion.choices[0].message.content.strip() + + def stream_response(self, message: str, history: Optional[list] = None): + """Yield response tokens one by one (for streaming UIs).""" + messages = [{"role": "system", "content": self.SYSTEM_PROMPT}] + if history: + messages.extend(history) + messages.append({"role": "user", "content": message}) + + stream = self.client.chat.completions.create( + model=self.model, + messages=messages, + max_tokens=512, + temperature=0.7, + stream=True, + ) + for chunk in stream: + delta = chunk.choices[0].delta.content + if delta: + yield delta + + +# --------------------------------------------------------------------------- +# Unified ChatBot facade +# --------------------------------------------------------------------------- + +class ChatBot: + """High-level chatbot that auto-selects LLM or pattern-matching backend. + + Priority: + 1. Use LLMBot if ``OPENAI_API_KEY`` is set. + 2. Fall back to SimpleBot otherwise. + + Conversation history is maintained internally so callers only need to + pass the current user message. + """ + + MAX_HISTORY = 20 # keep last N turns in context + + def __init__(self, dialog_file: Optional[str] = None, force_offline: bool = False) -> None: + self.history: list[dict] = [] + self._llm: Optional[LLMBot] = None + self._simple: Optional[SimpleBot] = None + + if not force_offline and os.environ.get("OPENAI_API_KEY"): + try: + self._llm = LLMBot() + print("Using LLM backend (OpenAI).") + except Exception as exc: + print(f"LLM init failed ({exc}), falling back to pattern-matching.") + + if self._llm is None: + self._simple = SimpleBot() + self._load_training_data(dialog_file) + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + @property + def mode(self) -> str: + return "llm" if self._llm else "pattern" + + def chat(self, message: str) -> str: + """Process *message* and return a reply; history is updated automatically.""" + if self._llm: + reply = self._llm.get_response(message, self.history) + else: + reply = self._simple.get_response(message, self.history) # type: ignore[union-attr] + + self._update_history(message, reply) + return reply + + def stream_chat(self, message: str): + """Yield response tokens (LLM mode) or yield the full reply at once.""" + if self._llm: + tokens: list[str] = [] + for token in self._llm.stream_response(message, self.history): + tokens.append(token) + yield token + reply = "".join(tokens) + else: + reply = self._simple.get_response(message, self.history) # type: ignore[union-attr] + yield reply + + self._update_history(message, reply) + + @property + def pattern_count(self) -> int: + """Number of dialog patterns loaded (0 in LLM mode).""" + return len(self._simple.responses) if self._simple else 0 + + def reset_history(self) -> None: + self.history.clear() + + def train(self, dialog_file: str) -> None: + """Retrain the pattern-matching bot from a CSV file.""" + if self._simple: + self._simple.train(dialog_file) + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _update_history(self, user_msg: str, bot_reply: str) -> None: + self.history.append({"role": "user", "content": user_msg}) + self.history.append({"role": "assistant", "content": bot_reply}) + # Trim to avoid unbounded growth + if len(self.history) > self.MAX_HISTORY * 2: + self.history = self.history[-(self.MAX_HISTORY * 2):] + + def _load_training_data(self, dialog_file: Optional[str]) -> None: + if dialog_file and os.path.exists(dialog_file): + self._simple.train(dialog_file) # type: ignore[union-attr] + return + + # Try Kaggle dataset first, fall back to local dialog.csv + try: + import kagglehub + print("Attempting to download chatbot data from Kaggle...") + data_path = kagglehub.dataset_download("grafstor/simple-dialogs-for-chatbot") + + for root_dir, _, files in os.walk(data_path): + for fname in files: + if fname.lower() == "dialogs.txt": + kaggle_file = os.path.join(root_dir, fname) + converted = _parse_kaggle_dialogs(kaggle_file) + if converted: + print("Training chatbot with Kaggle data...") + self._simple.train(converted) # type: ignore[union-attr] + return + raise FileNotFoundError("dialogs.txt not found in Kaggle dataset") + + except Exception as exc: + print(f"Kaggle dataset unavailable ({exc}), using local data.") + + local = os.path.join(os.path.dirname(__file__), "dialog.csv") + if os.path.exists(local): + print("Training chatbot with local dialog data...") + self._simple.train(local) # type: ignore[union-attr] + else: + print("No training data found; bot will use default responses only.") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _parse_kaggle_dialogs(file_path: str) -> Optional[str]: + """Convert Kaggle's dialogs.txt into the CSV format expected by SimpleBot.""" try: print(f"Parsing Kaggle dialogs from {file_path}...") - dialog_pairs = [] - current_dialog_id = 0 - - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.readlines() - - # Create a temporary CSV file in the expected format + with open(file_path, "r", encoding="utf-8") as fh: + lines = fh.readlines() + temp_csv_path = os.path.join(os.path.dirname(file_path), "converted_dialog.csv") - with open(temp_csv_path, 'w', encoding='utf-8') as f: - f.write("dialog_id,line_id,text\n") - - for i in range(0, len(lines)-1, 2): - if i+1 < len(lines): - question = lines[i].strip() - answer = lines[i+1].strip() - - if question and answer: - current_dialog_id += 1 - f.write(f"{current_dialog_id},1,{question}\n") - f.write(f"{current_dialog_id},2,{answer}\n") - - print(f"Converted {current_dialog_id} dialog pairs to CSV format") + dialog_id = 0 + with open(temp_csv_path, "w", encoding="utf-8") as fh: + fh.write("dialog_id,line_id,text\n") + for i in range(0, len(lines) - 1, 2): + q, a = lines[i].strip(), lines[i + 1].strip() + if q and a: + dialog_id += 1 + fh.write(f"{dialog_id},1,{q}\n") + fh.write(f"{dialog_id},2,{a}\n") + + print(f"Converted {dialog_id} dialog pairs to CSV format") return temp_csv_path - except Exception as e: - print(f"Error parsing Kaggle dialogs: {e}") + except Exception as exc: + print(f"Error parsing Kaggle dialogs: {exc}") return None -# Try to download the dataset from Kaggle using kagglehub -try: - print("Attempting to download chatbot data from Kaggle...") - data_path = kagglehub.dataset_download("grafstor/simple-dialogs-for-chatbot") - - # Look for dialogs.txt (the actual file in the dataset) - for root, dirs, files in os.walk(data_path): - for file in files: - if file.lower() == "dialogs.txt": - kaggle_dialog_file = os.path.join(root, file) - print(f"Found Kaggle dialog file: {kaggle_dialog_file}") - - # Convert the Kaggle format to our expected CSV format - converted_file = parse_kaggle_dialogs(kaggle_dialog_file) - if converted_file: - print("Training chatbot with Kaggle data...") - chatbot.train(converted_file) - break - else: - continue - break - else: - raise FileNotFoundError("Dialog file not found in Kaggle dataset") - -except Exception as e: - print(f"Error with Kaggle dataset: {e}") - print("Falling back to local dialog data...") - - # Fallback to local dialog.csv file - local_dialog_file = "dialog.csv" - try: - print("Training chatbot with local dialog data...") - chatbot.train(local_dialog_file) - except Exception as e: - print(f"Error loading local training data: {e}") -# Create GUI -root = tk.Tk() -root.title("Chat with GUIBot") -root.geometry("500x550") +# --------------------------------------------------------------------------- +# Tkinter GUI (only runs when this file is executed directly) +# --------------------------------------------------------------------------- -chat_log = scrolledtext.ScrolledText(root, wrap=tk.WORD) -chat_log.config(state=tk.DISABLED) -chat_log.pack(padx=10, pady=10, fill=tk.BOTH, expand=True) +def _run_gui() -> None: + import tkinter as tk + from tkinter import scrolledtext -entry_frame = tk.Frame(root) -entry_frame.pack(padx=10, pady=10, fill=tk.X) + bot = ChatBot() -user_input = tk.Entry(entry_frame, font=("Arial", 14)) -user_input.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 10)) + gui_root = tk.Tk() + gui_root.title("Chat with AI Bot") + gui_root.geometry("500x580") + # ── mode badge ────────────────────────────────────────────────────────── + mode_label = tk.Label( + gui_root, + text=f"Mode: {'🤖 LLM (OpenAI)' if bot.mode == 'llm' else '📋 Pattern Matching'}", + font=("Arial", 10), + fg="#555", + ) + mode_label.pack(anchor="w", padx=12, pady=(6, 0)) -def send_message(): - message = user_input.get() - if message.strip(): + # ── chat log ──────────────────────────────────────────────────────────── + chat_log = scrolledtext.ScrolledText(gui_root, wrap=tk.WORD) + chat_log.config(state=tk.DISABLED) + chat_log.pack(padx=10, pady=6, fill=tk.BOTH, expand=True) + + # ── input row ─────────────────────────────────────────────────────────── + entry_frame = tk.Frame(gui_root) + entry_frame.pack(padx=10, pady=10, fill=tk.X) + + user_input = tk.Entry(entry_frame, font=("Arial", 14)) + user_input.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 10)) + + def send_message() -> None: + message = user_input.get().strip() + if not message: + return chat_log.config(state=tk.NORMAL) - chat_log.insert(tk.END, "You: " + message + "\n") - response = chatbot.get_response(message) - chat_log.insert(tk.END, "Bot: " + str(response) + "\n\n") + chat_log.insert(tk.END, f"You: {message}\n") + user_input.delete(0, tk.END) + + response = bot.chat(message) + chat_log.insert(tk.END, f"Bot: {response}\n\n") chat_log.config(state=tk.DISABLED) chat_log.yview(tk.END) - user_input.delete(0, tk.END) -send_btn = tk.Button(entry_frame, text="Send", command=send_message) -send_btn.pack(side=tk.RIGHT) + send_btn = tk.Button(entry_frame, text="Send", command=send_message) + send_btn.pack(side=tk.RIGHT) + + user_input.bind("", lambda _: send_message()) + + # ── welcome message ───────────────────────────────────────────────────── + chat_log.config(state=tk.NORMAL) + chat_log.insert(tk.END, "Bot: Hello! I'm your AI chatbot. How can I help you today?\n\n") + chat_log.config(state=tk.DISABLED) -user_input.bind("", lambda event=None: send_message()) + print("Starting GUI chatbot...") + gui_root.mainloop() -# Add a welcome message -chat_log.config(state=tk.NORMAL) -chat_log.insert(tk.END, "Bot: Hello! I'm your simple chatbot. How can I help you today?\n\n") -chat_log.config(state=tk.DISABLED) -print("Starting GUI chatbot...") -root.mainloop() +if __name__ == "__main__": + _run_gui() diff --git a/api.py b/api.py new file mode 100644 index 0000000..42b2689 --- /dev/null +++ b/api.py @@ -0,0 +1,147 @@ +""" +api.py — FastAPI REST backend for AI ChatBot. + +Run with: + uvicorn api:app --reload + +Endpoints: + POST /chat Send a message and receive a reply. + POST /train Reload the pattern-matching bot from a CSV file. + GET /health Health check. + DELETE /sessions/{id} Reset a conversation session. + +Environment variables: + OPENAI_API_KEY Optional – enables the LLM backend. +""" + +import os +import uuid +from typing import Optional + +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel + +# Load .env if available +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +from ai_chatbot import ChatBot + +# --------------------------------------------------------------------------- +# App setup +# --------------------------------------------------------------------------- + +app = FastAPI( + title="AI ChatBot API", + description=( + "REST API for the AI ChatBot. Supports LLM (OpenAI) and offline " + "pattern-matching backends. Maintains per-session conversation history." + ), + version="1.0.0", +) + +# Per-session bots (keyed by session_id string) +_sessions: dict[str, ChatBot] = {} + + +def clear_all_sessions() -> None: + """Remove all active sessions. Intended for use in tests.""" + _sessions.clear() + + +def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]: + """Return (session_id, ChatBot) for the given session; create if missing.""" + if session_id is None: + # Create a new session + session_id = str(uuid.uuid4()) + + if session_id not in _sessions: + _sessions[session_id] = ChatBot() + + return session_id, _sessions[session_id] + + +# --------------------------------------------------------------------------- +# Request / Response schemas +# --------------------------------------------------------------------------- + +class ChatRequest(BaseModel): + message: str + session_id: Optional[str] = None + + +class ChatResponse(BaseModel): + reply: str + session_id: str + mode: str + + +class TrainRequest(BaseModel): + dialog_file: str + + +class TrainResponse(BaseModel): + status: str + patterns_loaded: int + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + +@app.get("/health") +def health_check(): + """Simple health check.""" + return {"status": "ok"} + + +@app.post("/chat", response_model=ChatResponse) +def chat(request: ChatRequest): + """Send a message and receive a reply. + + If *session_id* is omitted, a new session is created and its ID is + returned so the client can continue the conversation. + """ + if not request.message.strip(): + raise HTTPException(status_code=400, detail="Message must not be empty.") + + session_id, bot = _get_or_create_session(request.session_id) + reply = bot.chat(request.message) + + return ChatResponse(reply=reply, session_id=session_id, mode=bot.mode) + + +@app.delete("/sessions/{session_id}") +def reset_session(session_id: str): + """Delete a conversation session (clears history).""" + if session_id in _sessions: + del _sessions[session_id] + return {"status": "session deleted", "session_id": session_id} + raise HTTPException(status_code=404, detail="Session not found.") + + +@app.post("/train", response_model=TrainResponse) +def train(request: TrainRequest): + """Reload pattern-matching data from a CSV file on the server. + + The file must exist on the server filesystem. This endpoint only + affects future sessions created after the reload (existing sessions + keep their current bot instance). + """ + if not os.path.exists(request.dialog_file): + raise HTTPException( + status_code=404, + detail=f"File not found: {request.dialog_file}", + ) + + # Retrain a fresh bot and store it as the template for new sessions + fresh_bot = ChatBot(dialog_file=request.dialog_file, force_offline=True) + patterns = fresh_bot.pattern_count + + # Clear all existing sessions so next requests pick up new data + _sessions.clear() + + return TrainResponse(status="retrained", patterns_loaded=patterns) diff --git a/requirements.txt b/requirements.txt index 7c416df..256e933 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,17 @@ -# Dependencies for the chatbot with Kaggle dataset integration -kagglehub \ No newline at end of file +# Core +kagglehub +python-dotenv>=1.0.0 + +# LLM backend (optional — enables OpenAI-powered responses) +openai>=1.0.0 + +# Web UI +streamlit>=1.28.0 + +# REST API +fastapi>=0.104.0 +uvicorn[standard]>=0.23.0 + +# Testing +pytest>=7.4.0 +httpx>=0.25.0 \ No newline at end of file diff --git a/test_chatbot.py b/test_chatbot.py new file mode 100644 index 0000000..fb814a2 --- /dev/null +++ b/test_chatbot.py @@ -0,0 +1,218 @@ +""" +test_chatbot.py — Unit tests for AI ChatBot. + +Run with: + pytest test_chatbot.py -v +""" + +import os +import tempfile +import pytest +from fastapi.testclient import TestClient + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +DIALOG_CSV = """\ +dialog_id,line_id,text +1,1,hello +1,2,Hi there! How can I help? +2,1,what is your name +2,2,I'm a chatbot built with Python! +3,1,bye +3,2,Goodbye! Have a great day. +""" + + +@pytest.fixture +def dialog_file(tmp_path): + """Write a small dialog CSV and return its path.""" + path = tmp_path / "test_dialog.csv" + path.write_text(DIALOG_CSV, encoding="utf-8") + return str(path) + + +# --------------------------------------------------------------------------- +# SimpleBot tests +# --------------------------------------------------------------------------- + +class TestSimpleBot: + def test_train_loads_patterns(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + assert len(bot.responses) == 3 + + def test_exact_match(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + assert bot.get_response("hello") == "Hi there! How can I help?" + + def test_partial_match(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + reply = bot.get_response("hey, hello there") + assert reply == "Hi there! How can I help?" + + def test_default_response_on_unknown(self, dialog_file): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train(dialog_file) + reply = bot.get_response("zxqwerty unknown phrase 12345") + assert isinstance(reply, str) + assert len(reply) > 0 + + def test_train_missing_file(self): + from ai_chatbot import SimpleBot + + bot = SimpleBot() + bot.train("/nonexistent/path/dialog.csv") + assert bot.responses == {} + + +# --------------------------------------------------------------------------- +# ChatBot (facade) tests — forced offline mode +# --------------------------------------------------------------------------- + +class TestChatBot: + def test_offline_mode(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + assert bot.mode == "pattern" + + def test_chat_returns_string(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + reply = bot.chat("hello") + assert isinstance(reply, str) + assert len(reply) > 0 + + def test_history_grows_with_turns(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + assert bot.history == [] + bot.chat("hello") + assert len(bot.history) == 2 # one user + one assistant + + def test_reset_history(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + bot.chat("hello") + bot.reset_history() + assert bot.history == [] + + def test_stream_chat_yields_text(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + tokens = list(bot.stream_chat("hello")) + assert len(tokens) >= 1 + assert "".join(tokens) == "Hi there! How can I help?" + + def test_history_capped(self, dialog_file): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + # Drive history past MAX_HISTORY * 2 + for _ in range(bot.MAX_HISTORY + 5): + bot.chat("hello") + assert len(bot.history) <= bot.MAX_HISTORY * 2 + + def test_retrain(self, dialog_file, tmp_path): + from ai_chatbot import ChatBot + + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + + new_csv = tmp_path / "new_dialog.csv" + new_csv.write_text( + "dialog_id,line_id,text\n1,1,howdy\n1,2,Howdy partner!\n", + encoding="utf-8", + ) + bot.train(str(new_csv)) + assert bot.chat("howdy") == "Howdy partner!" + + +# --------------------------------------------------------------------------- +# FastAPI tests +# --------------------------------------------------------------------------- + +@pytest.fixture +def api_client(dialog_file, monkeypatch): + """Return a TestClient with the FastAPI app; force offline mode.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + # Reset module-level state between tests + import api as api_module + api_module.clear_all_sessions() + + # Pre-populate a session backed by test data + from ai_chatbot import ChatBot + bot = ChatBot(dialog_file=dialog_file, force_offline=True) + sid = "test-session" + api_module._sessions[sid] = bot + + from fastapi.testclient import TestClient + return TestClient(api_module.app), sid + + +class TestAPI: + def test_health(self, api_client): + client, _ = api_client + resp = client.get("/health") + assert resp.status_code == 200 + assert resp.json()["status"] == "ok" + + def test_chat_creates_session(self, api_client): + client, _ = api_client + resp = client.post("/chat", json={"message": "hello"}) + assert resp.status_code == 200 + data = resp.json() + assert "reply" in data + assert "session_id" in data + assert data["mode"] in ("llm", "pattern") + + def test_chat_with_session(self, api_client): + client, sid = api_client + resp = client.post("/chat", json={"message": "hello", "session_id": sid}) + assert resp.status_code == 200 + data = resp.json() + assert data["session_id"] == sid + assert isinstance(data["reply"], str) + + def test_chat_empty_message(self, api_client): + client, _ = api_client + resp = client.post("/chat", json={"message": " "}) + assert resp.status_code == 400 + + def test_delete_session(self, api_client): + client, sid = api_client + resp = client.delete(f"/sessions/{sid}") + assert resp.status_code == 200 + + # Session should be gone now + resp2 = client.delete(f"/sessions/{sid}") + assert resp2.status_code == 404 + + def test_train_valid_file(self, api_client, dialog_file): + client, _ = api_client + resp = client.post("/train", json={"dialog_file": dialog_file}) + assert resp.status_code == 200 + data = resp.json() + assert data["status"] == "retrained" + assert data["patterns_loaded"] > 0 + + def test_train_missing_file(self, api_client): + client, _ = api_client + resp = client.post("/train", json={"dialog_file": "/nonexistent/file.csv"}) + assert resp.status_code == 404 diff --git a/web_demo.py b/web_demo.py new file mode 100644 index 0000000..d141b9f --- /dev/null +++ b/web_demo.py @@ -0,0 +1,121 @@ +""" +web_demo.py — Streamlit web interface for AI ChatBot. + +Run with: + streamlit run web_demo.py + +Environment variables: + OPENAI_API_KEY Set this to enable the LLM backend (optional). +""" + +import os + +import streamlit as st + +# Load .env if available +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +from ai_chatbot import ChatBot + +# --------------------------------------------------------------------------- +# Page configuration +# --------------------------------------------------------------------------- +st.set_page_config( + page_title="AI ChatBot", + page_icon="🤖", + layout="centered", +) + +st.title("🤖 AI ChatBot") +st.caption( + "Powered by **OpenAI GPT** (LLM mode) when `OPENAI_API_KEY` is set, " + "otherwise uses offline **pattern-matching**." +) + +# --------------------------------------------------------------------------- +# Session-level chatbot instance +# --------------------------------------------------------------------------- +@st.cache_resource +def _get_bot_factory() -> ChatBot: + """Return a template bot (loads data once); actual per-session bots copy from this.""" + return ChatBot() + + +# Per-session bot stored in session_state so each browser tab/user gets its own history +if "bot" not in st.session_state: + st.session_state.bot = _get_bot_factory() + +bot: ChatBot = st.session_state.bot + +# Initialise message history in session state +if "messages" not in st.session_state: + st.session_state.messages: list[dict] = [] + st.session_state.messages.append( + { + "role": "assistant", + "content": "Hello! I'm your AI chatbot. How can I help you today?", + } + ) + +# --------------------------------------------------------------------------- +# Sidebar +# --------------------------------------------------------------------------- +with st.sidebar: + st.header("⚙️ Settings") + st.info(f"**Backend:** {'🤖 LLM (OpenAI)' if bot.mode == 'llm' else '📋 Pattern Matching'}") + + if st.button("🗑️ Clear conversation"): + st.session_state.messages = [ + { + "role": "assistant", + "content": "Conversation cleared. How can I help you?", + } + ] + bot.reset_history() + st.rerun() + + st.markdown("---") + st.markdown("### About") + st.markdown( + "This chatbot supports two backends:\n" + "- **LLM mode**: uses OpenAI's API for intelligent, context-aware replies.\n" + "- **Pattern mode**: offline fallback using a dialog dataset.\n\n" + "Set `OPENAI_API_KEY` in your environment or a `.env` file to enable LLM mode." + ) + +# --------------------------------------------------------------------------- +# Chat history display +# --------------------------------------------------------------------------- +for msg in st.session_state.messages: + with st.chat_message(msg["role"]): + st.markdown(msg["content"]) + +# --------------------------------------------------------------------------- +# Chat input +# --------------------------------------------------------------------------- +if prompt := st.chat_input("Type a message…"): + # Show user message + st.session_state.messages.append({"role": "user", "content": prompt}) + with st.chat_message("user"): + st.markdown(prompt) + + # Generate and stream the assistant reply + with st.chat_message("assistant"): + if bot.mode == "llm": + # Streaming response + response_placeholder = st.empty() + full_response = "" + for token in bot.stream_chat(prompt): + full_response += token + response_placeholder.markdown(full_response + "▌") + response_placeholder.markdown(full_response) + reply = full_response + else: + reply = bot.chat(prompt) + st.markdown(reply) + + st.session_state.messages.append({"role": "assistant", "content": reply}) From 70afb6911f75876c3e4be9d700d40db5bc343a81 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 7 Mar 2026 07:35:17 +0000 Subject: [PATCH 3/4] Resolve conflicts with main; address review feedback in api.py, ai_chatbot.py, web_demo.py and test_chatbot.py Co-authored-by: joshuvavinith <146979257+joshuvavinith@users.noreply.github.com> --- .github/workflows/ci.yml | 55 ++++-- README.md | 352 ++++++++++++++++++++++++++------------- ai_chatbot.py | 10 +- api.py | 24 ++- requirements.txt | 1 + test_chatbot.py | 31 +++- web_demo.py | 9 +- 7 files changed, 324 insertions(+), 158 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 75d7c6a..bc6e287 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,45 +6,66 @@ on: pull_request: branches: ["main", "master"] -# Restrict default GITHUB_TOKEN permissions to read-only permissions: contents: read jobs: - lint-and-test: + lint: runs-on: ubuntu-latest - permissions: - contents: read steps: - uses: actions/checkout@v4 - - name: Set up Python + - name: Set up Python 3.12 uses: actions/setup-python@v5 with: python-version: "3.12" + cache: "pip" + + - name: Install flake8 + run: pip install flake8 + + - name: Lint — blocking (syntax errors / undefined names) + run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + + - name: Lint — style warnings (non-blocking) + run: flake8 . --count --exit-zero --max-line-length=100 --statistics + + test: + needs: lint + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" - name: Install dependencies run: | python -m pip install --upgrade pip pip install -r requirements.txt - - name: Lint with flake8 - run: | - pip install flake8 - # Stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # Treat all other issues as warnings (non-blocking) - flake8 . --count --exit-zero --max-line-length=100 --statistics + - name: Run tests with coverage + env: + OPENAI_API_KEY: "offline-mode-no-key-required" + run: pytest test_chatbot.py -v --tb=short --cov=. --cov-report=xml - - name: Run tests - run: pytest test_chatbot.py -v + - name: Upload coverage report + uses: actions/upload-artifact@v4 + with: + name: coverage-${{ matrix.python-version }} + path: coverage.xml docker-build: + needs: test runs-on: ubuntu-latest - needs: lint-and-test - permissions: - contents: read steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index eee76cc..75652c4 100644 --- a/README.md +++ b/README.md @@ -1,220 +1,332 @@ -# 🤖 Simple Python Chatbot with GUI (Tkinter) +# 🤖 AI ChatBot -## 🚀 Project Description -Welcome to the **Simple Python Chatbot Project with GUI**! This repository provides an interactive chatbot built with **Python**, using pattern matching on a dialog dataset from Kaggle, and running inside a friendly **Tkinter-based desktop GUI**. +![CI](https://github.com/joshuvavinith/AI_ChatBot/actions/workflows/ci.yml/badge.svg) +![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue) +![License](https://img.shields.io/badge/license-MIT-green) -The chatbot uses a simple but effective pattern matching approach to respond to user queries based on a dataset of over 1,700 dialog patterns. It automatically downloads a rich conversation dataset from Kaggle to provide more natural and varied responses. If Kaggle is unavailable, it falls back to a local dataset. It's perfect for beginners, students, or hobby projects. +A modern, extensible Python chatbot that generates context-aware, dynamic +responses. It ships with two interchangeable backends and three ready-to-use +interfaces — no external services are required to get started. + +| Mode | When active | What it uses | +|------|-------------|--------------| +| **LLM** | `OPENAI_API_KEY` is set | OpenAI Chat Completions (GPT-3.5 / GPT-4o) | +| **Pattern Matching** | No API key present | Offline CSV dialog dataset | + +Three interfaces are included out of the box: + +| Interface | Entry point | Default URL | +|-----------|-------------|-------------| +| **Streamlit web app** | `streamlit run web_demo.py` | `http://localhost:8501` | +| **FastAPI REST API** | `uvicorn api:app --reload` | `http://localhost:8000/docs` | +| **Tkinter desktop GUI** | `python ai_chatbot.py` | *(native window)* | --- ## 📚 Table of Contents - [Key Features](#-key-features) -- [Technologies Used](#-technologies-used) -- [Installation Instructions](#-installation-instructions) -- [Kaggle Dataset Setup](#-kaggle-dataset-setup) -- [Usage Example](#-usage-example) -- [Development Process](#-development-process) -- [Contributing Guidelines](#-contributing-guidelines) -- [Evaluation Metrics](#-evaluation-metrics) -- [Future Work](#-future-work) -- [Architecture Diagram](#-architecture-diagram) -- [Interaction with the Chatbot](#-interaction-with-the-chatbot) -- [Additional Information](#-additional-information) -- [Connect with Us](#-connect-with-us) +- [Quick Start](#-quick-start) +- [Prerequisites](#prerequisites) +- [Installation](#-installation) +- [Configuration](#-configuration) +- [Running the Chatbot](#-running-the-chatbot) + - [Desktop GUI (Tkinter)](#-desktop-gui-tkinter) + - [Web UI (Streamlit)](#-web-ui-streamlit) + - [REST API (FastAPI)](#-rest-api-fastapi) +- [Docker](#-docker) +- [API Reference](#-api-reference) +- [Testing](#-testing) +- [CI/CD](#-cicd) +- [Project Structure](#-project-structure) +- [Architecture](#-architecture) +- [Contributing](#-contributing) --- ## ✨ Key Features -- 🔍 **Pattern matching chatbot** using simple but effective techniques -- 💬 **Interactive GUI** built with Tkinter -- 📥 **Kaggle dataset integration** with automatic download -- 📝 **Fallback to local dialog dataset** if Kaggle is unavailable -- 🔁 Supports exact and partial matching for better responses -- ⚡ Lightweight with minimal external dependencies -- 🧩 Easily extensible by adding more dialog patterns +- 🤖 **LLM backend** — connects to OpenAI's API for intelligent, context-aware responses +- 📋 **Offline fallback** — pattern matching on a dialog dataset; works without internet/API key +- 🌐 **Streamlit web UI** — chat from any browser with streaming token output (LLM mode) +- 🔌 **FastAPI REST API** — `/chat` and `/train` endpoints; per-session conversation memory +- 🖥️ **Tkinter desktop GUI** — original GUI updated to show backend mode +- 🧠 **Conversation memory** — recent exchanges are passed to the LLM for follow-up questions +- 🐳 **Docker support** — single image supports both web and API modes via `MODE` build arg +- ✅ **Tests** — pytest suite covering core logic and API endpoints +- 🔄 **CI/CD** — GitHub Actions workflow: lint → test → Docker build --- -## 🛠️ Technologies Used +## 🚀 Quick Start + +```bash +git clone https://github.com/joshuvavinith/AI_ChatBot.git +cd AI_ChatBot +pip install -r requirements.txt + +# (optional) enable LLM mode +echo "OPENAI_API_KEY=sk-..." > .env -- **Python 3.x** – Works with any modern Python version -- **Tkinter** – Built-in GUI framework -- **KaggleHub** – For downloading Kaggle datasets -- **CSV** – For reading dialog data -- **Random** – For selecting varied responses -- **Git** – For version control +# Start the web UI +streamlit run web_demo.py +``` -> ✅ This implementation uses minimal external dependencies, with KaggleHub being the only non-standard library required. The core functionality works even without internet access by falling back to local data. +--- + +## Prerequisites + +- **Python 3.10 or later** (tested on 3.10, 3.11, and 3.12) +- **pip** (included with Python) +- *(Optional)* An [OpenAI API key](https://platform.openai.com/account/api-keys) to enable LLM mode +- *(Optional)* [Docker](https://docs.docker.com/get-docker/) for containerised deployment --- -## 🔧 Installation Instructions +## 🔧 Installation -### 1. Clone the Repository +### 1. Clone & set up environment ```bash git clone https://github.com/joshuvavinith/AI_ChatBot.git cd AI_ChatBot +python -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate +pip install -r requirements.txt ``` -### 2. Create a Virtual Environment (Recommended) +### 2. (Optional) Configure API key -```bash -python -m venv venv -source venv/bin/activate # On Windows: venv\Scripts\activate +Create a `.env` file in the project root: + +```dotenv +OPENAI_API_KEY=sk-your-key-here ``` -### 3. Install Dependencies +Or export it as an environment variable: ```bash -pip install -r requirements.txt +export OPENAI_API_KEY=sk-your-key-here ``` +Without an API key the bot automatically falls back to offline pattern matching. + +--- + +## ⚙️ Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `OPENAI_API_KEY` | *(unset)* | Enables LLM mode when present | + +### Kaggle Dataset (optional) + +The pattern-matching bot can use a richer Kaggle dialog dataset. +To enable it, place `kaggle.json` in `~/.kaggle/` (or set `KAGGLE_USERNAME` / `KAGGLE_KEY`). +If unavailable, the bot falls back to `dialog.csv`. + --- -## 🔑 Kaggle Dataset Setup +## 💬 Running the Chatbot -This project uses the [Simple Dialogs for Chatbot](https://www.kaggle.com/datasets/grafstor/simple-dialogs-for-chatbot) dataset via **KaggleHub**. The dataset contains over 1,800 conversation pairs that significantly enhance the chatbot's response capabilities. +### 🖥️ Desktop GUI (Tkinter) -### Dataset Features: +```bash +python ai_chatbot.py +``` -- 1,800+ question-answer pairs -- Covers a wide range of casual conversation topics -- Automatically downloaded and processed at runtime -- Converted from TXT to CSV format for compatibility +### 🌐 Web UI (Streamlit) -### To enable Kaggle downloads: +```bash +streamlit run web_demo.py +``` -1. Go to [kaggle.com/account](https://www.kaggle.com/account) and create an API token. -2. Download the `kaggle.json` file. -3. Place it in: +Open your browser at `http://localhost:8501`. - - Linux/macOS: `~/.kaggle/kaggle.json` - - Windows: `C:\Users\\.kaggle\kaggle.json` +Features: +- Full conversation history +- Streaming token output in LLM mode (looks like ChatGPT) +- "Clear conversation" button in the sidebar -Alternatively, set environment variables: +### 🔌 REST API (FastAPI) ```bash -export KAGGLE_USERNAME=your_username -export KAGGLE_KEY=your_key +uvicorn api:app --reload ``` -> ✅ If the dataset can't be downloaded, the chatbot will automatically fall back to the local dialog.csv file with basic conversation patterns. +Interactive docs available at `http://localhost:8000/docs`. --- -## 💬 Usage Example +## 🐳 Docker -### ▶️ To Run the GUI Chatbot: +### Build ```bash -python ai_chatbot.py +# Web UI (default) +docker build -t ai-chatbot:web . + +# API mode +docker build --build-arg MODE=api -t ai-chatbot:api . ``` -### 🖥️ GUI Features: +### Run + +```bash +# Web UI — visit http://localhost:8501 +docker run -p 8501:8501 -e OPENAI_API_KEY=sk-... ai-chatbot:web -- Type your message in the input box -- Hit **Enter** or click **Send** -- The chatbot responds immediately -- Say `"bye"` or `"exit"` to end the chat +# REST API — visit http://localhost:8000/docs +docker run -p 8000:8000 -e OPENAI_API_KEY=sk-... ai-chatbot:api +``` --- -## 🧠 Development Process +## 📡 API Reference -1. **Dataset Retrieval**: Uses `kagglehub` to fetch dialog data from Kaggle -2. **Training**: Trains ChatterBot using NLTK preprocessed dialogs -3. **Interface**: Built with Tkinter for easy interaction -4. **Fallback**: Uses hardcoded sample training data if download fails +### `POST /chat` ---- +Send a message and get a reply. Omit `session_id` to start a new session. + +```json +// Request +{ "message": "Hello!", "session_id": "optional-uuid" } + +// Response +{ + "reply": "Hi there! How can I help you?", + "session_id": "550e8400-e29b-41d4-a716-446655440000", + "mode": "pattern" +} +``` + +### `DELETE /sessions/{session_id}` + +Reset (delete) a conversation session. -## 🤝 Contributing Guidelines +### `POST /train` -We welcome contributions! 🙌 +Reload pattern-matching data from a CSV file on the server. -### How to Contribute: +```json +// Request +{ "dialog_file": "/path/to/dialog.csv" } -1. **Fork this repository** -2. Create a branch: +// Response +{ "status": "retrained", "patterns_loaded": 42 } +``` - ```bash - git checkout -b feature-branch - ``` +### `GET /health` -3. Make your changes and commit: +```json +{ "status": "ok" } +``` - ```bash - git commit -m "Add new feature" - ``` +--- -4. Push and create a PR: +## 🧪 Testing - ```bash - git push origin feature-branch - ``` +```bash +pytest test_chatbot.py -v +``` -> 💡 Follow Python best practices and test before submitting. +The test suite covers: +- `SimpleBot` — training, exact/partial matching, defaults, missing file +- `ChatBot` — offline mode, history management, streaming, history cap, retraining +- FastAPI — all endpoints (health, chat, delete session, train) --- -## 📈 Evaluation Metrics +## 🔄 CI/CD + +GitHub Actions runs on every push and pull request to `main`: + +1. **Lint** — `flake8` checks for syntax errors and undefined names +2. **Test** — `pytest` full suite with coverage, across Python 3.10, 3.11, and 3.12 +3. **Docker build** — both `web` and `api` images are built to verify the Dockerfile -- **BLEU Score** – Quality of generated response -- **Accuracy** – Expected vs actual answers -- **Responsiveness** – Time between input and output -- **User Feedback** – Manual quality testing +Coverage reports are uploaded as build artifacts for each Python version. --- -## 🌱 Future Work +## 📂 Project Structure -- 🌐 Add API/web support for Flask or FastAPI -- 🧠 Switch to GPT/BERT for smarter conversations -- 🗣️ Voice integration with `speech_recognition` -- 💾 Save and reload previous conversation history -- 🖥️ Package as a desktop app using `pyinstaller` +``` +AI_ChatBot/ +├── ai_chatbot.py # Core module: SimpleBot, LLMBot, ChatBot facade, Tkinter GUI +├── api.py # FastAPI REST backend +├── web_demo.py # Streamlit web interface +├── dialog.csv # Default offline dialog dataset +├── test_chatbot.py # Pytest test suite (SimpleBot, ChatBot, FastAPI) +├── requirements.txt # Python dependencies +├── Dockerfile # Multi-mode Docker image (web / api) +├── LICENSE # MIT License +└── .github/ + └── workflows/ + └── ci.yml # CI pipeline: lint → test → Docker build +``` --- -## 📊 Architecture Diagram +## 📐 Architecture ``` -+-------------+ +----------------------+ +-------------+ -| User Input +------->+ ChatBot Engine +------->+ Bot Reply | -+------+------+ +----------------------+ +-------------+ - | - v - [ Tkinter GUI ] - | - v -[ Kaggle Dataset Trainer ] ++------------------+ +------------------+ +------------------+ +| Streamlit Web | | FastAPI REST | | Tkinter Desktop | +| (web_demo.py) | | (api.py) | | (ai_chatbot.py) | ++--------+---------+ +--------+---------+ +--------+---------+ + | | | + +------------------------+-------------------------+ + | + +--------v---------+ + | ChatBot | ← ai_chatbot.py + | (facade) | + +--+----------+----+ + | | + +------------+ +------------+ + | | + +--------v---------+ +-----------v------+ + | LLMBot | | SimpleBot | + | (OpenAI API) | | (CSV patterns) | + +------------------+ +------------------+ ``` --- -## 💬 Interaction with the Chatbot +## 🤝 Contributing + +1. Fork this repository +2. Create a feature branch: `git checkout -b feature/my-feature` +3. Make your changes and run `pytest test_chatbot.py -v` +4. Commit and push: `git push origin feature/my-feature` +5. Open a pull request + +Please follow PEP 8 and include tests for any new logic. -The chatbot can be integrated or extended with: +### Adding dialog patterns + +To extend the offline pattern-matching bot, add rows to `dialog.csv`. +Each question/answer pair uses two rows sharing the same `dialog_id`: + +```csv +dialog_id,line_id,text +8,1,What is Python? +8,2,Python is a popular programming language! +``` -- 📚 Custom datasets (CSV/TXT) -- ☁️ Cloud API support -- 🔊 Voice UI -- 💡 Smart context-based conversations +- `line_id` **1** = the user question (matched case-insensitively) +- `line_id` **2** = the bot response --- -## 📱 Additional Information +## 📄 License -- **Live Demo**: Coming soon! -- **License**: [MIT License](./LICENSE) +[MIT License](./LICENSE) --- -## 🔗 Connect with Us +## 🔗 Connect -- 📧 Email: [joshuvavinith.g@care.ac.in](mailto:joshuvavinith.g@care.ac.in) -- 🐙 GitHub: [@joshuvavinith](https://github.com/joshuvavinith) +- 📧 [joshuavinith@gmail.com](mailto:joshuavinith@gmail.com) +- 🐙 [@joshuvavinith](https://github.com/joshuvavinith) diff --git a/ai_chatbot.py b/ai_chatbot.py index baf17bd..64ee8da 100644 --- a/ai_chatbot.py +++ b/ai_chatbot.py @@ -26,6 +26,7 @@ # Pattern-matching backend # --------------------------------------------------------------------------- + class SimpleBot: """Offline chatbot backed by a CSV dialog dataset.""" @@ -265,14 +266,15 @@ def _parse_kaggle_dialogs(file_path: str) -> Optional[str]: temp_csv_path = os.path.join(os.path.dirname(file_path), "converted_dialog.csv") dialog_id = 0 - with open(temp_csv_path, "w", encoding="utf-8") as fh: - fh.write("dialog_id,line_id,text\n") + with open(temp_csv_path, "w", encoding="utf-8", newline="") as fh: + writer = csv.writer(fh) + writer.writerow(["dialog_id", "line_id", "text"]) for i in range(0, len(lines) - 1, 2): q, a = lines[i].strip(), lines[i + 1].strip() if q and a: dialog_id += 1 - fh.write(f"{dialog_id},1,{q}\n") - fh.write(f"{dialog_id},2,{a}\n") + writer.writerow([dialog_id, 1, q]) + writer.writerow([dialog_id, 2, a]) print(f"Converted {dialog_id} dialog pairs to CSV format") return temp_csv_path diff --git a/api.py b/api.py index 42b2689..6d3c9ed 100644 --- a/api.py +++ b/api.py @@ -46,10 +46,15 @@ # Per-session bots (keyed by session_id string) _sessions: dict[str, ChatBot] = {} +# Dialog file used when creating new sessions (updated by /train) +_default_dialog_file: Optional[str] = None + def clear_all_sessions() -> None: - """Remove all active sessions. Intended for use in tests.""" + """Remove all active sessions and reset the default dialog file. Intended for use in tests.""" + global _default_dialog_file _sessions.clear() + _default_dialog_file = None def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]: @@ -59,7 +64,7 @@ def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]: session_id = str(uuid.uuid4()) if session_id not in _sessions: - _sessions[session_id] = ChatBot() + _sessions[session_id] = ChatBot(dialog_file=_default_dialog_file) return session_id, _sessions[session_id] @@ -127,21 +132,24 @@ def reset_session(session_id: str): def train(request: TrainRequest): """Reload pattern-matching data from a CSV file on the server. - The file must exist on the server filesystem. This endpoint only - affects future sessions created after the reload (existing sessions - keep their current bot instance). + The file must exist on the server filesystem. All existing sessions are + cleared so that subsequent requests create new sessions using the updated + training data. """ if not os.path.exists(request.dialog_file): raise HTTPException( status_code=404, - detail=f"File not found: {request.dialog_file}", + detail="Training file not found on server.", ) - # Retrain a fresh bot and store it as the template for new sessions + # Retrain a fresh bot to validate and count the patterns fresh_bot = ChatBot(dialog_file=request.dialog_file, force_offline=True) patterns = fresh_bot.pattern_count - # Clear all existing sessions so next requests pick up new data + # Persist the dialog file so new sessions created after this point use it, + # then clear existing sessions so they pick up the new data on next request. + global _default_dialog_file + _default_dialog_file = request.dialog_file _sessions.clear() return TrainResponse(status="retrained", patterns_loaded=patterns) diff --git a/requirements.txt b/requirements.txt index 256e933..0fe8029 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,5 @@ uvicorn[standard]>=0.23.0 # Testing pytest>=7.4.0 +pytest-cov>=4.0.0 httpx>=0.25.0 \ No newline at end of file diff --git a/test_chatbot.py b/test_chatbot.py index fb814a2..4c4c07f 100644 --- a/test_chatbot.py +++ b/test_chatbot.py @@ -8,7 +8,6 @@ import os import tempfile import pytest -from fastapi.testclient import TestClient # --------------------------------------------------------------------------- # Fixtures @@ -216,3 +215,33 @@ def test_train_missing_file(self, api_client): client, _ = api_client resp = client.post("/train", json={"dialog_file": "/nonexistent/file.csv"}) assert resp.status_code == 404 + + def test_train_affects_new_chat_session(self, api_client): + client, _ = api_client + + new_dialog_content = ( + "dialog_id,line_id,text\n" + "1,1,hello\n" + "1,2,Hello from NEW dialog!\n" + ) + tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) + try: + tmp.write(new_dialog_content) + tmp.flush() + tmp_path = tmp.name + finally: + tmp.close() + + try: + # Retrain with the new dialog file + resp = client.post("/train", json={"dialog_file": tmp_path}) + assert resp.status_code == 200 + assert resp.json()["patterns_loaded"] > 0 + + # A brand-new session (no session_id) should use the new patterns + chat_resp = client.post("/chat", json={"message": "hello"}) + assert chat_resp.status_code == 200 + assert "Hello from NEW dialog" in chat_resp.json()["reply"] + finally: + if os.path.exists(tmp_path): + os.remove(tmp_path) diff --git a/web_demo.py b/web_demo.py index d141b9f..2909f04 100644 --- a/web_demo.py +++ b/web_demo.py @@ -8,8 +8,6 @@ OPENAI_API_KEY Set this to enable the LLM backend (optional). """ -import os - import streamlit as st # Load .env if available @@ -39,15 +37,10 @@ # --------------------------------------------------------------------------- # Session-level chatbot instance # --------------------------------------------------------------------------- -@st.cache_resource -def _get_bot_factory() -> ChatBot: - """Return a template bot (loads data once); actual per-session bots copy from this.""" - return ChatBot() - # Per-session bot stored in session_state so each browser tab/user gets its own history if "bot" not in st.session_state: - st.session_state.bot = _get_bot_factory() + st.session_state.bot = ChatBot() bot: ChatBot = st.session_state.bot From 741ac640d974d7330fb00ee67fbcc531e63c36cd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 7 Mar 2026 08:34:37 +0000 Subject: [PATCH 4/4] Fix CI failure: restore _default_dialog_file in api.py and other changes reverted by merge commit Co-authored-by: joshuvavinith <146979257+joshuvavinith@users.noreply.github.com> --- ai_chatbot.py | 1 + api.py | 31 +++++++++++++++++++++++-------- test_chatbot.py | 1 - web_demo.py | 9 +-------- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/ai_chatbot.py b/ai_chatbot.py index baf17bd..6f40a21 100644 --- a/ai_chatbot.py +++ b/ai_chatbot.py @@ -26,6 +26,7 @@ # Pattern-matching backend # --------------------------------------------------------------------------- + class SimpleBot: """Offline chatbot backed by a CSV dialog dataset.""" diff --git a/api.py b/api.py index 42b2689..7865b99 100644 --- a/api.py +++ b/api.py @@ -46,10 +46,15 @@ # Per-session bots (keyed by session_id string) _sessions: dict[str, ChatBot] = {} +# Dialog file used when creating new sessions (updated by /train) +_default_dialog_file: Optional[str] = None + def clear_all_sessions() -> None: - """Remove all active sessions. Intended for use in tests.""" + """Remove all active sessions and reset the default dialog file. Intended for use in tests.""" + global _default_dialog_file _sessions.clear() + _default_dialog_file = None def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]: @@ -59,7 +64,14 @@ def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]: session_id = str(uuid.uuid4()) if session_id not in _sessions: - _sessions[session_id] = ChatBot() + # When a dialog file has been explicitly configured via /train, create + # the bot in offline mode so the file is always used (no Kaggle fallback). + if _default_dialog_file is not None: + _sessions[session_id] = ChatBot( + dialog_file=_default_dialog_file, force_offline=True + ) + else: + _sessions[session_id] = ChatBot() return session_id, _sessions[session_id] @@ -127,21 +139,24 @@ def reset_session(session_id: str): def train(request: TrainRequest): """Reload pattern-matching data from a CSV file on the server. - The file must exist on the server filesystem. This endpoint only - affects future sessions created after the reload (existing sessions - keep their current bot instance). + The file must exist on the server filesystem. All existing sessions are + cleared so that subsequent requests create new sessions using the updated + training data. """ if not os.path.exists(request.dialog_file): raise HTTPException( status_code=404, - detail=f"File not found: {request.dialog_file}", + detail="Training file not found on server. Please verify the file path.", ) - # Retrain a fresh bot and store it as the template for new sessions + # Retrain a fresh bot to validate and count the patterns fresh_bot = ChatBot(dialog_file=request.dialog_file, force_offline=True) patterns = fresh_bot.pattern_count - # Clear all existing sessions so next requests pick up new data + # Persist the dialog file so new sessions created after this point use it, + # then clear existing sessions so they pick up the new data on next request. + global _default_dialog_file + _default_dialog_file = request.dialog_file _sessions.clear() return TrainResponse(status="retrained", patterns_loaded=patterns) diff --git a/test_chatbot.py b/test_chatbot.py index 468c8c6..4c4c07f 100644 --- a/test_chatbot.py +++ b/test_chatbot.py @@ -8,7 +8,6 @@ import os import tempfile import pytest -from fastapi.testclient import TestClient # --------------------------------------------------------------------------- # Fixtures diff --git a/web_demo.py b/web_demo.py index d141b9f..2909f04 100644 --- a/web_demo.py +++ b/web_demo.py @@ -8,8 +8,6 @@ OPENAI_API_KEY Set this to enable the LLM backend (optional). """ -import os - import streamlit as st # Load .env if available @@ -39,15 +37,10 @@ # --------------------------------------------------------------------------- # Session-level chatbot instance # --------------------------------------------------------------------------- -@st.cache_resource -def _get_bot_factory() -> ChatBot: - """Return a template bot (loads data once); actual per-session bots copy from this.""" - return ChatBot() - # Per-session bot stored in session_state so each browser tab/user gets its own history if "bot" not in st.session_state: - st.session_state.bot = _get_bot_factory() + st.session_state.bot = ChatBot() bot: ChatBot = st.session_state.bot