From a58b552ddfdd909ccce856837e5fe16908604be7 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 6 Mar 2026 20:14:20 +0000
Subject: [PATCH 1/4] Initial plan


From 33e1b68a3748b9701d921d6487f4a0cf1116f930 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 6 Mar 2026 20:20:21 +0000
Subject: [PATCH 2/4] Fix CI failures: add new chatbot modules and fix flake8
 F824 error in api.py

Co-authored-by: joshuvavinith <146979257+joshuvavinith@users.noreply.github.com>
---
 .github/workflows/ci.yml |  56 +++++
 .gitignore               |  27 +++
 Dockerfile               |  29 +++
 ai_chatbot.py            | 436 +++++++++++++++++++++++++++------------
 api.py                   | 147 +++++++++++++
 requirements.txt         |  19 +-
 test_chatbot.py          | 218 ++++++++++++++++++++
 web_demo.py              | 121 +++++++++++
 8 files changed, 923 insertions(+), 130 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 api.py
 create mode 100644 test_chatbot.py
 create mode 100644 web_demo.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..75d7c6a
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,56 @@
+name: CI
+
+on:
+  push:
+    branches: ["main", "master", "copilot/**"]
+  pull_request:
+    branches: ["main", "master"]
+
+# Restrict default GITHUB_TOKEN permissions to read-only
+permissions:
+  contents: read
+
+jobs:
+  lint-and-test:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Lint with flake8
+        run: |
+          pip install flake8
+          # Stop the build if there are Python syntax errors or undefined names
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          # Treat all other issues as warnings (non-blocking)
+          flake8 . --count --exit-zero --max-line-length=100 --statistics
+
+      - name: Run tests
+        run: pytest test_chatbot.py -v
+
+  docker-build:
+    runs-on: ubuntu-latest
+    needs: lint-and-test
+    permissions:
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build Docker image (web mode)
+        run: docker build --build-arg MODE=web -t ai-chatbot:web .
+
+      - name: Build Docker image (api mode)
+        run: docker build --build-arg MODE=api -t ai-chatbot:api .
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..688ab4d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+# Python cache
+__pycache__/
+*.py[cod]
+
+# Virtual environment
+venv/
+.venv/
+
+# Jupyter/IPython
+.ipynb_checkpoints/
+
+# System files
+.DS_Store
+Thumbs.db
+
+# IDE settings
+.vscode/
+.idea/
+
+# Environment / secrets
+.env
+
+# Pytest cache
+.pytest_cache/
+
+# Temporary / generated files
+converted_dialog.csv
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..6e714b6
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.12-slim
+
+# Build arguments to select the runtime mode:
+#   web  → run the Streamlit web demo  (default)
+#   api  → run the FastAPI REST backend
+ARG MODE=web
+
+WORKDIR /app
+
+# Install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy source
+COPY . .
+
+# Expose ports
+#   8501 → Streamlit
+#   8000 → FastAPI / uvicorn
+EXPOSE 8501 8000
+
+ENV MODE=${MODE}
+
+CMD ["sh", "-c", "\
+  if [ \"$MODE\" = 'api' ]; then \
+    uvicorn api:app --host 0.0.0.0 --port 8000; \
+  else \
+    streamlit run web_demo.py --server.port 8501 --server.address 0.0.0.0; \
+  fi"]
diff --git a/ai_chatbot.py b/ai_chatbot.py
index cd0becc..baf17bd 100644
--- a/ai_chatbot.py
+++ b/ai_chatbot.py
@@ -1,167 +1,347 @@
-import os
-import tkinter as tk
-from tkinter import scrolledtext
+"""
+AI ChatBot — core module.
+
+Supports two backends, selected automatically:
+  1. LLM backend  – uses the OpenAI Chat Completions API when an
+                    OPENAI_API_KEY environment variable (or .env file) is present.
+  2. Pattern-matching backend – offline fallback using a CSV dialog dataset.
+
+The public surface area is intentionally small so that web_demo.py and
+api.py can both import from this module without pulling in Tkinter.
+"""
+
 import csv
+import os
 import random
-import kagglehub
+from typing import Optional
+
+# Load .env variables if python-dotenv is available
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+# ---------------------------------------------------------------------------
+# Pattern-matching backend
+# ---------------------------------------------------------------------------
 
-# Simple chatbot class using pattern matching
 class SimpleBot:
-    def __init__(self):
-        self.responses = {}
-        self.default_responses = [
+    """Offline chatbot backed by a CSV dialog dataset."""
+
+    def __init__(self) -> None:
+        self.responses: dict[str, list[str]] = {}
+        self.default_responses: list[str] = [
             "I'm not sure I understand. Could you rephrase that?",
             "Interesting question! I'm still learning.",
             "I don't have an answer for that yet.",
-            "Could you tell me more about that?"
+            "Could you tell me more about that?",
         ]
-    
-    def train(self, dialog_file):
+
+    def train(self, dialog_file: str) -> None:
         try:
             with open(dialog_file, encoding="utf-8") as file:
                 reader = csv.reader(file)
-                next(reader)  # Skip header row
-                current_dialog = None
-                question = None
-                
+                next(reader)  # skip header
+                question: Optional[str] = None
+
                 for row in reader:
-                    if len(row) >= 3:  # dialog_id, line_id, text
-                        dialog_id = row[0]
+                    if len(row) >= 3:
                         line_id = row[1]
                         text = row[2]
-                        
-                        if line_id == '1':  # This is a question/prompt
+
+                        if line_id == "1":
                             question = text.lower()
-                        elif line_id == '2' and question:  # This is a response
-                            if question not in self.responses:
-                                self.responses[question] = []
-                            self.responses[question].append(text)
+                        elif line_id == "2" and question:
+                            self.responses.setdefault(question, []).append(text)
                             question = None
+
             print(f"Trained with {len(self.responses)} dialog patterns")
-        except Exception as e:
-            print(f"Error loading training data: {e}")
-    
-    def get_response(self, message):
-        message = message.lower()
-        
-        # Check for exact matches
-        if message in self.responses:
-            return random.choice(self.responses[message])
-        
-        # Check for partial matches
-        for pattern, responses in self.responses.items():
-            if pattern in message or message in pattern:
-                return random.choice(responses)
-        
-        # Return default response if no match
+        except Exception as exc:
+            print(f"Error loading training data: {exc}")
+
+    def get_response(self, message: str, history: Optional[list] = None) -> str:
+        """Return a pattern-matched reply.  *history* is accepted but unused."""
+        key = message.lower()
+
+        if key in self.responses:
+            return random.choice(self.responses[key])
+
+        for pattern, replies in self.responses.items():
+            if pattern in key or key in pattern:
+                return random.choice(replies)
+
         return random.choice(self.default_responses)
 
-# Initialize chatbot
-chatbot = SimpleBot()
 
-# Add method to parse Kaggle's dialogs.txt format
-def parse_kaggle_dialogs(file_path):
+# ---------------------------------------------------------------------------
+# LLM backend (OpenAI)
+# ---------------------------------------------------------------------------
+
+class LLMBot:
+    """Chatbot backed by the OpenAI Chat Completions API."""
+
+    SYSTEM_PROMPT = (
+        "You are a helpful, friendly, and concise AI assistant. "
+        "Answer clearly and stay on topic."
+    )
+
+    def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo") -> None:
+        from openai import OpenAI  # deferred import so SimpleBot works without openai
+        self.model = model
+        self.client = OpenAI(api_key=api_key or os.environ.get("OPENAI_API_KEY"))
+
+    def get_response(self, message: str, history: Optional[list] = None) -> str:
+        """Call the OpenAI API and return the stripped reply ("" when it has no text).
+
+        *history* is a list of ``{"role": ..., "content": ...}`` dicts
+        representing the conversation so far (not including the current message).
+        """
+        messages = [{"role": "system", "content": self.SYSTEM_PROMPT}]
+        if history:
+            messages.extend(history)
+        messages.append({"role": "user", "content": message})
+
+        completion = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            max_tokens=512,
+            temperature=0.7,
+        )
+        return (completion.choices[0].message.content or "").strip()  # content may be None
+
+    def stream_response(self, message: str, history: Optional[list] = None):
+        """Yield reply text pieces as they arrive; chunks without choices or text are skipped."""
+        messages = [{"role": "system", "content": self.SYSTEM_PROMPT}]
+        if history:
+            messages.extend(history)
+        messages.append({"role": "user", "content": message})
+
+        stream = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            max_tokens=512,
+            temperature=0.7,
+            stream=True,
+        )
+        for chunk in stream:
+            delta = chunk.choices[0].delta.content if chunk.choices else None
+            if delta:
+                yield delta
+
+
+# ---------------------------------------------------------------------------
+# Unified ChatBot facade
+# ---------------------------------------------------------------------------
+
+class ChatBot:
+    """High-level chatbot that auto-selects LLM or pattern-matching backend.
+
+    Priority:
+      1. Use LLMBot if ``OPENAI_API_KEY`` is set.
+      2. Fall back to SimpleBot otherwise.
+
+    Conversation history is maintained internally so callers only need to
+    pass the current user message.
+    """
+
+    MAX_HISTORY = 20  # keep last N turns in context
+
+    def __init__(self, dialog_file: Optional[str] = None, force_offline: bool = False) -> None:
+        self.history: list[dict] = []
+        self._llm: Optional[LLMBot] = None
+        self._simple: Optional[SimpleBot] = None
+
+        if not force_offline and os.environ.get("OPENAI_API_KEY"):
+            try:
+                self._llm = LLMBot()
+                print("Using LLM backend (OpenAI).")
+            except Exception as exc:
+                print(f"LLM init failed ({exc}), falling back to pattern-matching.")
+
+        if self._llm is None:
+            self._simple = SimpleBot()
+            self._load_training_data(dialog_file)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    @property
+    def mode(self) -> str:
+        return "llm" if self._llm else "pattern"
+
+    def chat(self, message: str) -> str:
+        """Process *message* and return a reply; history is updated automatically."""
+        if self._llm:
+            reply = self._llm.get_response(message, self.history)
+        else:
+            reply = self._simple.get_response(message, self.history)  # type: ignore[union-attr]
+
+        self._update_history(message, reply)
+        return reply
+
+    def stream_chat(self, message: str):
+        """Yield response tokens (LLM mode) or yield the full reply at once."""
+        if self._llm:
+            tokens: list[str] = []
+            for token in self._llm.stream_response(message, self.history):
+                tokens.append(token)
+                yield token
+            reply = "".join(tokens)
+        else:
+            reply = self._simple.get_response(message, self.history)  # type: ignore[union-attr]
+            yield reply
+
+        self._update_history(message, reply)
+
+    @property
+    def pattern_count(self) -> int:
+        """Number of dialog patterns loaded (0 in LLM mode)."""
+        return len(self._simple.responses) if self._simple else 0
+
+    def reset_history(self) -> None:
+        self.history.clear()
+
+    def train(self, dialog_file: str) -> None:
+        """Retrain the pattern-matching bot from a CSV file."""
+        if self._simple:
+            self._simple.train(dialog_file)
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _update_history(self, user_msg: str, bot_reply: str) -> None:
+        self.history.append({"role": "user", "content": user_msg})
+        self.history.append({"role": "assistant", "content": bot_reply})
+        # Trim to avoid unbounded growth
+        if len(self.history) > self.MAX_HISTORY * 2:
+            self.history = self.history[-(self.MAX_HISTORY * 2):]
+
+    def _load_training_data(self, dialog_file: Optional[str]) -> None:
+        if dialog_file and os.path.exists(dialog_file):
+            self._simple.train(dialog_file)  # type: ignore[union-attr]
+            return
+
+        # Try Kaggle dataset first, fall back to local dialog.csv
+        try:
+            import kagglehub
+            print("Attempting to download chatbot data from Kaggle...")
+            data_path = kagglehub.dataset_download("grafstor/simple-dialogs-for-chatbot")
+
+            for root_dir, _, files in os.walk(data_path):
+                for fname in files:
+                    if fname.lower() == "dialogs.txt":
+                        kaggle_file = os.path.join(root_dir, fname)
+                        converted = _parse_kaggle_dialogs(kaggle_file)
+                        if converted:
+                            print("Training chatbot with Kaggle data...")
+                            self._simple.train(converted)  # type: ignore[union-attr]
+                            return
+            raise FileNotFoundError("dialogs.txt not found in Kaggle dataset")
+
+        except Exception as exc:
+            print(f"Kaggle dataset unavailable ({exc}), using local data.")
+
+        local = os.path.join(os.path.dirname(__file__), "dialog.csv")
+        if os.path.exists(local):
+            print("Training chatbot with local dialog data...")
+            self._simple.train(local)  # type: ignore[union-attr]
+        else:
+            print("No training data found; bot will use default responses only.")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _parse_kaggle_dialogs(file_path: str) -> Optional[str]:
+    """Convert Kaggle's dialogs.txt to SimpleBot's CSV; return its path, or None on error."""
     try:
         print(f"Parsing Kaggle dialogs from {file_path}...")
-        dialog_pairs = []
-        current_dialog_id = 0
-        
-        with open(file_path, 'r', encoding='utf-8') as f:
-            lines = f.readlines()
-            
-        # Create a temporary CSV file in the expected format
+        with open(file_path, "r", encoding="utf-8") as fh:
+            lines = fh.readlines()
+
         temp_csv_path = os.path.join(os.path.dirname(file_path), "converted_dialog.csv")
-        with open(temp_csv_path, 'w', encoding='utf-8') as f:
-            f.write("dialog_id,line_id,text\n")
-            
-            for i in range(0, len(lines)-1, 2):
-                if i+1 < len(lines):
-                    question = lines[i].strip()
-                    answer = lines[i+1].strip()
-                    
-                    if question and answer:
-                        current_dialog_id += 1
-                        f.write(f"{current_dialog_id},1,{question}\n")
-                        f.write(f"{current_dialog_id},2,{answer}\n")
-        
-        print(f"Converted {current_dialog_id} dialog pairs to CSV format")
+        dialog_id = 0
+        with open(temp_csv_path, "w", encoding="utf-8", newline="") as fh:
+            writer = csv.writer(fh, lineterminator="\n")  # quotes commas so row[2] stays whole
+            writer.writerow(["dialog_id", "line_id", "text"])
+            for i in range(0, len(lines) - 1, 2):
+                q, a = lines[i].strip(), lines[i + 1].strip()
+                if q and a:
+                    dialog_id += 1
+                    writer.writerow([dialog_id, 1, q])
+                    writer.writerow([dialog_id, 2, a])
+        print(f"Converted {dialog_id} dialog pairs to CSV format")
         return temp_csv_path
-    except Exception as e:
-        print(f"Error parsing Kaggle dialogs: {e}")
+    except Exception as exc:
+        print(f"Error parsing Kaggle dialogs: {exc}")
         return None
 
-# Try to download the dataset from Kaggle using kagglehub
-try:
-    print("Attempting to download chatbot data from Kaggle...")
-    data_path = kagglehub.dataset_download("grafstor/simple-dialogs-for-chatbot")
-    
-    # Look for dialogs.txt (the actual file in the dataset)
-    for root, dirs, files in os.walk(data_path):
-        for file in files:
-            if file.lower() == "dialogs.txt":
-                kaggle_dialog_file = os.path.join(root, file)
-                print(f"Found Kaggle dialog file: {kaggle_dialog_file}")
-                
-                # Convert the Kaggle format to our expected CSV format
-                converted_file = parse_kaggle_dialogs(kaggle_dialog_file)
-                if converted_file:
-                    print("Training chatbot with Kaggle data...")
-                    chatbot.train(converted_file)
-                    break
-        else:
-            continue
-        break
-    else:
-        raise FileNotFoundError("Dialog file not found in Kaggle dataset")
-        
-except Exception as e:
-    print(f"Error with Kaggle dataset: {e}")
-    print("Falling back to local dialog data...")
-    
-    # Fallback to local dialog.csv file
-    local_dialog_file = "dialog.csv"
-    try:
-        print("Training chatbot with local dialog data...")
-        chatbot.train(local_dialog_file)
-    except Exception as e:
-        print(f"Error loading local training data: {e}")
 
-# Create GUI
-root = tk.Tk()
-root.title("Chat with GUIBot")
-root.geometry("500x550")
+# ---------------------------------------------------------------------------
+# Tkinter GUI  (only runs when this file is executed directly)
+# ---------------------------------------------------------------------------
 
-chat_log = scrolledtext.ScrolledText(root, wrap=tk.WORD)
-chat_log.config(state=tk.DISABLED)
-chat_log.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)
+def _run_gui() -> None:
+    import tkinter as tk
+    from tkinter import scrolledtext
 
-entry_frame = tk.Frame(root)
-entry_frame.pack(padx=10, pady=10, fill=tk.X)
+    bot = ChatBot()
 
-user_input = tk.Entry(entry_frame, font=("Arial", 14))
-user_input.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 10))
+    gui_root = tk.Tk()
+    gui_root.title("Chat with AI Bot")
+    gui_root.geometry("500x580")
 
+    # ── mode badge ──────────────────────────────────────────────────────────
+    mode_label = tk.Label(
+        gui_root,
+        text=f"Mode: {'🤖 LLM (OpenAI)' if bot.mode == 'llm' else '📋 Pattern Matching'}",
+        font=("Arial", 10),
+        fg="#555",
+    )
+    mode_label.pack(anchor="w", padx=12, pady=(6, 0))
 
-def send_message():
-    message = user_input.get()
-    if message.strip():
+    # ── chat log ────────────────────────────────────────────────────────────
+    chat_log = scrolledtext.ScrolledText(gui_root, wrap=tk.WORD)
+    chat_log.config(state=tk.DISABLED)
+    chat_log.pack(padx=10, pady=6, fill=tk.BOTH, expand=True)
+
+    # ── input row ───────────────────────────────────────────────────────────
+    entry_frame = tk.Frame(gui_root)
+    entry_frame.pack(padx=10, pady=10, fill=tk.X)
+
+    user_input = tk.Entry(entry_frame, font=("Arial", 14))
+    user_input.pack(side=tk.LEFT, fill=tk.BOTH, expand=True, padx=(0, 10))
+
+    def send_message() -> None:
+        message = user_input.get().strip()
+        if not message:
+            return
         chat_log.config(state=tk.NORMAL)
-        chat_log.insert(tk.END, "You: " + message + "\n")
-        response = chatbot.get_response(message)
-        chat_log.insert(tk.END, "Bot: " + str(response) + "\n\n")
+        chat_log.insert(tk.END, f"You: {message}\n")
+        user_input.delete(0, tk.END)
+
+        response = bot.chat(message)
+        chat_log.insert(tk.END, f"Bot: {response}\n\n")
         chat_log.config(state=tk.DISABLED)
         chat_log.yview(tk.END)
-        user_input.delete(0, tk.END)
 
-send_btn = tk.Button(entry_frame, text="Send", command=send_message)
-send_btn.pack(side=tk.RIGHT)
+    send_btn = tk.Button(entry_frame, text="Send", command=send_message)
+    send_btn.pack(side=tk.RIGHT)
+
+    user_input.bind("<Return>", lambda _: send_message())
+
+    # ── welcome message ─────────────────────────────────────────────────────
+    chat_log.config(state=tk.NORMAL)
+    chat_log.insert(tk.END, "Bot: Hello! I'm your AI chatbot. How can I help you today?\n\n")
+    chat_log.config(state=tk.DISABLED)
 
-user_input.bind("<Return>", lambda event=None: send_message())
+    print("Starting GUI chatbot...")
+    gui_root.mainloop()
 
-# Add a welcome message
-chat_log.config(state=tk.NORMAL)
-chat_log.insert(tk.END, "Bot: Hello! I'm your simple chatbot. How can I help you today?\n\n")
-chat_log.config(state=tk.DISABLED)
 
-print("Starting GUI chatbot...")
-root.mainloop()
+if __name__ == "__main__":
+    _run_gui()
diff --git a/api.py b/api.py
new file mode 100644
index 0000000..42b2689
--- /dev/null
+++ b/api.py
@@ -0,0 +1,147 @@
+"""
+api.py — FastAPI REST backend for AI ChatBot.
+
+Run with:
+    uvicorn api:app --reload
+
+Endpoints:
+    POST /chat           Send a message and receive a reply.
+    POST /train          Reload the pattern-matching bot from a CSV file.
+    GET  /health         Health check.
+    DELETE /sessions/{id} Reset a conversation session.
+
+Environment variables:
+    OPENAI_API_KEY   Optional – enables the LLM backend.
+"""
+
+import os
+import uuid
+from typing import Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+
+# Load .env if available
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+from ai_chatbot import ChatBot
+
+# ---------------------------------------------------------------------------
+# App setup
+# ---------------------------------------------------------------------------
+
+app = FastAPI(
+    title="AI ChatBot API",
+    description=(
+        "REST API for the AI ChatBot. Supports LLM (OpenAI) and offline "
+        "pattern-matching backends. Maintains per-session conversation history."
+    ),
+    version="1.0.0",
+)
+
+# Per-session bots (keyed by session_id string)
+_sessions: dict[str, ChatBot] = {}
+# CSV accepted by the last successful POST /train; None until then
+_dialog_file: Optional[str] = None
+
+
+def clear_all_sessions() -> None:
+    """Remove all active sessions. Intended for use in tests."""
+    _sessions.clear()
+
+
+def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]:
+    """Return (session_id, ChatBot); a missing id gets a fresh UUID, a missing bot is built."""
+    if session_id is None:
+        session_id = str(uuid.uuid4())
+    if session_id not in _sessions:
+        _sessions[session_id] = ChatBot(dialog_file=_dialog_file)
+
+    return session_id, _sessions[session_id]
+
+
+# ---------------------------------------------------------------------------
+# Request / Response schemas
+# ---------------------------------------------------------------------------
+
+class ChatRequest(BaseModel):
+    message: str
+    session_id: Optional[str] = None
+
+
+class ChatResponse(BaseModel):
+    reply: str
+    session_id: str
+    mode: str
+
+
+class TrainRequest(BaseModel):
+    dialog_file: str
+
+
+class TrainResponse(BaseModel):
+    status: str
+    patterns_loaded: int
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+@app.get("/health")
+def health_check():
+    """Simple health check."""
+    return {"status": "ok"}
+
+
+@app.post("/chat", response_model=ChatResponse)
+def chat(request: ChatRequest):
+    """Send a message and receive a reply.
+
+    If *session_id* is omitted, a new session is created and its ID is
+    returned so the client can continue the conversation.
+    """
+    if not request.message.strip():
+        raise HTTPException(status_code=400, detail="Message must not be empty.")
+
+    session_id, bot = _get_or_create_session(request.session_id)
+    reply = bot.chat(request.message)
+
+    return ChatResponse(reply=reply, session_id=session_id, mode=bot.mode)
+
+
+@app.delete("/sessions/{session_id}")
+def reset_session(session_id: str):
+    """Delete a conversation session (clears history)."""
+    if session_id in _sessions:
+        del _sessions[session_id]
+        return {"status": "session deleted", "session_id": session_id}
+    raise HTTPException(status_code=404, detail="Session not found.")
+
+
+@app.post("/train", response_model=TrainResponse)
+def train(request: TrainRequest):
+    """Make a server-side CSV the pattern data source for new sessions.
+
+    Existing sessions are dropped, so each client's next request builds a
+    bot from the new file. In LLM mode the file is stored but not consulted.
+    """
+    global _dialog_file
+    # NOTE(review): client-chosen server path — restrict to a safe directory
+    if not os.path.exists(request.dialog_file):
+        raise HTTPException(
+            status_code=404,
+            detail=f"File not found: {request.dialog_file}",
+        )
+
+    # Offline probe to count patterns, then publish the path for new sessions
+    fresh_bot = ChatBot(dialog_file=request.dialog_file, force_offline=True)
+    patterns = fresh_bot.pattern_count
+    _dialog_file = request.dialog_file
+    _sessions.clear()
+
+    return TrainResponse(status="retrained", patterns_loaded=patterns)
diff --git a/requirements.txt b/requirements.txt
index 7c416df..256e933 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,17 @@
-# Dependencies for the chatbot with Kaggle dataset integration
-kagglehub
\ No newline at end of file
+# Core
+kagglehub
+python-dotenv>=1.0.0
+
+# LLM backend (optional — enables OpenAI-powered responses)
+openai>=1.0.0
+
+# Web UI
+streamlit>=1.28.0
+
+# REST API
+fastapi>=0.104.0
+uvicorn[standard]>=0.23.0
+
+# Testing
+pytest>=7.4.0
+httpx>=0.25.0
\ No newline at end of file
diff --git a/test_chatbot.py b/test_chatbot.py
new file mode 100644
index 0000000..fb814a2
--- /dev/null
+++ b/test_chatbot.py
@@ -0,0 +1,218 @@
+"""
+test_chatbot.py — Unit tests for AI ChatBot.
+
+Run with:
+    pytest test_chatbot.py -v
+"""
+
+import os
+import tempfile
+import pytest
+from fastapi.testclient import TestClient
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+DIALOG_CSV = """\
+dialog_id,line_id,text
+1,1,hello
+1,2,Hi there! How can I help?
+2,1,what is your name
+2,2,I'm a chatbot built with Python!
+3,1,bye
+3,2,Goodbye! Have a great day.
+"""
+
+
+@pytest.fixture
+def dialog_file(tmp_path):
+    """Write a small dialog CSV and return its path."""
+    path = tmp_path / "test_dialog.csv"
+    path.write_text(DIALOG_CSV, encoding="utf-8")
+    return str(path)
+
+
+# ---------------------------------------------------------------------------
+# SimpleBot tests
+# ---------------------------------------------------------------------------
+
+class TestSimpleBot:
+    def test_train_loads_patterns(self, dialog_file):
+        from ai_chatbot import SimpleBot
+
+        bot = SimpleBot()
+        bot.train(dialog_file)
+        assert len(bot.responses) == 3
+
+    def test_exact_match(self, dialog_file):
+        from ai_chatbot import SimpleBot
+
+        bot = SimpleBot()
+        bot.train(dialog_file)
+        assert bot.get_response("hello") == "Hi there! How can I help?"
+
+    def test_partial_match(self, dialog_file):
+        from ai_chatbot import SimpleBot
+
+        bot = SimpleBot()
+        bot.train(dialog_file)
+        reply = bot.get_response("hey, hello there")
+        assert reply == "Hi there! How can I help?"
+
+    def test_default_response_on_unknown(self, dialog_file):
+        from ai_chatbot import SimpleBot
+
+        bot = SimpleBot()
+        bot.train(dialog_file)
+        reply = bot.get_response("zxqwerty unknown phrase 12345")
+        assert isinstance(reply, str)
+        assert len(reply) > 0
+
+    def test_train_missing_file(self):
+        from ai_chatbot import SimpleBot
+
+        bot = SimpleBot()
+        bot.train("/nonexistent/path/dialog.csv")
+        assert bot.responses == {}
+
+
+# ---------------------------------------------------------------------------
+# ChatBot (facade) tests — forced offline mode
+# ---------------------------------------------------------------------------
+
+class TestChatBot:
+    def test_offline_mode(self, dialog_file):
+        from ai_chatbot import ChatBot
+
+        bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+        assert bot.mode == "pattern"
+
+    def test_chat_returns_string(self, dialog_file):
+        from ai_chatbot import ChatBot
+
+        bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+        reply = bot.chat("hello")
+        assert isinstance(reply, str)
+        assert len(reply) > 0
+
+    def test_history_grows_with_turns(self, dialog_file):
+        from ai_chatbot import ChatBot
+
+        bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+        assert bot.history == []
+        bot.chat("hello")
+        assert len(bot.history) == 2  # one user + one assistant
+
+    def test_reset_history(self, dialog_file):
+        from ai_chatbot import ChatBot
+
+        bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+        bot.chat("hello")
+        bot.reset_history()
+        assert bot.history == []
+
+    def test_stream_chat_yields_text(self, dialog_file):
+        from ai_chatbot import ChatBot
+
+        bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+        tokens = list(bot.stream_chat("hello"))
+        assert len(tokens) >= 1
+        assert "".join(tokens) == "Hi there! How can I help?"
+
+    def test_history_capped(self, dialog_file):
+        from ai_chatbot import ChatBot
+
+        bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+        # Drive history past MAX_HISTORY * 2
+        for _ in range(bot.MAX_HISTORY + 5):
+            bot.chat("hello")
+        assert len(bot.history) <= bot.MAX_HISTORY * 2
+
+    def test_retrain(self, dialog_file, tmp_path):
+        from ai_chatbot import ChatBot
+
+        bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+
+        new_csv = tmp_path / "new_dialog.csv"
+        new_csv.write_text(
+            "dialog_id,line_id,text\n1,1,howdy\n1,2,Howdy partner!\n",
+            encoding="utf-8",
+        )
+        bot.train(str(new_csv))
+        assert bot.chat("howdy") == "Howdy partner!"
+
+
+# ---------------------------------------------------------------------------
+# FastAPI tests
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def api_client(dialog_file, monkeypatch):
+    """Return a TestClient with the FastAPI app; force offline mode."""
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+
+    # Reset module-level state between tests
+    import api as api_module
+    api_module.clear_all_sessions()
+
+    # Pre-populate a session backed by test data
+    from ai_chatbot import ChatBot
+    bot = ChatBot(dialog_file=dialog_file, force_offline=True)
+    sid = "test-session"
+    api_module._sessions[sid] = bot
+
+    from fastapi.testclient import TestClient
+    return TestClient(api_module.app), sid
+
+
+class TestAPI:
+    def test_health(self, api_client):
+        client, _ = api_client
+        resp = client.get("/health")
+        assert resp.status_code == 200
+        assert resp.json()["status"] == "ok"
+
+    def test_chat_creates_session(self, api_client):
+        client, _ = api_client
+        resp = client.post("/chat", json={"message": "hello"})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "reply" in data
+        assert "session_id" in data
+        assert data["mode"] in ("llm", "pattern")
+
+    def test_chat_with_session(self, api_client):
+        client, sid = api_client
+        resp = client.post("/chat", json={"message": "hello", "session_id": sid})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["session_id"] == sid
+        assert isinstance(data["reply"], str)
+
+    def test_chat_empty_message(self, api_client):
+        client, _ = api_client
+        resp = client.post("/chat", json={"message": "   "})
+        assert resp.status_code == 400
+
+    def test_delete_session(self, api_client):
+        client, sid = api_client
+        resp = client.delete(f"/sessions/{sid}")
+        assert resp.status_code == 200
+
+        # Session should be gone now
+        resp2 = client.delete(f"/sessions/{sid}")
+        assert resp2.status_code == 404
+
+    def test_train_valid_file(self, api_client, dialog_file):
+        client, _ = api_client
+        resp = client.post("/train", json={"dialog_file": dialog_file})
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["status"] == "retrained"
+        assert data["patterns_loaded"] > 0
+
+    def test_train_missing_file(self, api_client):
+        client, _ = api_client
+        resp = client.post("/train", json={"dialog_file": "/nonexistent/file.csv"})
+        assert resp.status_code == 404
diff --git a/web_demo.py b/web_demo.py
new file mode 100644
index 0000000..d141b9f
--- /dev/null
+++ b/web_demo.py
@@ -0,0 +1,121 @@
+"""
+web_demo.py — Streamlit web interface for AI ChatBot.
+
+Run with:
+    streamlit run web_demo.py
+
+Environment variables:
+    OPENAI_API_KEY   Set this to enable the LLM backend (optional).
+"""
+
+import os
+
+import streamlit as st
+
+# Load .env if available
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+from ai_chatbot import ChatBot
+
+# ---------------------------------------------------------------------------
+# Page configuration
+# ---------------------------------------------------------------------------
+st.set_page_config(
+    page_title="AI ChatBot",
+    page_icon="🤖",
+    layout="centered",
+)
+
+st.title("🤖 AI ChatBot")
+st.caption(
+    "Powered by **OpenAI GPT** (LLM mode) when `OPENAI_API_KEY` is set, "
+    "otherwise uses offline **pattern-matching**."
+)
+
+# ---------------------------------------------------------------------------
+# Session-level chatbot instance
+# ---------------------------------------------------------------------------
+@st.cache_resource
+def _get_bot_factory() -> ChatBot:
+    """Return a template bot (loads data once); actual per-session bots copy from this."""
+    return ChatBot()
+
+
+# Per-session bot stored in session_state so each browser tab/user gets its own history
+if "bot" not in st.session_state:
+    st.session_state.bot = _get_bot_factory()
+
+bot: ChatBot = st.session_state.bot
+
+# Initialise message history in session state
+if "messages" not in st.session_state:
+    st.session_state.messages: list[dict] = []
+    st.session_state.messages.append(
+        {
+            "role": "assistant",
+            "content": "Hello! I'm your AI chatbot. How can I help you today?",
+        }
+    )
+
+# ---------------------------------------------------------------------------
+# Sidebar
+# ---------------------------------------------------------------------------
+with st.sidebar:
+    st.header("⚙️ Settings")
+    st.info(f"**Backend:** {'🤖 LLM (OpenAI)' if bot.mode == 'llm' else '📋 Pattern Matching'}")
+
+    if st.button("🗑️ Clear conversation"):
+        st.session_state.messages = [
+            {
+                "role": "assistant",
+                "content": "Conversation cleared. How can I help you?",
+            }
+        ]
+        bot.reset_history()
+        st.rerun()
+
+    st.markdown("---")
+    st.markdown("### About")
+    st.markdown(
+        "This chatbot supports two backends:\n"
+        "- **LLM mode**: uses OpenAI's API for intelligent, context-aware replies.\n"
+        "- **Pattern mode**: offline fallback using a dialog dataset.\n\n"
+        "Set `OPENAI_API_KEY` in your environment or a `.env` file to enable LLM mode."
+    )
+
+# ---------------------------------------------------------------------------
+# Chat history display
+# ---------------------------------------------------------------------------
+for msg in st.session_state.messages:
+    with st.chat_message(msg["role"]):
+        st.markdown(msg["content"])
+
+# ---------------------------------------------------------------------------
+# Chat input
+# ---------------------------------------------------------------------------
+if prompt := st.chat_input("Type a message…"):
+    # Show user message
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    # Generate and stream the assistant reply
+    with st.chat_message("assistant"):
+        if bot.mode == "llm":
+            # Streaming response
+            response_placeholder = st.empty()
+            full_response = ""
+            for token in bot.stream_chat(prompt):
+                full_response += token
+                response_placeholder.markdown(full_response + "▌")
+            response_placeholder.markdown(full_response)
+            reply = full_response
+        else:
+            reply = bot.chat(prompt)
+            st.markdown(reply)
+
+    st.session_state.messages.append({"role": "assistant", "content": reply})

From 70afb6911f75876c3e4be9d700d40db5bc343a81 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Mar 2026 07:35:17 +0000
Subject: [PATCH 3/4] Resolve conflicts with main; address review feedback in
 api.py, ai_chatbot.py, web_demo.py and test_chatbot.py

Co-authored-by: joshuvavinith <146979257+joshuvavinith@users.noreply.github.com>
---
 .github/workflows/ci.yml |  55 ++++--
 README.md                | 352 ++++++++++++++++++++++++++-------------
 ai_chatbot.py            |  10 +-
 api.py                   |  24 ++-
 requirements.txt         |   1 +
 test_chatbot.py          |  31 +++-
 web_demo.py              |   9 +-
 7 files changed, 324 insertions(+), 158 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 75d7c6a..bc6e287 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,45 +6,66 @@ on:
   pull_request:
     branches: ["main", "master"]
 
-# Restrict default GITHUB_TOKEN permissions to read-only
 permissions:
   contents: read
 
 jobs:
-  lint-and-test:
+  lint:
     runs-on: ubuntu-latest
-    permissions:
-      contents: read
 
     steps:
       - uses: actions/checkout@v4
 
-      - name: Set up Python
+      - name: Set up Python 3.12
         uses: actions/setup-python@v5
         with:
           python-version: "3.12"
+          cache: "pip"
+
+      - name: Install flake8
+        run: pip install flake8
+
+      - name: Lint — blocking (syntax errors / undefined names)
+        run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+
+      - name: Lint — style warnings (non-blocking)
+        run: flake8 . --count --exit-zero --max-line-length=100 --statistics
+
+  test:
+    needs: lint
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
 
-      - name: Lint with flake8
-        run: |
-          pip install flake8
-          # Stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          # Treat all other issues as warnings (non-blocking)
-          flake8 . --count --exit-zero --max-line-length=100 --statistics
+      - name: Run tests with coverage
+        env:
+          OPENAI_API_KEY: "offline-mode-no-key-required"
+        run: pytest test_chatbot.py -v --tb=short --cov=. --cov-report=xml
 
-      - name: Run tests
-        run: pytest test_chatbot.py -v
+      - name: Upload coverage report
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-${{ matrix.python-version }}
+          path: coverage.xml
 
   docker-build:
+    needs: test
     runs-on: ubuntu-latest
-    needs: lint-and-test
-    permissions:
-      contents: read
 
     steps:
       - uses: actions/checkout@v4
diff --git a/README.md b/README.md
index eee76cc..75652c4 100644
--- a/README.md
+++ b/README.md
@@ -1,220 +1,332 @@
 
-# 🤖 Simple Python Chatbot with GUI (Tkinter)
+# 🤖 AI ChatBot
 
-## 🚀 Project Description  
-Welcome to the **Simple Python Chatbot Project with GUI**! This repository provides an interactive chatbot built with **Python**, using pattern matching on a dialog dataset from Kaggle, and running inside a friendly **Tkinter-based desktop GUI**.
+![CI](https://github.com/joshuvavinith/AI_ChatBot/actions/workflows/ci.yml/badge.svg)
+![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue)
+![License](https://img.shields.io/badge/license-MIT-green)
 
-The chatbot uses a simple but effective pattern matching approach to respond to user queries based on a dataset of over 1,700 dialog patterns. It automatically downloads a rich conversation dataset from Kaggle to provide more natural and varied responses. If Kaggle is unavailable, it falls back to a local dataset. It's perfect for beginners, students, or hobby projects.
+A modern, extensible Python chatbot that generates context-aware, dynamic
+responses. It ships with two interchangeable backends and three ready-to-use
+interfaces — no external services are required to get started.
+
+| Mode | When active | What it uses |
+|------|-------------|--------------|
+| **LLM** | `OPENAI_API_KEY` is set | OpenAI Chat Completions (GPT-3.5 / GPT-4o) |
+| **Pattern Matching** | No API key present | Offline CSV dialog dataset |
+
+Three interfaces are included out of the box:
+
+| Interface | Entry point | Default URL |
+|-----------|-------------|-------------|
+| **Streamlit web app** | `streamlit run web_demo.py` | `http://localhost:8501` |
+| **FastAPI REST API** | `uvicorn api:app --reload` | `http://localhost:8000/docs` |
+| **Tkinter desktop GUI** | `python ai_chatbot.py` | *(native window)* |
 
 ---
 
 ## 📚 Table of Contents
 
 - [Key Features](#-key-features)
-- [Technologies Used](#-technologies-used)
-- [Installation Instructions](#-installation-instructions)
-- [Kaggle Dataset Setup](#-kaggle-dataset-setup)
-- [Usage Example](#-usage-example)
-- [Development Process](#-development-process)
-- [Contributing Guidelines](#-contributing-guidelines)
-- [Evaluation Metrics](#-evaluation-metrics)
-- [Future Work](#-future-work)
-- [Architecture Diagram](#-architecture-diagram)
-- [Interaction with the Chatbot](#-interaction-with-the-chatbot)
-- [Additional Information](#-additional-information)
-- [Connect with Us](#-connect-with-us)
+- [Quick Start](#-quick-start)
+- [Prerequisites](#prerequisites)
+- [Installation](#-installation)
+- [Configuration](#-configuration)
+- [Running the Chatbot](#-running-the-chatbot)
+  - [Desktop GUI (Tkinter)](#-desktop-gui-tkinter)
+  - [Web UI (Streamlit)](#-web-ui-streamlit)
+  - [REST API (FastAPI)](#-rest-api-fastapi)
+- [Docker](#-docker)
+- [API Reference](#-api-reference)
+- [Testing](#-testing)
+- [CI/CD](#-cicd)
+- [Project Structure](#-project-structure)
+- [Architecture](#-architecture)
+- [Contributing](#-contributing)
 
 ---
 
 ## ✨ Key Features
 
-- 🔍 **Pattern matching chatbot** using simple but effective techniques
-- 💬 **Interactive GUI** built with Tkinter
-- 📥 **Kaggle dataset integration** with automatic download
-- 📝 **Fallback to local dialog dataset** if Kaggle is unavailable
-- 🔁 Supports exact and partial matching for better responses
-- ⚡ Lightweight with minimal external dependencies
-- 🧩 Easily extensible by adding more dialog patterns
+- 🤖 **LLM backend** — connects to OpenAI's API for intelligent, context-aware responses
+- 📋 **Offline fallback** — pattern matching on a dialog dataset; works without internet/API key
+- 🌐 **Streamlit web UI** — chat from any browser with streaming token output (LLM mode)
+- 🔌 **FastAPI REST API** — `/chat` and `/train` endpoints; per-session conversation memory
+- 🖥️ **Tkinter desktop GUI** — original GUI updated to show backend mode
+- 🧠 **Conversation memory** — recent exchanges are passed to the LLM for follow-up questions
+- 🐳 **Docker support** — single image supports both web and API modes via `MODE` build arg
+- ✅ **Tests** — pytest suite covering core logic and API endpoints
+- 🔄 **CI/CD** — GitHub Actions workflow: lint → test → Docker build
 
 ---
 
-## 🛠️ Technologies Used
+## 🚀 Quick Start
+
+```bash
+git clone https://github.com/joshuvavinith/AI_ChatBot.git
+cd AI_ChatBot
+pip install -r requirements.txt
+
+# (optional) enable LLM mode
+echo "OPENAI_API_KEY=sk-..." > .env
 
-- **Python 3.x** – Works with any modern Python version
-- **Tkinter** – Built-in GUI framework
-- **KaggleHub** – For downloading Kaggle datasets
-- **CSV** – For reading dialog data
-- **Random** – For selecting varied responses
-- **Git** – For version control
+# Start the web UI
+streamlit run web_demo.py
+```
 
-> ✅ This implementation uses minimal external dependencies, with KaggleHub being the only non-standard library required. The core functionality works even without internet access by falling back to local data.
+---
+
+## Prerequisites
+
+- **Python 3.10 or later** (tested on 3.10, 3.11, and 3.12)
+- **pip** (included with Python)
+- *(Optional)* An [OpenAI API key](https://platform.openai.com/account/api-keys) to enable LLM mode
+- *(Optional)* [Docker](https://docs.docker.com/get-docker/) for containerised deployment
 
 ---
 
-## 🔧 Installation Instructions
+## 🔧 Installation
 
-### 1. Clone the Repository
+### 1. Clone & set up environment
 
 ```bash
 git clone https://github.com/joshuvavinith/AI_ChatBot.git
 cd AI_ChatBot
+python -m venv venv
+source venv/bin/activate   # Windows: venv\Scripts\activate
+pip install -r requirements.txt
 ```
 
-### 2. Create a Virtual Environment (Recommended)
+### 2. (Optional) Configure API key
 
-```bash
-python -m venv venv
-source venv/bin/activate  # On Windows: venv\Scripts\activate
+Create a `.env` file in the project root:
+
+```dotenv
+OPENAI_API_KEY=sk-your-key-here
 ```
 
-### 3. Install Dependencies
+Or export it as an environment variable:
 
 ```bash
-pip install -r requirements.txt
+export OPENAI_API_KEY=sk-your-key-here
 ```
 
+Without an API key the bot automatically falls back to offline pattern matching.
+
+---
+
+## ⚙️ Configuration
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `OPENAI_API_KEY` | *(unset)* | Enables LLM mode when present |
+
+### Kaggle Dataset (optional)
+
+The pattern-matching bot can use a richer Kaggle dialog dataset.
+To enable it, place `kaggle.json` in `~/.kaggle/` (or set `KAGGLE_USERNAME` / `KAGGLE_KEY`).
+If unavailable, the bot falls back to `dialog.csv`.
+
 ---
 
-## 🔑 Kaggle Dataset Setup
+## 💬 Running the Chatbot
 
-This project uses the [Simple Dialogs for Chatbot](https://www.kaggle.com/datasets/grafstor/simple-dialogs-for-chatbot) dataset via **KaggleHub**. The dataset contains over 1,800 conversation pairs that significantly enhance the chatbot's response capabilities.
+### 🖥️ Desktop GUI (Tkinter)
 
-### Dataset Features:
+```bash
+python ai_chatbot.py
+```
 
-- 1,800+ question-answer pairs
-- Covers a wide range of casual conversation topics
-- Automatically downloaded and processed at runtime
-- Converted from TXT to CSV format for compatibility
+### 🌐 Web UI (Streamlit)
 
-### To enable Kaggle downloads:
+```bash
+streamlit run web_demo.py
+```
 
-1. Go to [kaggle.com/account](https://www.kaggle.com/account) and create an API token.
-2. Download the `kaggle.json` file.
-3. Place it in:
+Open your browser at `http://localhost:8501`.
 
-   - Linux/macOS: `~/.kaggle/kaggle.json`
-   - Windows: `C:\Users\<YourUsername>\.kaggle\kaggle.json`
+Features:
+- Full conversation history
+- Streaming token output in LLM mode (the reply appears incrementally, ChatGPT-style)
+- "Clear conversation" button in the sidebar
 
-Alternatively, set environment variables:
+### 🔌 REST API (FastAPI)
 
 ```bash
-export KAGGLE_USERNAME=your_username
-export KAGGLE_KEY=your_key
+uvicorn api:app --reload
 ```
 
-> ✅ If the dataset can't be downloaded, the chatbot will automatically fall back to the local dialog.csv file with basic conversation patterns.
+Interactive docs available at `http://localhost:8000/docs`.
 
 ---
 
-## 💬 Usage Example
+## 🐳 Docker
 
-### ▶️ To Run the GUI Chatbot:
+### Build
 
 ```bash
-python ai_chatbot.py
+# Web UI (default)
+docker build -t ai-chatbot:web .
+
+# API mode
+docker build --build-arg MODE=api -t ai-chatbot:api .
 ```
 
-### 🖥️ GUI Features:
+### Run
+
+```bash
+# Web UI — visit http://localhost:8501
+docker run -p 8501:8501 -e OPENAI_API_KEY=sk-... ai-chatbot:web
 
-- Type your message in the input box
-- Hit **Enter** or click **Send**
-- The chatbot responds immediately
-- Say `"bye"` or `"exit"` to end the chat
+# REST API — visit http://localhost:8000/docs
+docker run -p 8000:8000 -e OPENAI_API_KEY=sk-... ai-chatbot:api
+```
 
 ---
 
-## 🧠 Development Process
+## 📡 API Reference
 
-1. **Dataset Retrieval**: Uses `kagglehub` to fetch dialog data from Kaggle
-2. **Training**: Trains ChatterBot using NLTK preprocessed dialogs
-3. **Interface**: Built with Tkinter for easy interaction
-4. **Fallback**: Uses hardcoded sample training data if download fails
+### `POST /chat`
 
----
+Send a message and get a reply. Omit `session_id` to start a new session. A blank (whitespace-only) `message` is rejected with `400`.
+
+```jsonc
+// Request
+{ "message": "Hello!", "session_id": "optional-uuid" }
+
+// Response
+{
+  "reply": "Hi there! How can I help you?",
+  "session_id": "550e8400-e29b-41d4-a716-446655440000",
+  "mode": "pattern"
+}
+```
+
+### `DELETE /sessions/{session_id}`
+
+Reset (delete) a conversation session. Responds with `404` if the session does not exist.
 
-## 🤝 Contributing Guidelines
+### `POST /train`
 
-We welcome contributions! 🙌
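+Reload pattern-matching data from a CSV file on the server. All existing sessions are cleared so that new sessions use the updated data; responds with `404` if the file does not exist.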
 
-### How to Contribute:
+```jsonc
+// Request
+{ "dialog_file": "/path/to/dialog.csv" }
 
-1. **Fork this repository**
-2. Create a branch:
+// Response
+{ "status": "retrained", "patterns_loaded": 42 }
+```
 
-   ```bash
-   git checkout -b feature-branch
-   ```
+### `GET /health`
 
-3. Make your changes and commit:
+```json
+{ "status": "ok" }
+```
 
-   ```bash
-   git commit -m "Add new feature"
-   ```
+---
 
-4. Push and create a PR:
+## 🧪 Testing
 
-   ```bash
-   git push origin feature-branch
-   ```
+```bash
+pytest test_chatbot.py -v
+```
 
-> 💡 Follow Python best practices and test before submitting.
+The test suite covers:
+- `SimpleBot` — training, exact/partial matching, defaults, missing file
+- `ChatBot` — offline mode, history management, streaming, history cap, retraining
+- FastAPI — all endpoints (health, chat, delete session, train)
 
 ---
 
-## 📈 Evaluation Metrics
+## 🔄 CI/CD
+
+GitHub Actions runs on pushes to `main`, `master`, and `copilot/**` branches, and on pull requests targeting `main` or `master`:
+
+1. **Lint** — `flake8` checks for syntax errors and undefined names
+2. **Test** — `pytest` full suite with coverage, across Python 3.10, 3.11, and 3.12
+3. **Docker build** — both `web` and `api` images are built to verify the Dockerfile
 
-- **BLEU Score** – Quality of generated response
-- **Accuracy** – Expected vs actual answers
-- **Responsiveness** – Time between input and output
-- **User Feedback** – Manual quality testing
+Coverage reports are uploaded as build artifacts for each Python version.
 
 ---
 
-## 🌱 Future Work
+## 📂 Project Structure
 
-- 🌐 Add API/web support for Flask or FastAPI
-- 🧠 Switch to GPT/BERT for smarter conversations
-- 🗣️ Voice integration with `speech_recognition`
-- 💾 Save and reload previous conversation history
-- 🖥️ Package as a desktop app using `pyinstaller`
+```
+AI_ChatBot/
+├── ai_chatbot.py      # Core module: SimpleBot, LLMBot, ChatBot facade, Tkinter GUI
+├── api.py             # FastAPI REST backend
+├── web_demo.py        # Streamlit web interface
+├── dialog.csv         # Default offline dialog dataset
+├── test_chatbot.py    # Pytest test suite (SimpleBot, ChatBot, FastAPI)
+├── requirements.txt   # Python dependencies
+├── Dockerfile         # Multi-mode Docker image (web / api)
+├── LICENSE            # MIT License
+└── .github/
+    └── workflows/
+        └── ci.yml     # CI pipeline: lint → test → Docker build
+```
 
 ---
 
-## 📊 Architecture Diagram
+## 📐 Architecture
 
 ```
-+-------------+        +----------------------+        +-------------+
-| User Input  +------->+    ChatBot Engine     +------->+ Bot Reply   |
-+------+------+        +----------------------+        +-------------+
-       |
-       v
-  [ Tkinter GUI ]
-       |
-       v
-[ Kaggle Dataset Trainer ]
++------------------+     +------------------+     +------------------+
+|  Streamlit Web   |     |  FastAPI REST     |     |  Tkinter Desktop |
+|  (web_demo.py)   |     |  (api.py)         |     |  (ai_chatbot.py) |
++--------+---------+     +--------+---------+     +--------+---------+
+         |                        |                         |
+         +------------------------+-------------------------+
+                                  |
+                         +--------v---------+
+                         |    ChatBot       |  ← ai_chatbot.py
+                         |  (facade)        |
+                         +--+----------+----+
+                            |          |
+               +------------+          +------------+
+               |                                    |
+      +--------v---------+              +-----------v------+
+      |   LLMBot         |              |   SimpleBot      |
+      |  (OpenAI API)    |              |  (CSV patterns)  |
+      +------------------+              +------------------+
 ```
 
 ---
 
-## 💬 Interaction with the Chatbot
+## 🤝 Contributing
+
+1. Fork this repository
+2. Create a feature branch: `git checkout -b feature/my-feature`
+3. Make your changes and run `pytest test_chatbot.py -v`
+4. Commit and push: `git push origin feature/my-feature`
+5. Open a pull request
+
+Please follow PEP 8 and include tests for any new logic.
 
-The chatbot can be integrated or extended with:
+### Adding dialog patterns
+
+To extend the offline pattern-matching bot, add rows to `dialog.csv`.
+Each question/answer pair uses two rows sharing the same `dialog_id`:
+
+```csv
+dialog_id,line_id,text
+8,1,What is Python?
+8,2,Python is a popular programming language!
+```
 
-- 📚 Custom datasets (CSV/TXT)
-- ☁️ Cloud API support
-- 🔊 Voice UI
-- 💡 Smart context-based conversations
+- `line_id` **1** = the user question (matched case-insensitively)
+- `line_id` **2** = the bot response
 
 ---
 
-## 📱 Additional Information
+## 📄 License
 
-- **Live Demo**: Coming soon!
-- **License**: [MIT License](./LICENSE)
+[MIT License](./LICENSE)
 
 ---
 
-## 🔗 Connect with Us
+## 🔗 Connect
 
-- 📧 Email: [joshuvavinith.g@care.ac.in](mailto:joshuvavinith.g@care.ac.in)
-- 🐙 GitHub: [@joshuvavinith](https://github.com/joshuvavinith)
+- 📧 [joshuavinith@gmail.com](mailto:joshuavinith@gmail.com)
+- 🐙 [@joshuvavinith](https://github.com/joshuvavinith)
diff --git a/ai_chatbot.py b/ai_chatbot.py
index baf17bd..64ee8da 100644
--- a/ai_chatbot.py
+++ b/ai_chatbot.py
@@ -26,6 +26,7 @@
 # Pattern-matching backend
 # ---------------------------------------------------------------------------
 
+
 class SimpleBot:
     """Offline chatbot backed by a CSV dialog dataset."""
 
@@ -265,14 +266,15 @@ def _parse_kaggle_dialogs(file_path: str) -> Optional[str]:
 
         temp_csv_path = os.path.join(os.path.dirname(file_path), "converted_dialog.csv")
         dialog_id = 0
-        with open(temp_csv_path, "w", encoding="utf-8") as fh:
-            fh.write("dialog_id,line_id,text\n")
+        with open(temp_csv_path, "w", encoding="utf-8", newline="") as fh:
+            writer = csv.writer(fh)
+            writer.writerow(["dialog_id", "line_id", "text"])
             for i in range(0, len(lines) - 1, 2):
                 q, a = lines[i].strip(), lines[i + 1].strip()
                 if q and a:
                     dialog_id += 1
-                    fh.write(f"{dialog_id},1,{q}\n")
-                    fh.write(f"{dialog_id},2,{a}\n")
+                    writer.writerow([dialog_id, 1, q])
+                    writer.writerow([dialog_id, 2, a])
 
         print(f"Converted {dialog_id} dialog pairs to CSV format")
         return temp_csv_path
diff --git a/api.py b/api.py
index 42b2689..6d3c9ed 100644
--- a/api.py
+++ b/api.py
@@ -46,10 +46,15 @@
 # Per-session bots (keyed by session_id string)
 _sessions: dict[str, ChatBot] = {}
 
+# Dialog file used when creating new sessions (updated by /train)
+_default_dialog_file: Optional[str] = None
+
 
 def clear_all_sessions() -> None:
-    """Remove all active sessions. Intended for use in tests."""
+    """Remove all active sessions and reset the default dialog file. Intended for use in tests."""
+    global _default_dialog_file
     _sessions.clear()
+    _default_dialog_file = None
 
 
 def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]:
@@ -59,7 +64,7 @@ def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]:
         session_id = str(uuid.uuid4())
 
     if session_id not in _sessions:
-        _sessions[session_id] = ChatBot()
+        _sessions[session_id] = ChatBot(dialog_file=_default_dialog_file)
 
     return session_id, _sessions[session_id]
 
@@ -127,21 +132,24 @@ def reset_session(session_id: str):
 def train(request: TrainRequest):
     """Reload pattern-matching data from a CSV file on the server.
 
-    The file must exist on the server filesystem.  This endpoint only
-    affects future sessions created after the reload (existing sessions
-    keep their current bot instance).
+    The file must exist on the server filesystem.  All existing sessions are
+    cleared so that subsequent requests create new sessions using the updated
+    training data.
     """
     if not os.path.exists(request.dialog_file):
         raise HTTPException(
             status_code=404,
-            detail=f"File not found: {request.dialog_file}",
+            detail="Training file not found on server.",
         )
 
-    # Retrain a fresh bot and store it as the template for new sessions
+    # Retrain a fresh bot to validate and count the patterns
     fresh_bot = ChatBot(dialog_file=request.dialog_file, force_offline=True)
     patterns = fresh_bot.pattern_count
 
-    # Clear all existing sessions so next requests pick up new data
+    # Persist the dialog file so new sessions created after this point use it,
+    # then clear existing sessions so they pick up the new data on next request.
+    global _default_dialog_file
+    _default_dialog_file = request.dialog_file
     _sessions.clear()
 
     return TrainResponse(status="retrained", patterns_loaded=patterns)
diff --git a/requirements.txt b/requirements.txt
index 256e933..0fe8029 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,4 +14,5 @@ uvicorn[standard]>=0.23.0
 
 # Testing
 pytest>=7.4.0
+pytest-cov>=4.0.0
 httpx>=0.25.0
\ No newline at end of file
diff --git a/test_chatbot.py b/test_chatbot.py
index fb814a2..4c4c07f 100644
--- a/test_chatbot.py
+++ b/test_chatbot.py
@@ -8,7 +8,6 @@
 import os
 import tempfile
 import pytest
-from fastapi.testclient import TestClient
 
 # ---------------------------------------------------------------------------
 # Fixtures
@@ -216,3 +215,33 @@ def test_train_missing_file(self, api_client):
         client, _ = api_client
         resp = client.post("/train", json={"dialog_file": "/nonexistent/file.csv"})
         assert resp.status_code == 404
+
+    def test_train_affects_new_chat_session(self, api_client):
+        client, _ = api_client
+
+        new_dialog_content = (
+            "dialog_id,line_id,text\n"
+            "1,1,hello\n"
+            "1,2,Hello from NEW dialog!\n"
+        )
+        tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False)
+        try:
+            tmp.write(new_dialog_content)
+            tmp.flush()
+            tmp_path = tmp.name
+        finally:
+            tmp.close()
+
+        try:
+            # Retrain with the new dialog file
+            resp = client.post("/train", json={"dialog_file": tmp_path})
+            assert resp.status_code == 200
+            assert resp.json()["patterns_loaded"] > 0
+
+            # A brand-new session (no session_id) should use the new patterns
+            chat_resp = client.post("/chat", json={"message": "hello"})
+            assert chat_resp.status_code == 200
+            assert "Hello from NEW dialog" in chat_resp.json()["reply"]
+        finally:
+            if os.path.exists(tmp_path):
+                os.remove(tmp_path)
diff --git a/web_demo.py b/web_demo.py
index d141b9f..2909f04 100644
--- a/web_demo.py
+++ b/web_demo.py
@@ -8,8 +8,6 @@
     OPENAI_API_KEY   Set this to enable the LLM backend (optional).
 """
 
-import os
-
 import streamlit as st
 
 # Load .env if available
@@ -39,15 +37,10 @@
 # ---------------------------------------------------------------------------
 # Session-level chatbot instance
 # ---------------------------------------------------------------------------
-@st.cache_resource
-def _get_bot_factory() -> ChatBot:
-    """Return a template bot (loads data once); actual per-session bots copy from this."""
-    return ChatBot()
-
 
 # Per-session bot stored in session_state so each browser tab/user gets its own history
 if "bot" not in st.session_state:
-    st.session_state.bot = _get_bot_factory()
+    st.session_state.bot = ChatBot()
 
 bot: ChatBot = st.session_state.bot
 

From 741ac640d974d7330fb00ee67fbcc531e63c36cd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 7 Mar 2026 08:34:37 +0000
Subject: [PATCH 4/4] Fix CI failure: restore _default_dialog_file in api.py
 and other changes reverted by merge commit

Co-authored-by: joshuvavinith <146979257+joshuvavinith@users.noreply.github.com>
---
 ai_chatbot.py   |  1 +
 api.py          | 31 +++++++++++++++++++++++--------
 test_chatbot.py |  1 -
 web_demo.py     |  9 +--------
 4 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/ai_chatbot.py b/ai_chatbot.py
index baf17bd..6f40a21 100644
--- a/ai_chatbot.py
+++ b/ai_chatbot.py
@@ -26,6 +26,7 @@
 # Pattern-matching backend
 # ---------------------------------------------------------------------------
 
+
 class SimpleBot:
     """Offline chatbot backed by a CSV dialog dataset."""
 
diff --git a/api.py b/api.py
index 42b2689..7865b99 100644
--- a/api.py
+++ b/api.py
@@ -46,10 +46,15 @@
 # Per-session bots (keyed by session_id string)
 _sessions: dict[str, ChatBot] = {}
 
+# Dialog file used when creating new sessions (updated by /train)
+_default_dialog_file: Optional[str] = None
+
 
 def clear_all_sessions() -> None:
-    """Remove all active sessions. Intended for use in tests."""
+    """Remove all active sessions and reset the default dialog file. Intended for use in tests."""
+    global _default_dialog_file
     _sessions.clear()
+    _default_dialog_file = None
 
 
 def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]:
@@ -59,7 +64,14 @@ def _get_or_create_session(session_id: Optional[str]) -> tuple[str, ChatBot]:
         session_id = str(uuid.uuid4())
 
     if session_id not in _sessions:
-        _sessions[session_id] = ChatBot()
+        # When a dialog file has been explicitly configured via /train, create
+        # the bot in offline mode so the file is always used (no Kaggle fallback).
+        if _default_dialog_file is not None:
+            _sessions[session_id] = ChatBot(
+                dialog_file=_default_dialog_file, force_offline=True
+            )
+        else:
+            _sessions[session_id] = ChatBot()
 
     return session_id, _sessions[session_id]
 
@@ -127,21 +139,24 @@ def reset_session(session_id: str):
 def train(request: TrainRequest):
     """Reload pattern-matching data from a CSV file on the server.
 
-    The file must exist on the server filesystem.  This endpoint only
-    affects future sessions created after the reload (existing sessions
-    keep their current bot instance).
+    The file must exist on the server filesystem.  All existing sessions are
+    cleared so that subsequent requests create new sessions using the updated
+    training data.
     """
     if not os.path.exists(request.dialog_file):
         raise HTTPException(
             status_code=404,
-            detail=f"File not found: {request.dialog_file}",
+            detail="Training file not found on server. Please verify the file path.",
         )
 
-    # Retrain a fresh bot and store it as the template for new sessions
+    # Retrain a fresh bot to validate and count the patterns
     fresh_bot = ChatBot(dialog_file=request.dialog_file, force_offline=True)
     patterns = fresh_bot.pattern_count
 
-    # Clear all existing sessions so next requests pick up new data
+    # Persist the dialog file so new sessions created after this point use it,
+    # then clear existing sessions so they pick up the new data on next request.
+    global _default_dialog_file
+    _default_dialog_file = request.dialog_file
     _sessions.clear()
 
     return TrainResponse(status="retrained", patterns_loaded=patterns)
diff --git a/test_chatbot.py b/test_chatbot.py
index 468c8c6..4c4c07f 100644
--- a/test_chatbot.py
+++ b/test_chatbot.py
@@ -8,7 +8,6 @@
 import os
 import tempfile
 import pytest
-from fastapi.testclient import TestClient
 
 # ---------------------------------------------------------------------------
 # Fixtures
diff --git a/web_demo.py b/web_demo.py
index d141b9f..2909f04 100644
--- a/web_demo.py
+++ b/web_demo.py
@@ -8,8 +8,6 @@
     OPENAI_API_KEY   Set this to enable the LLM backend (optional).
 """
 
-import os
-
 import streamlit as st
 
 # Load .env if available
@@ -39,15 +37,10 @@
 # ---------------------------------------------------------------------------
 # Session-level chatbot instance
 # ---------------------------------------------------------------------------
-@st.cache_resource
-def _get_bot_factory() -> ChatBot:
-    """Return a template bot (loads data once); actual per-session bots copy from this."""
-    return ChatBot()
-
 
 # Per-session bot stored in session_state so each browser tab/user gets its own history
 if "bot" not in st.session_state:
-    st.session_state.bot = _get_bot_factory()
+    st.session_state.bot = ChatBot()
 
 bot: ChatBot = st.session_state.bot