Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 278 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from flask import Flask, request, jsonify
from bson import ObjectId

from io import BytesIO

from src.handlers.glossary_handler import GlossaryHandler
from src.handlers.knowledge_graph_handler import KnowledgeGraphHandler
from src.generators.answer_generator import AnswerGenerator
from src.services.storage_service import StorageService
from src.generators.answer_generator import AnswerGenerator

from src.config.config import Config
from src.config.logging_config import setup_logging
Expand All @@ -15,9 +16,9 @@

# Application-wide objects, created once when this module is imported.
config = Config()
logger = setup_logging(config.logging_config)
# NOTE(review): AnswerGenerator is instantiated twice in this listing (here and
# again below). This looks like the removed and the added side of a diff shown
# together - confirm that only one assignment exists in the real file.
answer_generator = AnswerGenerator(config)
storage_service = StorageService()
glossary_handler = GlossaryHandler()
answer_generator = AnswerGenerator(config)

# Flask application object that the route decorators below register against.
app = Flask(__name__)

Expand Down Expand Up @@ -80,18 +81,291 @@ def get_all_documents():
def get_answer():
    """
    Answer a question and record both sides of the exchange in the chat store.

    Request body (JSON object):
        question (required): the text passed to the answer generator.
        session_id (optional): chat session to append to; when missing or
            empty a new id is generated from a MongoDB ObjectId.

    Returns:
        200 with ``answer``, ``explanation`` and ``session_id`` on success,
        400 when the body is not a JSON object containing ``question``,
        500 with the error text when saving or answer generation fails.
    """
    data = request.get_json()

    # get_json() can yield None or a non-object JSON value (list, string, ...);
    # the membership test below would then raise or give a misleading result,
    # so reject anything that is not a dict up front.
    if not isinstance(data, dict) or 'question' not in data:
        return jsonify({"error": "Question field is required"}), 400

    question = data['question']
    session_id = data.get('session_id')  # Optional session_id from frontend

    # If no session_id provided, create a new one using MongoDB ObjectId
    if not session_id:
        session_id = str(ObjectId())
        logger.info(f"Created new session for query: {session_id}")

    try:
        # Save user question to database
        save_chat_message(session_id, 'user', question)
        logger.info(f"Saved user question to session {session_id}")

        # Ask the answer generator; the result is read as a mapping with
        # 'answer' and 'explanation' keys.
        response = answer_generator.generate_answer(question)
        answer = str(response["answer"])
        explanation = response["explanation"]

        # Save LLM response to database
        save_chat_message(session_id, 'assistant', answer, explanation)
        logger.info(f"Saved LLM response to session {session_id}")

        return jsonify({
            "answer": answer,
            "explanation": explanation,
            "session_id": session_id  # Return session_id to frontend
        })
    except Exception as e:
        # Top-level boundary of the endpoint: log and report as a 500.
        logger.error(f"Error in query endpoint: {e}")
        return jsonify({"error": str(e)}), 500



# Enhanced chat endpoint that uses knowledge graph for responses
@app.route('/api/chat/<string:session_id>/query', methods=['POST'])
def chat_with_knowledge_graph(session_id):
    """
    Send a question to a specific chat session and get an answer back.

    The user question is stored, the answer generator is queried, and the
    assistant reply is stored in the same session before responding.

    Args:
        session_id: chat session id taken from the URL.

    Request body (JSON object):
        question (required): the text passed to the answer generator.

    Returns:
        200 with ``answer``, ``explanation``, ``session_id`` and the two
        serialized messages (``user_message``, ``assistant_message``),
        400 when the body is not a JSON object containing ``question``,
        500 with the error text when saving or answer generation fails.
    """
    data = request.get_json()

    # Reject a missing or non-object JSON body instead of failing on the
    # membership test.
    if not isinstance(data, dict) or 'question' not in data:
        return jsonify({"error": "Question field is required"}), 400

    question = data['question']

    try:
        # Save user question to database
        user_message = save_chat_message(session_id, 'user', question)

        # Ask the answer generator; the result is read as a mapping with
        # 'answer' and 'explanation' keys.
        response = answer_generator.generate_answer(question)

        # A stray early `return jsonify(response)` used to sit here, which made
        # everything below unreachable: the assistant reply was never stored
        # and the documented response shape was never produced.
        logger.info(f"Generated answer for session {session_id}: {response}")

        # Save LLM response to database
        assistant_message = save_chat_message(
            session_id, 'assistant', str(response["answer"]), response["explanation"]
        )

        logger.info(f"Processed chat query for session {session_id}")

        return jsonify({
            "answer": str(response["answer"]),
            "session_id": session_id,
            "user_message": serialize_message(user_message),
            "assistant_message": serialize_message(assistant_message),
            "explanation": response["explanation"]
        }), 200

    except Exception as e:
        # Top-level boundary of the endpoint: log and report as a 500.
        logger.error(f"Error in chat query endpoint: {e}")
        return jsonify({"error": str(e)}), 500




# Chat functionality setup: both names are borrowed from the storage service.
# `datetime` is used below both for `.utcnow()` and as an isinstance() target.
datetime = storage_service.datetime
# Collection handle that every chat endpoint in this module reads and writes.
chat_collection = storage_service.chat_collection

def save_chat_message(session_id, role, answer, explanation=None):
    """
    Utility function to save a chat message to the database.

    Args:
        session_id (str): The chat session ID
        role (str): 'user' or 'assistant'
        answer (str): The message content, stored under the 'content' key
        explanation: Optional explanation data stored alongside the message.
            Defaults to a fresh empty list; None is stored as an empty list.

    Returns:
        dict: The saved message document, including the '_id' assigned on insert
    """
    # A list literal as the default would be one shared object stored by
    # reference in every message; build a new list per call instead.
    if explanation is None:
        explanation = []

    message = {
        'session_id': session_id,
        'role': role,
        'content': answer,
        'explanation': explanation,
        'timestamp': datetime.utcnow()
    }
    result = chat_collection.insert_one(message)
    message['_id'] = result.inserted_id
    return message

def serialize_message(msg):
    """
    Convert a stored chat message document into a JSON-friendly dict.

    Args:
        msg (dict): message document with '_id', 'session_id', 'role',
            'content' and 'timestamp' keys, and optionally 'explanation'.

    Returns:
        dict: the same data with '_id' exposed as the string 'id', a datetime
        timestamp rendered in ISO 8601 form (other timestamp values are passed
        through unchanged), and 'explanation' defaulting to an empty list.
    """
    timestamp = msg['timestamp']
    if isinstance(timestamp, datetime):
        timestamp = timestamp.isoformat()

    return {
        'id': str(msg['_id']),
        'session_id': msg['session_id'],
        'role': msg['role'],
        'content': msg['content'],
        'timestamp': timestamp,
        # Documents written without an explanation (e.g. plain chat messages)
        # have no such key; indexing it directly raised KeyError.
        'explanation': msg.get('explanation', [])
    }

# Endpoint to list chat sessions
@app.route('/api/chat/sessions', methods=['GET'])
def list_chat_sessions():
    """
    List chat sessions with summary metadata, most recently active first.

    Sessions are derived by grouping stored messages on 'session_id'; at most
    50 sessions are returned.

    Returns:
        200 with ``sessions`` (each entry has session_id, last_message_time,
        message_count, suggested_topic and preview) and ``total_count``,
        500 with the error text when the aggregation or formatting fails.
    """
    try:
        pipeline = [
            # $first/$last in $group only select well-defined documents when
            # the input is ordered, so sort messages chronologically first.
            {
                '$sort': {'timestamp': 1}
            },
            {
                '$group': {
                    '_id': '$session_id',
                    'last_message_time': {'$max': '$timestamp'},
                    'message_count': {'$sum': 1},
                    # Content of the earliest message when it came from the
                    # user, otherwise None.
                    'first_user_message': {'$first': {'$cond': [{'$eq': ['$role', 'user']}, '$content', None]}},
                    'last_message': {'$last': '$content'}
                }
            },
            {
                '$sort': {'last_message_time': -1}
            },
            {
                '$limit': 50  # Limit to last 50 sessions
            }
        ]

        sessions = list(chat_collection.aggregate(pipeline))

        # Format the response
        formatted_sessions = []
        for session in sessions:
            # Topic comes from the first user message, falling back to the
            # last message when the session did not start with a user message.
            topic = generate_topic_suggestion(
                session.get('first_user_message') or session.get('last_message', '')
            )

            last_time = session['last_message_time']
            if isinstance(last_time, datetime):
                last_time = last_time.isoformat()

            formatted_sessions.append({
                'session_id': session['_id'],
                'last_message_time': last_time,
                'message_count': session['message_count'],
                'suggested_topic': topic,
                'preview': truncate_text(session.get('last_message', ''), 100)
            })

        return jsonify({
            'sessions': formatted_sessions,
            'total_count': len(formatted_sessions)
        }), 200

    except Exception as e:
        # Top-level boundary of the endpoint: log and report as a 500.
        logger.error(f"Error listing chat sessions: {e}")
        return jsonify({'error': f'Error listing chat sessions: {str(e)}'}), 500

def generate_topic_suggestion(message):
    """
    Suggest a short topic label for a chat based on one message.

    The lower-cased message is scanned for keyword substrings; the first topic
    (in table order) with any matching keyword wins. Without a match the first
    three whitespace-separated words are title-cased and returned.

    Returns:
        "New Chat" for an empty/None message, a topic label on a keyword hit,
        the title-cased leading words otherwise, or "General Discussion" when
        the message contains no words at all.
    """
    if not message:
        return "New Chat"

    # Ordered (topic, keywords) pairs; order decides which topic wins on ties.
    keyword_table = (
        ("Technical Support", ("error", "bug", "issue", "problem", "fix", "troubleshoot")),
        ("Data Analysis", ("data", "analysis", "chart", "graph", "statistics", "metrics")),
        ("Knowledge Query", ("what is", "how to", "explain", "define", "meaning")),
        ("Document Processing", ("pdf", "document", "file", "upload", "process")),
        ("API Integration", ("api", "endpoint", "request", "response", "integration")),
        ("Database", ("database", "query", "mongodb", "neo4j", "collection")),
        ("Configuration", ("config", "setup", "install", "configure", "environment")),
    )

    lowered = message.lower()
    for label, needles in keyword_table:
        for needle in needles:
            if needle in lowered:
                return label

    # No keyword hit: fall back to the first few words of the message.
    leading_words = message.split()[:3]
    if not leading_words:
        return "General Discussion"
    return " ".join(leading_words).title()

def truncate_text(text, max_length=100):
    """
    Shorten text to at most max_length characters plus a trailing ellipsis.

    Text that already fits is returned unchanged and empty/None input yields
    an empty string. Longer text is cut at max_length, then trimmed back to
    just before the last space in the cut (if there is one), and "..." is
    appended.
    """
    if not text:
        return ""
    if len(text) > max_length:
        clipped = text[:max_length]
        head, space, _ = clipped.rpartition(' ')
        # With no space in the clipped part, keep the whole clipped chunk.
        kept = head if space else clipped
        return kept + "..."
    return text


# Endpoint to create a new chat session
@app.route('/api/chat/create', methods=['POST'])
def create_chat_session():
    """
    Hand out a fresh chat session id for the frontend to start a thread with.

    The id is the string form of a new MongoDB ObjectId. Nothing is written to
    the database here.

    Returns:
        201 with ``session_id`` and a confirmation ``message``,
        500 with the error text if anything fails.
    """
    try:
        new_id = ObjectId()
        session_id = str(new_id)

        # Metadata describing the session. It is built but deliberately not
        # persisted for now; to start storing it, uncomment the insert below.
        session_metadata = dict(
            session_id=session_id,
            created_at=datetime.utcnow(),
            status='active',
        )
        # chat_collection.insert_one(session_metadata)

        logger.info(f"Created new chat session: {session_id}")

        payload = {
            'session_id': session_id,
            'message': 'Chat session created successfully'
        }
        return jsonify(payload), 201

    except Exception as e:
        logger.error(f"Error creating chat session: {e}")
        return jsonify({'error': f'Error creating chat session: {str(e)}'}), 500

# Endpoint to add a message to a specific chat session
@app.route('/api/chat/<string:session_id>', methods=['POST'])
def add_message(session_id):
    """
    Append a message to a chat session.

    Args:
        session_id: chat session id taken from the URL.

    Request body (JSON object):
        role (required): who sent the message.
        content (required): the message text.

    Returns:
        201 with the serialized stored message,
        400 when the body is not a JSON object or role/content is missing.
    """
    data = request.get_json()

    # A missing or non-object body has no .get(); answer with the same 400
    # the field check below produces instead of crashing.
    if not isinstance(data, dict):
        return jsonify({'error': 'role and content are required'}), 400

    role = data.get('role')
    content = data.get('content')
    if not role or not content:
        return jsonify({'error': 'role and content are required'}), 400

    # Go through the shared helper so the stored document has the same fields
    # as every other message (including 'explanation', which
    # serialize_message reads).
    message = save_chat_message(session_id, role, content)
    return jsonify(serialize_message(message)), 201

# Endpoint to retrieve chat history for a specific session
@app.route('/api/chat/<string:session_id>', methods=['GET'])
def get_history(session_id):
    """
    Return the messages of a chat session in chronological order.

    Args:
        session_id: chat session id taken from the URL.

    Query parameters:
        limit (int, default 100): maximum number of messages to return.
        skip (int, default 0): number of messages to skip from the start.

    Returns:
        200 with a JSON list of serialized messages,
        400 when limit/skip are not integers or skip is negative.
    """
    # Bad paging values previously escaped as an unhandled ValueError (500).
    try:
        limit = int(request.args.get('limit', 100))
        skip = int(request.args.get('skip', 0))
    except ValueError:
        return jsonify({'error': 'limit and skip must be integers'}), 400

    # The cursor rejects a negative skip with ValueError; report it as a
    # client error instead.
    if skip < 0:
        return jsonify({'error': 'skip must be non-negative'}), 400

    cursor = (
        chat_collection.find({'session_id': session_id})
        .sort('timestamp', 1)
        .skip(skip)
        .limit(limit)
    )
    messages = [serialize_message(msg) for msg in cursor]
    return jsonify(messages), 200


# Endpoint to delete chat history for a specific session
@app.route('/api/chat/<string:session_id>', methods=['DELETE'])
def delete_history(session_id):
    """
    Delete every stored message of a chat session.

    Returns:
        200 with ``deleted_count``, the number of removed documents.
    """
    session_filter = {'session_id': session_id}
    outcome = chat_collection.delete_many(session_filter)
    removed = outcome.deleted_count
    return jsonify({'deleted_count': removed}), 200


# New endpoint: add glossary items
@app.route('/api/glossary/add', methods=['POST'])
Expand Down Expand Up @@ -143,7 +417,7 @@ def delete_glossary(item_id):
except Exception as e:
logger.error(f"Error deleting glossary item: {e}")
return jsonify({"error": str(e)}), 500


# Start Flask's built-in server only when this module is executed directly.
# NOTE(review): debug=True enables Flask's debug mode - confirm this entry
# point is never used for a production deployment.
if __name__ == "__main__":
    app.run(debug=True)
Binary file modified src/config/__pycache__/config.cpython-312.pyc
Binary file not shown.
18 changes: 10 additions & 8 deletions src/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ def __init__(self):
# self.neo4j_username = os.getenv("NEO4J_USERNAME", "neo4j")
# self.neo4j_password = os.getenv("NEO4J_PASSWORD", "MTmhQ8kiaRqRltgDThU_4hYE-aCCpIVk5aNmcUnKWKU")

self.neo4j_uri = os.getenv("NEO4J_URI", "neo4j+s://")
self.neo4j_uri = os.getenv("NEO4J_URI", "neo4j+s://c95a3680.databases.neo4j.io")
self.neo4j_username = os.getenv("NEO4J_USERNAME", "neo4j")
self.neo4j_password = os.getenv("NEO4J_PASSWORD", "")
self.neo4j_password = os.getenv("NEO4J_PASSWORD", "5SYecqiUcLZz4pzO9CDdGs9jlU5rOKUQ6ddtK6DEl1o")

self.deepinfra_api_token = os.getenv("DEEPINFRA_API_TOKEN", "")
self.deepinfra_api_token = os.getenv("DEEPINFRA_API_TOKEN", "YuGM4YMWqQU4kVM0u47Ntev9gUjFv2Om")

self.groq_api_key = os.getenv("GROQ_API_KEY", "")
self.groq_api_key = os.getenv("GROQ_API_KEY", "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")

#self.chat_template = os.getenv("CHAT_TEMPLATE", """Answer the question based only on the following context:

self.chat_template = os.getenv("CHAT_TEMPLATE",
"""
Expand Down Expand Up @@ -88,16 +90,16 @@ def __init__(self):
# MongoDB
self.mongo_uri = os.getenv("MONGO_URI", "mongodb+srv://kavindamadhuranga74:fLaa4T079luktEQv@cluster0.xkdqxqw.mongodb.net/?appName=Cluster0")
# self.mongo_glossary_db = os.getenv("GlossaryDB", "your_ai_db")
# self.mongo_glossary_collection = os.getenv("GlossaryCollection", "pdf_store")


# self.mongo_glossary_collection = os.getenv("GlossaryCollection", "pdf_store")
self.mongo_glossary_db = os.getenv("GlossaryDB", "glossary_database")
self.mongo_glossary_collection = os.getenv("GlossaryCollection", "glossary_collection")


self.mongo_metadata_db = os.getenv("METADATA_DB", "your_ai_db")
self.mongo_metadata_collection = os.getenv("METADATA_COLLECTION", "pdf_store")

# Chat configuration
self.mongo_chat_db = os.getenv("CHAT_DB", "your_ai_db")
self.mongo_chat_collection = os.getenv("CHAT_COLLECTION", "chat_sessions")

# Azure
self.azure_connection_string = os.getenv("AZURE_CONNECTION_STRING", "DefaultEndpointsProtocol=https;AccountName=researchpdfstore;AccountKey=SQnY5MvTblA+bEu7bPw3orgeZhZzvg6jNTSF4c7yWCFsdk3cwWe5pqAPgPRGdCiwr2EIY/oKK8gR+AStFcG4WQ==;EndpointSuffix=core.windows.net")
Expand Down
Binary file modified src/generators/__pycache__/answer_generator.cpython-312.pyc
Binary file not shown.
Binary file not shown.
Binary file modified src/handlers/__pycache__/glossary_handler.cpython-312.pyc
Binary file not shown.
Binary file modified src/handlers/__pycache__/knowledge_graph_handler.cpython-312.pyc
Binary file not shown.
Binary file modified src/handlers/__pycache__/query_handler.cpython-312.pyc
Binary file not shown.
Binary file modified src/services/__pycache__/mongo_service.cpython-312.pyc
Binary file not shown.
Binary file modified src/services/__pycache__/storage_service.cpython-312.pyc
Binary file not shown.
11 changes: 8 additions & 3 deletions src/services/storage_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,18 @@ def __init__(self):
logger.info("Successfully connected to Azure storage container")
except Exception as e:
logger.error(f"Failed to connect to Azure storage container: {e}")
raise

# Mongo config
raise # Mongo config
self.mongo_client = MongoClient(config.mongo_uri)
self.metadata_db = self.mongo_client[config.mongo_metadata_db]
self.metadata_collection = self.metadata_db[config.mongo_metadata_collection]

# Chat collection setup
self.chat_db = self.mongo_client[config.mongo_chat_db]
self.chat_collection = self.chat_db[config.mongo_chat_collection]

# For datetime reference in main.py
self.datetime = datetime

try:
logger.info("Connected to MongoDB")
except Exception as e:
Expand Down