# PolySensor/api_endpoints.py — Flask API endpoints for the vector database.
# Import necessary libraries for vector database API endpoints
# Blueprint: Flask's way to organize related routes into modules
# request: For handling incoming HTTP requests and extracting data
# jsonify: For converting Python objects to JSON responses
# get_vector_db: Function to get the global vector database instance
# os: Standard-library OS interface (imported but not used by the routes in this module)
from flask import Blueprint, request, jsonify
from vector_db import get_vector_db
import os

# Flask Blueprint that groups every vector-database route defined below.
# The application must register it; registration is not done in this module.
vector_api = Blueprint('vector_api', __name__)

# Module-level vector database handle, obtained once when this module is
# imported and used by all route handlers below.
# NOTE(review): get_vector_db() presumably returns a shared global instance — confirm in vector_db.py.
vector_db = get_vector_db()

@vector_api.route('/search', methods=['POST'])
def search_similar():
    """
    Search the vector database for content semantically similar to a query.

    Used to retrieve relevant stored analyses as context for chat responses.

    Expected JSON payload:
    {
        "query": "What is machine learning?",
        "n_results": 5,  // optional positive integer, defaults to 5
        "content_type": "analysis"  // optional filter
    }

    Returns:
        200 with {'results': [...], 'total_results': <int>} on success.
        400 with {'error': ...} when the body is not a JSON object or a
        parameter is missing/invalid.
        500 with {'error': ...} when the database search itself fails.
    """
    # silent=True yields None for a missing, mistyped or malformed JSON body
    # instead of raising; without it the client error would be caught by the
    # generic handler below and misreported as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    # The query must be real text; an empty or whitespace-only string
    # carries nothing to search for.
    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': 'Query is required'}), 400

    # Treat an explicit null the same as an omitted value.
    n_results = data.get('n_results')
    if n_results is None:
        n_results = 5
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(n_results, bool) or not isinstance(n_results, int) or n_results < 1:
        return jsonify({'error': 'n_results must be a positive integer'}), 400

    content_type = data.get('content_type', None)  # Optional filter by type

    try:
        # Perform the semantic search with the validated parameters
        results = vector_db.search_similar(query, n_results=n_results, content_type=content_type)

        return jsonify({
            'results': results,  # Matched documents as returned by the DB layer
            'total_results': len(results)  # Count for frontend display
        })
    except Exception as e:
        # Boundary handler: report backend failures as JSON rather than
        # letting Flask render an HTML error page.
        return jsonify({'error': str(e)}), 500

@vector_api.route('/add', methods=['POST'])
def add_content():
    """
    Add new content to the vector database for future semantic search.

    Expected JSON payload:
    {
        "content": "Analysis result text...",
        "metadata": {"filename": "doc.pdf", "size": 1234},  // optional object
        "content_type": "analysis"  // optional, defaults to "unknown"
    }

    Returns:
        200 with {'success': True, 'doc_id': ..., 'message': ...} on success.
        400 with {'error': ...} when the body is not a JSON object, the
        content is missing/blank, or metadata is not an object.
        500 with {'error': ...} when storing the content fails.
    """
    # silent=True yields None for a missing, mistyped or malformed JSON body
    # instead of raising; without it the client error would be caught by the
    # generic handler below and misreported as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    # The content must be real text; blank input has nothing to embed.
    content = data.get('content', '')
    if not isinstance(content, str) or not content.strip():
        return jsonify({'error': 'Content is required'}), 400

    # Treat an explicit null the same as an omitted value.
    metadata = data.get('metadata')
    if metadata is None:
        metadata = {}
    if not isinstance(metadata, dict):
        return jsonify({'error': 'metadata must be a JSON object'}), 400

    content_type = data.get('content_type', 'unknown')  # Category for filtering

    try:
        # Store the content; a falsy return value signals failure
        doc_id = vector_db.add_content(content, metadata=metadata, content_type=content_type)

        if not doc_id:
            return jsonify({'error': 'Failed to add content'}), 500

        return jsonify({
            'success': True,
            'doc_id': doc_id,  # Identifier returned by the DB layer
            'message': 'Content added successfully'
        })
    except Exception as e:
        # Boundary handler: report backend failures as JSON rather than
        # letting Flask render an HTML error page.
        return jsonify({'error': str(e)}), 500

@vector_api.route('/stats', methods=['GET'])
def get_stats():
    """
    Report statistics for the vector database collection.

    Returns:
        The JSON-encoded result of vector_db.get_collection_stats() on
        success, or {'error': <message>} with HTTP 500 if the call raises.
    """
    try:
        # Ask the DB layer for its stats and serialise them directly
        return jsonify(vector_db.get_collection_stats())
    except Exception as exc:
        # Any failure while fetching or serialising is reported as JSON
        failure = {'error': str(exc)}
        return jsonify(failure), 500

@vector_api.route('/delete/<doc_id>', methods=['DELETE'])
def delete_content(doc_id):
    """
    Remove one document from the vector database.

    Args:
        doc_id (str): Identifier of the document to delete, taken from the URL.

    Returns:
        {'success': True, 'message': ...} when the DB layer reports success,
        otherwise {'error': ...} with HTTP 500 (both for a falsy result and
        for a raised exception).
    """
    try:
        deleted = vector_db.delete_content(doc_id)

        # Guard clause: a falsy result means the DB layer did not delete it
        if not deleted:
            return jsonify({'error': 'Failed to delete content'}), 500

        confirmation = {'success': True, 'message': 'Content deleted successfully'}
        return jsonify(confirmation)
    except Exception as exc:
        # Any exception from the DB layer is reported as JSON
        return jsonify({'error': str(exc)}), 500

@vector_api.route('/all', methods=['GET'])
def get_all_content():
    """
    Get stored content from the vector database (admin/debug endpoint).

    Retrieves documents without a similarity search.

    Query Parameters:
        limit (int): Maximum number of documents to return (default: 100).
            Must be a positive integer.

    Returns:
        200 with {'documents': [...]} on success.
        400 with {'error': ...} when limit is not a positive integer.
        500 with {'error': ...} when the database query fails.
    """
    # Parse the limit outside the generic handler so a malformed value is
    # reported as a client error (400) rather than a server error (500).
    raw_limit = request.args.get('limit', '100')
    try:
        limit = int(raw_limit)
    except ValueError:
        return jsonify({'error': 'limit must be a positive integer'}), 400

    # Zero or negative limits are rejected rather than forwarded, since the
    # DB layer's handling of them is not defined here and could defeat the cap.
    if limit < 1:
        return jsonify({'error': 'limit must be a positive integer'}), 400

    try:
        # Retrieve documents up to the limit
        docs = vector_db.get_all_content(limit=limit)

        return jsonify({'documents': docs})
    except Exception as e:
        # Boundary handler: report backend failures as JSON rather than
        # letting Flask render an HTML error page.
        return jsonify({'error': str(e)}), 500