The LLM Guardian Cluster exposes a comprehensive REST API for interacting with specialists, guardians, and cluster management functionality.
All API endpoints require authentication using OAuth2 Bearer tokens:
Authorization: Bearer <your-access-token>

Production: https://api.llm-guardian-cluster.com/v1
Development: http://localhost:8000/v1
Process a request through the LLM Guardian Cluster.
POST /requests

Request Body:
{
"query": "string",
"specialist_preference": "reasoning|memory|communication|qa|monitor",
"quality_threshold": 0.8,
"timeout": 30,
"context": {
"user_id": "string",
"session_id": "string",
"metadata": {}
},
"options": {
"enable_guardian_monitoring": true,
"require_safety_validation": true,
"max_specialists": 3
}
}

Response:
{
"request_id": "uuid",
"status": "success|error|timeout",
"response": {
"content": "string",
"confidence": 0.95,
"quality_score": 0.87,
"processing_time": 2.34,
"specialists_used": ["reasoning", "memory"],
"guardian_reports": [
{
"guardian_type": "watcher",
"specialist": "reasoning",
"evaluation_score": 0.9,
"issues_detected": [],
"recommendations": []
}
]
},
"metadata": {
"timestamp": "2024-01-01T12:00:00Z",
"processing_duration": 2.34,
"resource_usage": {
"cpu_time": 1.2,
"gpu_memory": "4.5GB"
}
}
}

Retrieve the status and results of a previously submitted request.
GET /requests/{request_id}

Response:
{
"request_id": "uuid",
"status": "processing|completed|failed",
"progress": 75,
"estimated_completion": "2024-01-01T12:05:00Z",
"partial_results": {},
"final_result": {}
}

Cancel a processing request.
DELETE /requests/{request_id}

Get information about available specialists.
GET /specialists

Response:
{
"specialists": [
{
"id": "reasoning-specialist-1",
"type": "reasoning",
"status": "active|inactive|busy",
"capabilities": ["logical_reasoning", "problem_solving", "analysis"],
"performance_metrics": {
"average_response_time": 1.5,
"quality_score": 0.89,
"requests_processed": 1250,
"error_rate": 0.02
},
"resource_usage": {
"cpu_utilization": 65,
"gpu_utilization": 78,
"memory_usage": "8.2GB"
}
}
]
}

Get detailed information about a specific specialist.
GET /specialists/{specialist_id}

Response:
{
"id": "reasoning-specialist-1",
"type": "reasoning",
"model_info": {
"model_name": "reasoning-specialist-v2.1",
"model_size": "7B",
"context_length": 32768,
"training_date": "2024-01-01"
},
"configuration": {
"temperature": 0.7,
"max_tokens": 2048,
"top_p": 0.9
},
"health": {
"status": "healthy",
"last_health_check": "2024-01-01T12:00:00Z",
"health_score": 0.95
}
}

Update the configuration of a specialist.
PATCH /specialists/{specialist_id}/config

Request Body:
{
"configuration": {
"temperature": 0.8,
"max_tokens": 4096
}
}

Get information about all guardians in the system.
GET /guardians

Response:
{
"guardians": [
{
"id": "watcher-guardian-1",
"type": "watcher",
"specialist_id": "reasoning-specialist-1",
"status": "active",
"performance": {
"evaluations_performed": 5420,
"average_evaluation_time": 0.3,
"accuracy_score": 0.94
}
}
]
}

Retrieve reports from a specific guardian.
GET /guardians/{guardian_id}/reports

Query Parameters:
start_time: ISO 8601 timestamp
end_time: ISO 8601 timestamp
limit: Maximum number of reports (default: 100)
severity: Filter by severity level
Response:
{
"reports": [
{
"id": "report-uuid",
"guardian_id": "watcher-guardian-1",
"specialist_id": "reasoning-specialist-1",
"timestamp": "2024-01-01T12:00:00Z",
"type": "quality_evaluation",
"severity": "info|warning|error|critical",
"summary": "Quality evaluation completed",
"details": {
"quality_score": 0.87,
"issues_detected": [],
"recommendations": []
}
}
],
"pagination": {
"total": 500,
"limit": 100,
"offset": 0,
"has_more": true
}
}

Retrieve analytical insights from guardians.
GET /guardians/insights

Query Parameters:
time_window: Time window for analysis (1h, 24h, 7d, 30d)
guardian_types: Comma-separated list of guardian types
specialist_types: Comma-separated list of specialist types
Response:
{
"insights": {
"performance_trends": {
"quality_scores": {
"trend": "improving",
"change_percentage": 5.2,
"current_average": 0.87
},
"response_times": {
"trend": "stable",
"change_percentage": -1.1,
"current_average": 2.1
}
},
"common_issues": [
{
"issue_type": "context_insufficiency",
"frequency": 45,
"impact_score": 0.7,
"recommended_actions": ["improve_context_retrieval"]
}
],
"improvement_opportunities": [
{
"area": "prompt_optimization",
"potential_improvement": 15,
"confidence": 0.8
}
]
}
}

Get overall cluster health and status.
GET /cluster/status

Response:
{
"cluster": {
"id": "llm-guardian-cluster-prod",
"status": "healthy|degraded|unhealthy",
"version": "1.2.3",
"uptime": "72h15m30s",
"health_score": 0.95
},
"components": {
"specialists": {
"total": 8,
"active": 7,
"inactive": 1,
"health_score": 0.92
},
"guardians": {
"total": 12,
"active": 12,
"health_score": 0.98
},
"infrastructure": {
"databases": "healthy",
"message_queues": "healthy",
"monitoring": "healthy"
}
},
"performance": {
"requests_per_second": 45.2,
"average_response_time": 2.1,
"error_rate": 0.01,
"resource_utilization": {
"cpu": 68,
"memory": 72,
"gpu": 85
}
}
}

Get detailed system metrics.
GET /cluster/metrics

Query Parameters:
start_time: ISO 8601 timestamp
end_time: ISO 8601 timestamp
metrics: Comma-separated list of metric names
granularity: Time granularity (1m, 5m, 1h, 1d)
Response:
{
"metrics": {
"request_rate": [
{
"timestamp": "2024-01-01T12:00:00Z",
"value": 42.5
}
],
"quality_scores": [
{
"timestamp": "2024-01-01T12:00:00Z",
"specialist": "reasoning",
"value": 0.87
}
],
"resource_utilization": [
{
"timestamp": "2024-01-01T12:00:00Z",
"resource": "gpu_memory",
"value": 8.2
}
]
}
}

Update cluster configuration.
PATCH /cluster/config

Request Body:
{
"specialists": {
"reasoning": {
"min_replicas": 2,
"max_replicas": 5,
"auto_scaling": true
}
},
"guardians": {
"quality_threshold": 0.8,
"monitoring_interval": 30
}
}

Upload training data for specialist improvement.
POST /data/training

Request Body (multipart/form-data):
file: training_data.jsonl
specialist_type: reasoning
data_type: improvement
metadata: {"source": "user_feedback", "quality": "high"}
Response:
{
"upload_id": "uuid",
"status": "processing",
"file_info": {
"filename": "training_data.jsonl",
"size": 1048576,
"records": 1000
},
"processing_status": {
"validated": 950,
"errors": 50,
"estimated_completion": "2024-01-01T12:30:00Z"
}
}

Search the knowledge base used by specialists.
GET /data/knowledge

Query Parameters:
query: Search query
limit: Maximum results (default: 10)
similarity_threshold: Minimum similarity score (0.0-1.0)
specialist_type: Filter by specialist type
Response:
{
"results": [
{
"id": "doc-uuid",
"content": "Knowledge base content...",
"similarity_score": 0.95,
"metadata": {
"source": "wikipedia",
"last_updated": "2024-01-01T00:00:00Z",
"specialist_types": ["reasoning", "memory"]
}
}
],
"total_results": 150,
"query_time": 0.045
}

Get improvement suggestions from guardians.
GET /improvements/suggestions

Query Parameters:
status: Filter by status (pending, approved, rejected, implemented)
specialist_type: Filter by specialist type
priority: Filter by priority (low, medium, high, critical)
Response:
{
"suggestions": [
{
"id": "improvement-uuid",
"guardian_id": "optimizer-guardian-1",
"specialist_id": "reasoning-specialist-1",
"type": "prompt_optimization",
"priority": "high",
"status": "pending",
"description": "Optimize prompt for better logical reasoning",
"expected_impact": {
"quality_improvement": 15,
"performance_impact": -2
},
"implementation_effort": "medium",
"created_at": "2024-01-01T12:00:00Z"
}
]
}

Approve an improvement suggestion for implementation.
POST /improvements/{improvement_id}/approve

Request Body:
{
"approval_notes": "Approved for implementation in next maintenance window",
"scheduled_implementation": "2024-01-02T02:00:00Z"
}

Track the implementation status of an improvement.
GET /improvements/{improvement_id}/status

Response:
{
"improvement_id": "improvement-uuid",
"status": "implementing",
"progress": 65,
"implementation_log": [
{
"timestamp": "2024-01-01T12:00:00Z",
"stage": "validation",
"status": "completed",
"details": "Improvement validated successfully"
},
{
"timestamp": "2024-01-01T12:30:00Z",
"stage": "staging_deployment",
"status": "in_progress",
"details": "Deploying to staging environment"
}
]
}

Connect to real-time monitoring stream.
const ws = new WebSocket("wss://api.llm-guardian-cluster.com/v1/ws/monitor");
ws.onmessage = function (event) {
const data = JSON.parse(event.data);
switch (data.type) {
case "quality_alert":
console.log("Quality alert:", data.payload);
break;
case "performance_metric":
console.log("Performance update:", data.payload);
break;
case "guardian_insight":
console.log("Guardian insight:", data.payload);
break;
}
};
// Subscribe to specific events
ws.send(
JSON.stringify({
action: "subscribe",
channels: ["quality_alerts", "performance_metrics", "guardian_insights"],
filters: {
specialist_types: ["reasoning", "memory"],
severity: ["warning", "error", "critical"],
},
})
);

Stream real-time updates for request processing.
const ws = new WebSocket("wss://api.llm-guardian-cluster.com/v1/ws/requests");
ws.send(
JSON.stringify({
action: "track_request",
request_id: "request-uuid",
})
);
ws.onmessage = function (event) {
const update = JSON.parse(event.data);
console.log("Request update:", update);
// {
// request_id: "request-uuid",
// stage: "specialist_processing",
// progress: 45,
// estimated_completion: "2024-01-01T12:05:00Z"
// }
};

All API endpoints use standard HTTP status codes and return structured error responses:
{
"error": {
"code": "SPECIALIST_UNAVAILABLE",
"message": "No reasoning specialists are currently available",
"details": {
"requested_specialist": "reasoning",
"available_specialists": ["memory", "communication"],
"estimated_wait_time": 120
},
"request_id": "uuid",
"timestamp": "2024-01-01T12:00:00Z"
}
}

Common Error Codes:
INVALID_REQUEST: Request validation failed
SPECIALIST_UNAVAILABLE: Requested specialist is not available
QUALITY_THRESHOLD_NOT_MET: Response quality below threshold
TIMEOUT: Request processing timeout
RATE_LIMIT_EXCEEDED: API rate limit exceeded
INSUFFICIENT_RESOURCES: Not enough system resources
GUARDIAN_ALERT: Guardian detected safety or quality issue
API endpoints are rate limited per user/API key:
- Standard endpoints: 1000 requests/hour
- Resource-intensive endpoints: 100 requests/hour
- Real-time WebSocket: 10 connections per user
Rate limit headers are included in responses:
X-RateLimit-Limit: 1000
X-RateLimit-Remaining: 999
X-RateLimit-Reset: 1609459200

from llm_guardian_cluster import LLMGuardianClient
client = LLMGuardianClient(
api_key="your-api-key",
base_url="https://api.llm-guardian-cluster.com/v1"
)
# Submit request
response = await client.submit_request(
query="Explain quantum computing",
specialist_preference="reasoning",
quality_threshold=0.8
)
print(f"Response: {response.content}")
print(f"Quality Score: {response.quality_score}")
# Monitor guardians
async for insight in client.stream_guardian_insights():
    print(f"Guardian insight: {insight}")

import { LLMGuardianClient } from "@llm-guardian/client";
const client = new LLMGuardianClient({
apiKey: "your-api-key",
baseURL: "https://api.llm-guardian-cluster.com/v1",
});
// Submit request
const response = await client.submitRequest({
query: "Explain quantum computing",
specialistPreference: "reasoning",
qualityThreshold: 0.8,
});
console.log("Response:", response.content);
console.log("Quality Score:", response.qualityScore);
// Real-time monitoring
client.monitorGuardians((insight) => {
console.log("Guardian insight:", insight);
});

Next: Deployment Strategies