From 6e5c22c450188f897ebf42fa279fcf04b97550f5 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Fri, 20 Feb 2026 16:09:11 +0530 Subject: [PATCH 01/27] remove unwanted file --- enrich.yml.backup | 157 ---------------------------------------------- 1 file changed, 157 deletions(-) delete mode 100644 enrich.yml.backup diff --git a/enrich.yml.backup b/enrich.yml.backup deleted file mode 100644 index 28cd5b3..0000000 --- a/enrich.yml.backup +++ /dev/null @@ -1,157 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Enrich service data and index in Qdrant" - method: post - accepts: json - returns: json - namespace: rag-search - allowlist: - body: - - field: service_id - type: string - description: "Unique service identifier" - - field: name - type: string - description: "Service name" - - field: description - type: string - description: "Service description" - - field: examples - type: array - description: "Example queries" - - field: entities - type: array - description: "Expected entity names" - - field: ruuter_type - type: string - description: "HTTP method (GET/POST)" - - field: current_state - type: string - description: "Service state (active/inactive/draft)" - - field: is_common - type: boolean - description: "Is common service" - -validate_request: - assign: - service_id: ${incoming.body.service_id} - service_name: ${incoming.body.name} - service_description: ${incoming.body.description} - next: check_required_fields - -check_required_fields: - switch: - - condition: ${!service_id} - next: assign_missing_service_id_error - - condition: ${!service_name} - next: assign_missing_name_error - - condition: ${!service_description} - next: assign_missing_description_error - next: prepare_service_data - -assign_missing_service_id_error: - assign: - error_response: { - success: false, - error: "MISSING_SERVICE_ID", - message: "service_id is required" - } - next: return_missing_service_id - -return_missing_service_id: - status: 400 - return: ${error_response} 
- next: end - -assign_missing_name_error: - assign: - error_response: { - success: false, - error: "MISSING_NAME", - message: "name is required" - } - next: return_missing_name - -return_missing_name: - status: 400 - return: ${error_response} - next: end - -assign_missing_description_error: - assign: - error_response: { - success: false, - error: "MISSING_DESCRIPTION", - message: "description is required" - } - next: return_missing_description - -return_missing_description: - status: 400 - return: ${error_response} - next: end - -prepare_service_data: - assign: - service_data: { - service_id: ${service_id}, - name: ${service_name}, - description: ${service_description}, - examples: ${incoming.body.examples || []}, - entities: ${incoming.body.entities || []}, - ruuter_type: ${incoming.body.ruuter_type || 'GET'}, - current_state: ${incoming.body.current_state || 'draft'}, - is_common: ${incoming.body.is_common || false} - } - next: stringify_service_data - -stringify_service_data: - assign: - service_json: ${JSON.stringify(service_data)} - next: execute_enrichment - -execute_enrichment: - call: http.post - args: - url: "[#RAG_SEARCH_CRON_MANAGER]/execute/service_enrichment/enrich_and_index" - query: - service_id: ${service_id} - service_data: ${service_json} - result: enrichment_result - next: assign_success_response - on_error: handle_enrichment_error - -handle_enrichment_error: - log: "ERROR: Service enrichment failed - ${enrichment_result.error || 'Unknown error'}" - next: assign_error_response - -assign_success_response: - assign: - success_response: { - success: true, - service_id: ${service_id}, - message: "Service enriched and indexed successfully", - enrichment_details: ${enrichment_result.response.body} - } - next: return_success - -assign_error_response: - assign: - error_response: { - success: false, - error: "ENRICHMENT_FAILED", - message: "Failed to enrich and index service", - details: ${enrichment_result.response.body || enrichment_result.error} - } - 
next: return_enrichment_error - -return_success: - status: 200 - return: ${success_response} - next: end - -return_enrichment_error: - status: 500 - return: ${error_response} - next: end From 38d05337f6c1ea7cd35e5e73b5c3b4b219bbeeb3 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Fri, 20 Feb 2026 16:33:02 +0530 Subject: [PATCH 02/27] updated changes --- src/intent_data_enrichment/__init__.py | 8 ++++---- src/intent_data_enrichment/api_client.py | 4 ++-- src/intent_data_enrichment/main_enrichment.py | 6 +++--- src/intent_data_enrichment/qdrant_manager.py | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/intent_data_enrichment/__init__.py b/src/intent_data_enrichment/__init__.py index 8b538d6..eb197d3 100644 --- a/src/intent_data_enrichment/__init__.py +++ b/src/intent_data_enrichment/__init__.py @@ -7,10 +7,10 @@ __version__ = "1.0.0" -from data_enrichment.models import ServiceData, EnrichedService, EnrichmentResult -from data_enrichment.api_client import LLMAPIClient -from data_enrichment.qdrant_manager import QdrantManager -from data_enrichment.constants import EnrichmentConstants +from intent_data_enrichment.models import ServiceData, EnrichedService, EnrichmentResult +from intent_data_enrichment.api_client import LLMAPIClient +from intent_data_enrichment.qdrant_manager import QdrantManager +from intent_data_enrichment.constants import EnrichmentConstants __all__ = [ "ServiceData", diff --git a/src/intent_data_enrichment/api_client.py b/src/intent_data_enrichment/api_client.py index 903e642..31ed96e 100644 --- a/src/intent_data_enrichment/api_client.py +++ b/src/intent_data_enrichment/api_client.py @@ -6,8 +6,8 @@ from types import TracebackType from loguru import logger -from data_enrichment.constants import EnrichmentConstants -from data_enrichment.models import ServiceData +from intent_data_enrichment.constants import EnrichmentConstants +from intent_data_enrichment.models import ServiceData class LLMAPIClient: diff --git 
a/src/intent_data_enrichment/main_enrichment.py b/src/intent_data_enrichment/main_enrichment.py index 2ed294f..c134166 100644 --- a/src/intent_data_enrichment/main_enrichment.py +++ b/src/intent_data_enrichment/main_enrichment.py @@ -12,9 +12,9 @@ import asyncio from loguru import logger -from data_enrichment.models import ServiceData, EnrichedService, EnrichmentResult -from data_enrichment.api_client import LLMAPIClient -from data_enrichment.qdrant_manager import QdrantManager +from intent_data_enrichment.models import ServiceData, EnrichedService, EnrichmentResult +from intent_data_enrichment.api_client import LLMAPIClient +from intent_data_enrichment.qdrant_manager import QdrantManager def parse_arguments() -> ServiceData: diff --git a/src/intent_data_enrichment/qdrant_manager.py b/src/intent_data_enrichment/qdrant_manager.py index 3aaad61..44af0e4 100644 --- a/src/intent_data_enrichment/qdrant_manager.py +++ b/src/intent_data_enrichment/qdrant_manager.py @@ -6,8 +6,8 @@ from qdrant_client import QdrantClient from qdrant_client.models import Distance, VectorParams, PointStruct -from data_enrichment.constants import EnrichmentConstants -from data_enrichment.models import EnrichedService +from intent_data_enrichment.constants import EnrichmentConstants +from intent_data_enrichment.models import EnrichedService # Error messages _CLIENT_NOT_INITIALIZED = "Qdrant client not initialized" From 72b8ae1fd64c64f093bd23e8284a4f4aea828c0e Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Fri, 20 Feb 2026 16:49:32 +0530 Subject: [PATCH 03/27] fixed requested changes --- DSL/CronManager/script/service_enrichment.sh | 4 +-- docker-compose-ec2.yml | 4 +-- docker-compose.yml | 4 +-- src/intent_data_enrichment/main_enrichment.py | 11 +++---- src/intent_data_enrichment/qdrant_manager.py | 29 ++++++++++++------- 5 files changed, 30 insertions(+), 22 deletions(-) diff --git a/DSL/CronManager/script/service_enrichment.sh b/DSL/CronManager/script/service_enrichment.sh index 
4828833..c50a490 100644 --- a/DSL/CronManager/script/service_enrichment.sh +++ b/DSL/CronManager/script/service_enrichment.sh @@ -8,7 +8,7 @@ if [ -z "$service_id" ] || [ -z "$name" ] || [ -z "$description" ]; then exit 1 fi -PYTHON_SCRIPT="/app/src/data_enrichment/main_enrichment.py" +PYTHON_SCRIPT="/app/src/intent_data_enrichment/main_enrichment.py" echo "[INFO] Service ID: $service_id" echo "[INFO] Service Name: $name" @@ -42,7 +42,7 @@ echo "[PACKAGES] Installing required packages..." echo "[PACKAGES] All packages installed successfully" # Set Python path -export PYTHONPATH="/app:/app/src:/app/src/data_enrichment:$PYTHONPATH" +export PYTHONPATH="/app:/app/src:/app/src/intent_data_enrichment:$PYTHONPATH" # Verify Python script exists [ ! -f "$PYTHON_SCRIPT" ] && { echo "[ERROR] Python script not found at $PYTHON_SCRIPT"; exit 1; } diff --git a/docker-compose-ec2.yml b/docker-compose-ec2.yml index c6b8819..cc48c1c 100644 --- a/docker-compose-ec2.yml +++ b/docker-compose-ec2.yml @@ -179,7 +179,7 @@ services: - ./DSL/CronManager/DSL:/DSL - ./DSL/CronManager/script:/app/scripts - ./src/vector_indexer:/app/src/vector_indexer - - ./src/data_enrichment:/app/src/data_enrichment + - ./src/intent_data_enrichment:/app/src/intent_data_enrichment - ./src/utils/decrypt_vault_secrets.py:/app/src/utils/decrypt_vault_secrets.py:ro # Decryption utility (read-only) - cron_data:/app/data - shared-volume:/app/shared # Access to shared resources for cross-container coordination @@ -188,7 +188,7 @@ services: - ./.env:/app/.env:ro environment: - server.port=9010 - - PYTHONPATH=/app:/app/src/vector_indexer:/app/src/data_enrichment + - PYTHONPATH=/app:/app/src/vector_indexer:/app/src/intent_data_enrichment - VAULT_AGENT_URL=http://vault-agent-cron:8203 ports: - 9010:8080 diff --git a/docker-compose.yml b/docker-compose.yml index 5ac933e..1fec54b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -178,7 +178,7 @@ services: - ./DSL/CronManager/DSL:/DSL - 
./DSL/CronManager/script:/app/scripts - ./src/vector_indexer:/app/src/vector_indexer - - ./src/data_enrichment:/app/src/data_enrichment + - ./src/intent_data_enrichment:/app/src/intent_data_enrichment - ./src/utils/decrypt_vault_secrets.py:/app/src/utils/decrypt_vault_secrets.py:ro # Decryption utility (read-only) - cron_data:/app/data - shared-volume:/app/shared # Access to shared resources for cross-container coordination @@ -187,7 +187,7 @@ services: - ./.env:/app/.env:ro environment: - server.port=9010 - - PYTHONPATH=/app:/app/src/vector_indexer:/app/src/data_enrichment + - PYTHONPATH=/app:/app/src/vector_indexer:/app/src/intent_data_enrichment - VAULT_AGENT_URL=http://vault-agent-cron:8203 ports: - 9010:8080 diff --git a/src/intent_data_enrichment/main_enrichment.py b/src/intent_data_enrichment/main_enrichment.py index c134166..2aedb26 100644 --- a/src/intent_data_enrichment/main_enrichment.py +++ b/src/intent_data_enrichment/main_enrichment.py @@ -110,11 +110,12 @@ async def enrich_service(service_data: ServiceData) -> EnrichmentResult: # Step 4: Store in Qdrant logger.info("Step 3: Storing in Qdrant") qdrant = QdrantManager() - qdrant.connect() - qdrant.ensure_collection() - - success = qdrant.upsert_service(enriched_service) - qdrant.close() + try: + qdrant.connect() + qdrant.ensure_collection() + success = qdrant.upsert_service(enriched_service) + finally: + qdrant.close() if success: return EnrichmentResult( diff --git a/src/intent_data_enrichment/qdrant_manager.py b/src/intent_data_enrichment/qdrant_manager.py index 44af0e4..5024e23 100644 --- a/src/intent_data_enrichment/qdrant_manager.py +++ b/src/intent_data_enrichment/qdrant_manager.py @@ -70,21 +70,27 @@ def ensure_collection(self) -> None: existing_vector_size = vectors_config.size if existing_vector_size is None: - logger.warning( - f"Could not determine vector size for '{self.collection_name}', recreating" + logger.error( + f"Collection '{self.collection_name}' exists but vector size cannot be 
determined" + ) + raise RuntimeError( + f"Collection '{self.collection_name}' exists but vector size cannot be determined. " + "This may indicate a Qdrant API issue or unexpected collection configuration. " + "Manual intervention required: verify Qdrant health, inspect collection config, " + "or manually delete the collection if recreating is intended." ) - self.client.delete_collection(self.collection_name) - self._create_collection() elif existing_vector_size != EnrichmentConstants.VECTOR_SIZE: - logger.warning( - f"Collection '{self.collection_name}' exists with wrong vector size: " + logger.error( + f"Collection '{self.collection_name}' has incompatible vector size: " f"{existing_vector_size} (expected {EnrichmentConstants.VECTOR_SIZE})" ) - logger.info( - f"Deleting and recreating collection '{self.collection_name}'" + raise RuntimeError( + f"Collection '{self.collection_name}' has incompatible vector size " + f"({existing_vector_size} vs expected {EnrichmentConstants.VECTOR_SIZE}). " + "This prevents automatic deletion to avoid accidental data loss. " + "To recreate the collection, manually delete it first using: " + f"qdrant.client.delete_collection('{self.collection_name}') or via Qdrant UI/API." ) - self.client.delete_collection(self.collection_name) - self._create_collection() else: logger.info( f"Collection '{self.collection_name}' already exists " @@ -120,7 +126,8 @@ def upsert_service(self, enriched_service: EnrichedService) -> bool: Upsert enriched service to Qdrant (update if exists, insert if new). Args: - enriched_service: Enric_CLIENT_NOT_INITIALIZED + enriched_service: EnrichedService instance containing the embedding and + associated metadata to upsert into Qdrant. 
Returns: True if successful, False otherwise From 9b7bc7b4d68a65ce828ef3c5c2e25c820b7ce8d0 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Fri, 20 Feb 2026 18:07:26 +0530 Subject: [PATCH 04/27] fixed issue --- DSL/Ruuter.public/rag-search/POST/services/enrich.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/DSL/Ruuter.public/rag-search/POST/services/enrich.yml b/DSL/Ruuter.public/rag-search/POST/services/enrich.yml index 8e42737..5748ad5 100644 --- a/DSL/Ruuter.public/rag-search/POST/services/enrich.yml +++ b/DSL/Ruuter.public/rag-search/POST/services/enrich.yml @@ -74,8 +74,14 @@ execute_enrichment: current_state: ${service_current_state} is_common: ${service_is_common} result: enrichment_result - next: assign_success on_error: handle_cron_error + next: check_enrichment_status + +check_enrichment_status: + switch: + - condition: ${200 <= enrichment_result.response.statusCodeValue && enrichment_result.response.statusCodeValue < 300} + next: assign_success + next: assign_cron_failure handle_cron_error: log: "ERROR: Failed to queue enrichment job - ${enrichment_result.error || 'CronManager unreachable'}" From a2084e59331fde9601920902d1355f6c3cb0969d Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 24 Feb 2026 15:24:40 +0530 Subject: [PATCH 05/27] service workflow implementation without calling service endpoints --- .../rag-search/POST/count-active-services.sql | 11 + .../POST/get-all-active-services.sql | 20 + .../rag-search/POST/get-service-by-id.sql | 24 + .../rag-search/GET/services/get-services.yml | 60 ++ docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md | 659 ++++++++++++++ src/intent_data_enrichment/constants.py | 2 + src/intent_data_enrichment/main_enrichment.py | 35 +- src/tool_classifier/classifier.py | 155 ++-- src/tool_classifier/constants.py | 64 ++ src/tool_classifier/intent_detector.py | 133 +++ .../workflows/service_workflow.py | 855 +++++++++++++++--- 11 files changed, 1827 insertions(+), 191 deletions(-) create mode 100644 
DSL/Resql/rag-search/POST/count-active-services.sql create mode 100644 DSL/Resql/rag-search/POST/get-all-active-services.sql create mode 100644 DSL/Resql/rag-search/POST/get-service-by-id.sql create mode 100644 DSL/Ruuter.public/rag-search/GET/services/get-services.yml create mode 100644 docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md create mode 100644 src/tool_classifier/constants.py create mode 100644 src/tool_classifier/intent_detector.py diff --git a/DSL/Resql/rag-search/POST/count-active-services.sql b/DSL/Resql/rag-search/POST/count-active-services.sql new file mode 100644 index 0000000..d68d273 --- /dev/null +++ b/DSL/Resql/rag-search/POST/count-active-services.sql @@ -0,0 +1,11 @@ +-- Count active services for tool classifier +-- Used by Service Workflow to determine search strategy: +-- - If count <= 10: Use all services for LLM context +-- - If count > 10: Use Qdrant semantic search for top 5 + +SELECT + COUNT(*) AS active_service_count +FROM + public.services +WHERE + current_state = 'active'; diff --git a/DSL/Resql/rag-search/POST/get-all-active-services.sql b/DSL/Resql/rag-search/POST/get-all-active-services.sql new file mode 100644 index 0000000..5bd981b --- /dev/null +++ b/DSL/Resql/rag-search/POST/get-all-active-services.sql @@ -0,0 +1,20 @@ +-- Get all active services for intent detection +-- Used when active_service_count <= 10 +-- Returns all service metadata needed for LLM intent detection + +SELECT + service_id, + name, + description, + ruuter_type, + slot, + entities, + examples, + structure, + endpoints +FROM + public.services +WHERE + current_state = 'active' +ORDER BY + name ASC; diff --git a/DSL/Resql/rag-search/POST/get-service-by-id.sql b/DSL/Resql/rag-search/POST/get-service-by-id.sql new file mode 100644 index 0000000..dbf375a --- /dev/null +++ b/DSL/Resql/rag-search/POST/get-service-by-id.sql @@ -0,0 +1,24 @@ +-- Get specific service by service_id for validation +-- Used after LLM detects intent to validate the service exists and is
active +-- Returns all service details needed to trigger the external service call + +SELECT + id, + service_id, + name, + description, + ruuter_type, + current_state, + is_common, + slot, + entities, + examples, + structure, + endpoints, + created_at, + updated_at +FROM + public.services +WHERE + service_id = :serviceId + AND current_state = 'active'; diff --git a/DSL/Ruuter.public/rag-search/GET/services/get-services.yml b/DSL/Ruuter.public/rag-search/GET/services/get-services.yml new file mode 100644 index 0000000..d1ed395 --- /dev/null +++ b/DSL/Ruuter.public/rag-search/GET/services/get-services.yml @@ -0,0 +1,60 @@ +declaration: + call: declare + version: 0.1 + description: "Get services for intent detection - returns all services if count <= 10, otherwise signals to use semantic search" + method: get + returns: json + namespace: rag-search + +# Step 1: Count active services +count_services: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/count-active-services" + body: {} + result: count_result + next: check_service_count + +# Step 2: Check if count > threshold (10) +check_service_count: + assign: + service_count: ${Number(count_result.response.body[0].active_service_count)} + switch: + - condition: "${service_count > 10}" + next: return_semantic_search_flag + next: fetch_all_services + +# Step 3a: If > 10, return flag for semantic search +return_semantic_search_flag: + assign: + semantic_search_response: + use_semantic_search: true + service_count: ${service_count} + message: "Service count exceeds threshold - use semantic search" + next: return_semantic_search_response + +return_semantic_search_response: + return: ${semantic_search_response} + next: end + +# Step 3b: If <= 10, fetch all services +fetch_all_services: + call: http.post + args: + url: "[#RAG_SEARCH_RESQL]/get-all-active-services" + body: {} + result: services_result + next: return_all_services + +# Step 4: Return all services for LLM +return_all_services: + assign: + 
all_services_response: + use_semantic_search: false + service_count: ${services_result.response.body.length} + services: ${services_result.response.body} + next: return_all_services_response + +return_all_services_response: + return: ${all_services_response} + next: end diff --git a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md new file mode 100644 index 0000000..bb8ad44 --- /dev/null +++ b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md @@ -0,0 +1,659 @@ +# Tool Classifier and Service Workflow Architecture + +## Overview + +The Tool Classifier implements a **layer-wise fallback architecture** that routes user queries to the most appropriate workflow handler. The Service Workflow (Layer 1) handles external API/service calls with intelligent intent detection and entity extraction. + +--- + +## Tool Classifier - Layer Architecture + +### Design Pattern: Chain of Responsibility + +The classifier tries each layer sequentially. If a layer returns `None`, it falls back to the next layer: + +``` +Layer 1: SERVICE → External API calls (currency, weather, etc.) 
+Layer 2: CONTEXT → Greetings, conversation history queries +Layer 3: RAG → Knowledge base retrieval (documents, regulations) +Layer 4: OOD → Out-of-domain fallback (polite rejection) +``` + +### Layer Execution Flow + +```python +# Non-streaming mode +classification = await classifier.classify(query, history, language) +response = await classifier.route_to_workflow(classification, request, is_streaming=False) + +# Streaming mode +classification = await classifier.classify(query, history, language) +stream = await classifier.route_to_workflow(classification, request, is_streaming=True) +async for sse_chunk in stream: + yield sse_chunk +``` + +### Fallback Chain + +Each workflow's `execute_async()` or `execute_streaming()` can return: +- **OrchestrationResponse / AsyncIterator[str]**: Layer handled the query successfully +- **None**: Layer cannot handle → Fallback to next layer + +Example cascading: +``` +Query: "What is VAT rate?" +└─ SERVICE (Layer 1) → No matching service → Returns None + └─ CONTEXT (Layer 2) → Not a greeting → Returns None + └─ RAG (Layer 3) → Found in docs → Returns response ✓ +``` + +--- + +## Service Workflow (Layer 1) - Detailed Architecture + +### Purpose +Handle queries that require calling external services/APIs: +- Currency conversion: "How much is 100 EUR in USD?" +- Weather services: "What's the temperature in Tallinn?" +- Custom Ruuter endpoints: Any service registered in database + +### High-Level Flow + +``` +1. Service Discovery + ↓ +2. Service Selection (Semantic Search or LLM-based) + ↓ +3. Intent Detection (DSPy LLM Call) + ↓ +4. Entity Extraction (From LLM Output) + ↓ +5. Entity Validation (Against Service Schema) + ↓ +6. Entity Transformation (Dict → Ordered Array) + ↓ +7. Service Call (TODO: Ruuter endpoint invocation) +``` + +--- + +## 1. 
Service Discovery + +### Method: `_call_service_discovery()` + +Calls Ruuter public endpoint to fetch available services: + +```python +GET /rag-search/get-services-from-llm +``` + +**Response Structure:** +```json +{ + "response": { + "service_count": 15, + "use_semantic_search": true, + "services": [ + { + "serviceId": "currency_conversion_eur", + "name": "Currency Conversion (EUR Base)", + "description": "Convert EUR to other currencies", + "ruuterType": "POST", + "ruuterUrl": "/currency/convert", + "entities": ["target_currency"], + "examples": [ + "How much is 100 EUR in USD?", + "Convert EUR to JPY" + ] + } + ] + } +} +``` + +### Service Count Threshold Logic + +```python +SERVICE_COUNT_THRESHOLD = 10 + +if service_count <= 10: + # Few services → Use all services for LLM intent detection + services = response["services"] + +elif service_count > 10: + # Many services → Use semantic search to narrow down + services = await _semantic_search_services(query, top_k=5) +``` + +--- + +## 2. Service Selection + +### Semantic Search (When Many Services) + +**Method:** `_semantic_search_services()` + +Uses Qdrant vector database to find relevant services: + +```python +# 1. Generate embedding for user query +embedding = orchestration_service.create_embeddings_for_indexer([query]) + +# 2. Search Qdrant collection +search_payload = { + "vector": query_embedding, + "limit": 5, # Top 5 services + "score_threshold": 0.4, # Minimum similarity + "with_payload": True +} + +response = qdrant_client.post( + f"/collections/{QDRANT_COLLECTION}/points/search", + json=search_payload +) +``` + +**Returns:** Top-K most semantically relevant services for intent detection + +--- + +## 3. Intent Detection (LLM-Based) + +### Method: `_detect_service_intent()` + +Uses **DSPy + LLM** to intelligently match user query to a specific service and extract entities. 
+ +### DSPy Module: `IntentDetectionModule` + +**Purpose:** Analyze user query against available services and extract structured information + +**Signature:** +```python +class ServiceIntentDetector(dspy.Signature): + # Inputs + user_query: str # "How much is 100 EUR in USD?" + available_services: str # JSON of service definitions + conversation_context: str # Recent 3 conversation turns + + # Output + intent_result: str # JSON: {matched_service_id, confidence, entities, reasoning} +``` + +### LLM Call Flow + +```python +# 1. Prepare service context +services_formatted = [ + { + "service_id": "currency_conversion_eur", + "name": "Currency Conversion", + "description": "Convert EUR to other currencies", + "required_entities": ["target_currency"], + "examples": ["How much is EUR in USD?", "Convert EUR to JPY"] + } +] + +# 2. Prepare conversation context (last 3 turns) +conversation_context = """ +user: Hello +assistant: Hi! How can I help? +user: How much is 100 EUR in USD? +""" + +# 3. Call DSPy module +intent_result = intent_detector.forward( + user_query="How much is 100 EUR in USD?", + services=services_formatted, + conversation_history=conversation_history +) +``` + +### LLM Output Format + +The LLM returns structured JSON: + +```json +{ + "matched_service_id": "currency_conversion_eur", + "confidence": 0.95, + "entities": { + "target_currency": "USD" + }, + "reasoning": "User wants to convert EUR to USD, matches currency conversion service" +} +``` + +### Confidence Threshold + +```python +if confidence < 0.7: + # Low confidence → Service workflow returns None → Fallback to RAG + return None +``` + +### Cost Tracking + +Intent detection is an LLM call, so costs are tracked: + +```python +# Before LLM call +history_length_before = len(dspy.settings.lm.history) + +# Call intent detector +intent_result = intent_module.forward(...) 
+ +# After LLM call +usage_info = get_lm_usage_since(history_length_before) +costs_dict["intent_detection"] = usage_info + +# Later: orchestration_service._log_costs(costs_dict) +``` + +--- + +## 4. Entity Extraction + +### From LLM Output + +The LLM extracts entities directly from the user query: + +**User Query:** `"Palju saan 1 EUR eest THBdes?"` +(Estonian: "How much do I get for 1 EUR in THB?") + +**LLM Extraction:** +```json +{ + "entities": { + "target_currency": "THB" + } +} +``` + +### Entity Format + +Entities are extracted as **key-value pairs** where: +- **Key**: Entity name defined in service schema (`target_currency`) +- **Value**: Extracted value from user query (`"THB"`) + +### Multi-Entity Example + +**Service Schema:** +```json +{ + "serviceId": "weather_forecast", + "entities": ["location", "date"] +} +``` + +**User Query:** "What's the weather in Tallinn tomorrow?" + +**LLM Extraction:** +```json +{ + "entities": { + "location": "Tallinn", + "date": "tomorrow" + } +} +``` + +--- + +## 5. Entity Validation + +### Method: `_validate_entities()` + +Validates extracted entities against the service's expected schema. + +### Validation Checks + +#### 1. Missing Entities +Entities required by schema but not extracted by LLM: + +```python +service_schema = ["target_currency", "amount"] +extracted = {"target_currency": "USD"} + +# Missing: "amount" +missing_entities = ["amount"] +``` + +**Strategy:** Send empty string for missing entities (let service validate) + +#### 2. Extra Entities +Entities extracted but not in service schema: + +```python +service_schema = ["target_currency"] +extracted = {"target_currency": "USD", "random_field": "value"} + +# Extra: "random_field" +extra_entities = ["random_field"] +``` + +**Strategy:** Ignore extra entities (not sent to service) + +#### 3. 
Empty Values +Entities extracted but with empty values: + +```python +extracted = {"target_currency": ""} + +validation_errors = ["Entity 'target_currency' has empty value"] +``` + +**Strategy:** Log warning, proceed anyway (service validates) + +### Validation Result + +```python +{ + "is_valid": True, # Always true (lenient validation) + "missing_entities": ["amount"], # Will send empty strings + "extra_entities": ["random_field"], # Will be ignored + "validation_errors": [ # Warnings only + "Entity 'amount' has empty value" + ] +} +``` + +### Validation Philosophy + +**Lenient Approach:** +- Always returns `is_valid: True` +- Proceeds with partial entities +- Service endpoint validates required parameters +- Avoids false negatives from over-strict validation + +--- + +## 6. Entity Transformation + +### Method: `_transform_entities_to_array()` + +Transforms entity dictionary to **ordered array** matching service schema order. + +### Why Ordered Array? + +Ruuter services expect parameters in specific order: +```python +# Service schema defines order +entities_schema = ["target_currency", "source_currency", "amount"] + +# LLM extraction (unordered dict) +entities_dict = { + "amount": "100", + "target_currency": "USD", + "source_currency": "EUR" +} + +# Transform to ordered array +entities_array = ["USD", "EUR", "100"] +# ↑ ↑ ↑ +# [0] [1] [2] (matches schema order) +``` + +### Transformation Logic + +```python +def _transform_entities_to_array( + entities_dict: Dict[str, str], + entity_order: List[str] +) -> List[str]: + """Transform entity dict to ordered array.""" + ordered_array = [] + + for entity_key in entity_order: + # Get value from dict, or empty string if missing + value = entities_dict.get(entity_key, "") + ordered_array.append(value) + + return ordered_array +``` + +### Example + +**Service Schema:** +```json +["target_currency", "base_currency", "amount"] +``` + +**Extracted Entities:** +```json +{ + "target_currency": "JPY", + "amount": "500" +} +``` + 
+**Transformed Array:** +```python +["JPY", "", "500"] +# ↑ +# Missing "base_currency" → empty string +``` + +--- + +## 7. Service Call (TODO: Step 7) + +### Endpoint Construction + +```python +endpoint_url = f"{RUUTER_BASE_URL}{service_metadata['ruuter_url']}" +# Example: "http://ruuter:8080/currency/convert" +``` + +### Payload Construction (Planned) + +```python +payload = { + "input": entities_array, # ["USD", "EUR", "100"] + "authorId": request.authorId, + "chatId": request.chatId +} +``` + +### HTTP Call (Planned) + +```python +# Non-streaming +response = await httpx.post( + endpoint_url, + json=payload, + timeout=5.0 +) + +# Streaming +async with httpx.stream("POST", endpoint_url, json=payload) as stream: + async for line in stream.aiter_lines(): + yield orchestration_service._format_sse(chat_id, line) +``` + +--- + +## Complete Example Flow + +### User Query +``` +"Palju saan 1 EUR eest THBdes?" +(How much do I get for 1 EUR in THB?) +``` + +### Step-by-Step Execution + +#### 1. Service Discovery +```json +{ + "service_count": 5, + "services": [ + { + "serviceId": "currency_conversion_eur", + "name": "Currency Conversion (EUR)", + "entities": ["target_currency"], + "examples": ["How much is EUR in USD?"] + } + ] +} +``` + +#### 2. Service Selection +```python +# Few services (5 <= 10) → Use all for intent detection +services = discovery_result["services"] +``` + +#### 3. Intent Detection (LLM Call) +```json +{ + "matched_service_id": "currency_conversion_eur", + "confidence": 0.92, + "entities": { + "target_currency": "THB" + }, + "reasoning": "User wants to convert EUR to THB" +} +``` + +#### 4. Entity Extraction +```python +entities_dict = {"target_currency": "THB"} +``` + +#### 5. Entity Validation +```python +validation_result = { + "is_valid": True, + "missing_entities": [], + "extra_entities": [], + "validation_errors": [] +} +``` + +#### 6. 
Entity Transformation +```python +# Schema: ["target_currency"] +# Dict: {"target_currency": "THB"} +# Array: ["THB"] +entities_array = ["THB"] +``` + +#### 7. Service Call (TODO) +```python +# Planned implementation +response = await call_service( + url="http://ruuter:8080/currency/convert", + method="POST", + payload={"input": ["THB"], "chatId": "..."} +) +``` + +--- + +## Cost Tracking + +Service workflow tracks LLM costs following the RAG workflow pattern: + +```python +# Create costs dict at workflow level +costs_dict: Dict[str, Dict[str, Any]] = {} + +# Intent detection captures costs +intent_result, intent_usage = await _detect_service_intent(...) +costs_dict["intent_detection"] = intent_usage + +# Log costs after workflow completes +orchestration_service._log_costs(costs_dict) +``` + +**Cost Breakdown Logged:** +``` +LLM USAGE COSTS BREAKDOWN: + intent_detection : $0.000120 (1 calls, 450 tokens) +``` + +--- + +## Fallback Behavior + +### When Service Workflow Returns None + +```python +# Scenario 1: No service match (confidence < 0.7) +if not intent_result or intent_result.get("confidence", 0) < 0.7: + return None # Fallback to CONTEXT layer + +# Scenario 2: Service validation failed +if not validated_service: + return None # Fallback to CONTEXT layer + +# Scenario 3: No services discovered +if not services: + return None # Fallback to CONTEXT layer +``` + +### Fallback Chain Result + +``` +Query: "What is VAT?" 
+└─ SERVICE → No service matches "VAT information" → None + └─ CONTEXT → Not a greeting → None + └─ RAG → Found in knowledge base → Response ✓ +``` + +--- + +## Configuration Constants + +```python +# Service discovery +RUUTER_BASE_URL = "http://ruuter.public:8080" +SERVICE_DISCOVERY_TIMEOUT = 5.0 # seconds + +# Service selection thresholds +SERVICE_COUNT_THRESHOLD = 10 # Switch to semantic search if exceeded +MAX_SERVICES_FOR_LLM_CONTEXT = 20 # Max services to pass to LLM + +# Semantic search +QDRANT_COLLECTION = "services_collection" +SEMANTIC_SEARCH_TOP_K = 5 # Top 5 relevant services +SEMANTIC_SEARCH_THRESHOLD = 0.4 # Minimum similarity score +QDRANT_TIMEOUT = 2.0 # seconds + +# Intent detection +INTENT_CONFIDENCE_THRESHOLD = 0.7 # Minimum confidence to proceed +``` + +--- + +## Key Design Decisions + +### 1. **Lenient Entity Validation** +- Proceeds with partial entities +- Service validates required parameters +- Reduces false negatives + +### 2. **Ordered Entity Arrays** +- Ruuter services expect positional parameters +- Schema defines canonical order +- Missing entities → empty strings + +### 3. **Two-Stage Service Selection** +- Few services (≤10): Pass all to LLM +- Many services (>10): Semantic search first + +### 4. **LLM-Based Intent Detection** +- Intelligent service matching +- Natural language understanding +- Multilingual support (Estonian, English, Russian) + +### 5. **Cost Tracking** +- Follows RAG workflow pattern +- Tracks intent detection LLM costs +- Integrated with budget system + +--- + +## Summary + +The Tool Classifier's layer architecture enables intelligent query routing with graceful fallbacks. The Service Workflow (Layer 1) uses **LLM-based intent detection** to match user queries to external services, extract entities, validate them against service schemas, and prepare them for service invocation—all while maintaining comprehensive cost tracking and seamless integration with the broader RAG pipeline. 
diff --git a/src/intent_data_enrichment/constants.py b/src/intent_data_enrichment/constants.py index fd15a6a..f1f35f3 100644 --- a/src/intent_data_enrichment/constants.py +++ b/src/intent_data_enrichment/constants.py @@ -43,4 +43,6 @@ class EnrichmentConstants: - Related concepts - Common ways users might express this intent +IMPORTANT: Generate the context in the SAME LANGUAGE as the service description above. If the description is in Estonian, respond in Estonian. If in English, respond in English. If in Russian, respond in Russian. + Answer only with the enriched context and nothing else.""" diff --git a/src/intent_data_enrichment/main_enrichment.py b/src/intent_data_enrichment/main_enrichment.py index 2aedb26..d718678 100644 --- a/src/intent_data_enrichment/main_enrichment.py +++ b/src/intent_data_enrichment/main_enrichment.py @@ -91,12 +91,35 @@ async def enrich_service(service_data: ServiceData) -> EnrichmentResult: context = await api_client.generate_context(service_data) logger.success(f"Context generated: {len(context)} characters") - # Step 2: Create embedding for the context - logger.info("Step 2: Creating embedding vector") - embedding = await api_client.create_embedding(context) + # Step 2: Combine generated context with original metadata for embedding + logger.info("Step 2: Combining context with original service metadata") + combined_text_parts = [ + f"Service Name: {service_data.name}", + f"Description: {service_data.description}", + ] + + if service_data.examples: + combined_text_parts.append( + f"Example Queries: {' | '.join(service_data.examples)}" + ) + + if service_data.entities: + combined_text_parts.append( + f"Required Entities: {', '.join(service_data.entities)}" + ) + + # Add generated context last (enriched understanding) + combined_text_parts.append(f"Enriched Context: {context}") + + combined_text = "\n".join(combined_text_parts) + logger.info(f"Combined text length: {len(combined_text)} characters") + + # Step 3: Create embedding for 
combined text + logger.info("Step 3: Creating embedding vector for combined text") + embedding = await api_client.create_embedding(combined_text) logger.success(f"Embedding created: {len(embedding)}-dimensional vector") - # Step 3: Prepare enriched service + # Step 4: Prepare enriched service enriched_service = EnrichedService( id=service_data.service_id, name=service_data.name, @@ -107,8 +130,8 @@ async def enrich_service(service_data: ServiceData) -> EnrichmentResult: embedding=embedding, ) - # Step 4: Store in Qdrant - logger.info("Step 3: Storing in Qdrant") + # Step 5: Store in Qdrant + logger.info("Step 5: Storing in Qdrant") qdrant = QdrantManager() try: qdrant.connect() diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index 71a4592..ec9dc95 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -55,6 +55,7 @@ def __init__( # Initialize workflow executors self.service_workflow = ServiceWorkflowExecutor( llm_manager=llm_manager, + orchestration_service=orchestration_service, ) self.context_workflow = ContextWorkflowExecutor( llm_manager=llm_manager, @@ -75,10 +76,11 @@ async def classify( """ Classify a user query to determine which workflow should handle it. - Implements layer-wise classification logic: - 1. Check if SERVICE workflow can handle (intent detection) - 2. Check if CONTEXT workflow can handle (greeting/history check) - 3. Default to RAG workflow (knowledge retrieval) + Implements layer-wise classification logic with fallback chain: + 1. SERVICE workflow (external API calls) + 2. CONTEXT workflow (greetings/conversation history) + 3. RAG workflow (knowledge base retrieval) + 4. OOD workflow (out-of-domain) Args: query: User's query string @@ -87,60 +89,15 @@ async def classify( Returns: ClassificationResult indicating which workflow to use - - Note: - In this skeleton, always defaults to RAG. Full implementation - will add Layer 1 and Layer 2 logic in separate tasks. 
""" logger.info(f"Classifying query: {query[:100]}...") - # TODO: LAYER 1 - SERVICE WORKFLOW DETECTION - # Implementation task: Service workflow implementation - # Logic: - # 1. Count active services in database - # 2. If count > 50: Use Qdrant semantic search for top 20 services - # 3. If count <= 50: Use all services - # 4. Call LLM to detect intent and extract entities - # 5. If intent detected and service valid: return SERVICE classification - # Example: - # service_check = await self._check_service_layer(query, language) - # if service_check.can_handle: - # return ClassificationResult( - # workflow=WorkflowType.SERVICE, - # confidence=service_check.confidence, - # metadata=service_check.metadata, - # reasoning="Service intent detected" - # ) - - # TODO: LAYER 2 - CONTEXT WORKFLOW DETECTION - # Implementation task: Context workflow implementation - # Logic: - # 1. Check if query is a greeting using LLM - # 2. If greeting: return CONTEXT classification - # 3. If conversation_history exists: Check if query references history - # 4. Call LLM to determine if history contains answer - # 5. 
If can answer from history: return CONTEXT classification - # Example: - # context_check = await self._check_context_layer( - # query, conversation_history, language - # ) - # if context_check.can_handle: - # return ClassificationResult( - # workflow=WorkflowType.CONTEXT, - # confidence=context_check.confidence, - # metadata=context_check.metadata, - # reasoning="Greeting or answerable from history" - # ) - - # LAYER 3 - RAG WORKFLOW (DEFAULT) - # Always defaults to RAG for now - # RAG workflow will handle the query or return OOD if no chunks found - logger.info("Defaulting to RAG workflow (Layers 1-2 not implemented)") + logger.info("Starting layer-wise fallback: ") return ClassificationResult( - workflow=WorkflowType.RAG, + workflow=WorkflowType.SERVICE, confidence=1.0, metadata={}, - reasoning="Default to RAG workflow (service and context layers not implemented)", + reasoning="Start with Service workflow - will cascade through layers", ) @overload @@ -235,10 +192,7 @@ async def _execute_with_fallback_async( """ Execute workflow with fallback to subsequent layers (non-streaming). - TODO: Implement full fallback chain logic - Currently just executes the primary workflow. - - Full implementation should: + Implementation: 1. Try primary workflow 2. If returns None, try next layer in WORKFLOW_LAYER_ORDER 3. 
Continue until workflow returns non-None result @@ -256,19 +210,38 @@ async def _execute_with_fallback_async( logger.info(f"[{chat_id}] {workflow_name} handled successfully") return result - # TODO: Implement fallback to next layer - # For now, if workflow returns None, call RAG as fallback - logger.warning( + # Implement layer-wise fallback chain + logger.info( f"[{chat_id}] {workflow_name} returned None, " - f"falling back to RAG workflow" + f"trying next layer in fallback chain" ) - rag_result = await self.rag_workflow.execute_async(request, {}) - if rag_result is not None: - return rag_result - else: - # This should never happen since RAG always returns a result - # But handle gracefully - raise RuntimeError("RAG workflow returned None unexpectedly") + + # Get the layer order starting from current layer + from tool_classifier.enums import WORKFLOW_LAYER_ORDER + + current_index = WORKFLOW_LAYER_ORDER.index(start_layer) + remaining_layers = WORKFLOW_LAYER_ORDER[current_index + 1 :] + + # Try each subsequent layer in order + for next_layer in remaining_layers: + next_workflow = self._get_workflow_executor(next_layer) + next_name = WORKFLOW_DISPLAY_NAMES.get(next_layer, next_layer.value) + + logger.info( + f"[{chat_id}] Falling back to {next_name} (Layer {current_index + 2})" + ) + + result = await next_workflow.execute_async(request, {}) + + if result is not None: + logger.info(f"[{chat_id}] {next_name} handled successfully") + return result + + logger.info(f"[{chat_id}] {next_name} returned None, continuing...") + current_index += 1 + + # This should never happen since RAG/OOD should always return result + raise RuntimeError("All workflows returned None (unexpected)") except Exception as e: logger.error(f"[{chat_id}] Error executing {workflow_name}: {e}") @@ -290,10 +263,7 @@ async def _execute_with_fallback_streaming( """ Execute workflow with fallback to subsequent layers (streaming). 
- TODO: Implement full fallback chain logic - Currently just executes the primary workflow. - - Full implementation should: + Implementation: 1. Try primary workflow 2. If returns None, try next layer in WORKFLOW_LAYER_ORDER 3. Stream from the first workflow that returns non-None @@ -313,18 +283,41 @@ async def _execute_with_fallback_streaming( yield chunk return - # TODO: Implement fallback to next layer - # For now, if workflow returns None, call RAG as fallback - logger.warning( + # Implement layer-wise fallback chain for streaming + logger.info( f"[{chat_id}] {workflow_name} returned None, " - f"falling back to RAG workflow streaming" + f"trying next layer in fallback chain" ) - streaming_result = await self.rag_workflow.execute_streaming(request, {}) - if streaming_result is not None: - async for chunk in streaming_result: - yield chunk - else: - raise RuntimeError("RAG workflow returned None unexpectedly") + + # Get the layer order starting from current layer + from tool_classifier.enums import WORKFLOW_LAYER_ORDER + + current_index = WORKFLOW_LAYER_ORDER.index(start_layer) + remaining_layers = WORKFLOW_LAYER_ORDER[current_index + 1 :] + + # Try each subsequent layer in order + for next_layer in remaining_layers: + next_workflow = self._get_workflow_executor(next_layer) + next_name = WORKFLOW_DISPLAY_NAMES.get(next_layer, next_layer.value) + + logger.info( + f"[{chat_id}] Falling back to {next_name} streaming " + f"(Layer {current_index + 2})" + ) + + result = await next_workflow.execute_streaming(request, {}) + + if result is not None: + logger.info(f"[{chat_id}] {next_name} streaming started") + async for chunk in result: + yield chunk + return + + logger.info(f"[{chat_id}] {next_name} returned None, continuing...") + current_index += 1 + + # This should never happen + raise RuntimeError("All workflows returned None in streaming (unexpected)") except Exception as e: logger.error(f"[{chat_id}] Error executing {workflow_name} streaming: {e}") diff --git 
"""Constants and configuration for tool classifier module."""

import os


# ============================================================================
# Qdrant Vector Database Configuration
# ============================================================================

# Connection settings are overridable via environment variables so local/dev
# deployments can point at a different Qdrant instance; defaults match the
# docker-compose service names.
QDRANT_HOST = os.getenv("QDRANT_HOST", "qdrant")
"""Qdrant server hostname."""

QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
"""Qdrant server port."""

QDRANT_TIMEOUT = 10.0
"""Qdrant HTTP client timeout in seconds."""


# ============================================================================
# Semantic Search Configuration
# ============================================================================

QDRANT_COLLECTION = "intent_collections"
"""Qdrant collection name for service intent search."""

SEMANTIC_SEARCH_TOP_K = 10
"""Number of top services to return from semantic search."""

SEMANTIC_SEARCH_THRESHOLD = 0.2
"""Minimum similarity score threshold for semantic search (0.0-1.0).
Lowered from 0.4 to handle broader queries."""


# ============================================================================
# Ruuter Service Configuration
# ============================================================================

RUUTER_BASE_URL = os.getenv("RUUTER_BASE_URL", "http://ruuter-private:8086")
"""Base URL for Ruuter private service endpoints."""

RAG_SEARCH_RUUTER_PUBLIC = os.getenv(
    "RAG_SEARCH_RUUTER_PUBLIC", "http://ruuter-public:8086/rag-search"
)
"""Public Ruuter endpoint for RAG search service discovery."""

SERVICE_CALL_TIMEOUT = 10
"""Timeout in seconds for external service calls via Ruuter."""

SERVICE_DISCOVERY_TIMEOUT = 10.0
"""Timeout in seconds for service discovery calls."""


# ============================================================================
# Service Workflow Thresholds
# ============================================================================

MAX_SERVICES_FOR_LLM_CONTEXT = 50
"""Maximum number of services to send to LLM without semantic filtering.
If service count exceeds this, semantic search is used to filter to top-K."""

SERVICE_COUNT_THRESHOLD = 10
"""Threshold for triggering semantic search. If service count > this value,
semantic search is used instead of sending all services to LLM."""
"""Service intent detection using DSPy."""

import json
from typing import Any, Dict, List, Optional

import dspy
from loguru import logger


def _strip_code_fences(raw: str) -> str:
    """Strip a surrounding Markdown code fence from an LLM response.

    LLMs frequently wrap JSON answers in ``` / ```json fences even when
    instructed not to; removing them lets ``json.loads`` succeed on
    otherwise-valid output. Input without fences is returned stripped.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence line (it may carry a language tag).
        newline_idx = text.find("\n")
        if newline_idx != -1:
            text = text[newline_idx + 1 :]
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


class ServiceIntentDetector(dspy.Signature):
    """Detect which service matches user intent and extract entities.

    CRITICAL LANGUAGE RULE:
    - Understand Estonian, Russian, and English queries
    - Extract entities in their original form from the query

    Rules:
    - Match user query against available services
    - Extract required entity values from the query
    - Return valid JSON format strictly
    - If no service matches well (confidence < 0.7), return null for matched_service_id
    - Be conservative - only match when confident
    - Prioritize services whose examples closely match the user query
    """

    user_query: str = dspy.InputField(
        desc="User's question/request in Estonian, Russian, or English"
    )
    available_services: str = dspy.InputField(
        desc="JSON string of available services with id, name, description, entities, examples"
    )
    conversation_context: str = dspy.InputField(
        desc="Recent conversation history for context (optional, may be empty)"
    )

    intent_result: str = dspy.OutputField(
        desc='Valid JSON only: {"matched_service_id": "id_string" or null, "confidence": 0.0-1.0, "entities": {}, "reasoning": "brief explanation"}'
    )


class IntentDetectionModule(dspy.Module):
    """DSPy Module for service intent detection."""

    def __init__(self) -> None:
        """Initialize intent detection module with ChainOfThought."""
        super().__init__()
        self.detector = dspy.ChainOfThought(ServiceIntentDetector)

    def forward(
        self,
        user_query: str,
        services: List[Dict[str, Any]],
        conversation_history: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """
        Detect service intent using LLM via DSPy.

        Args:
            user_query: User's query
            services: List of service dicts with serviceId, name, description, entities, examples
            conversation_history: Recent messages (optional)

        Returns:
            Parsed intent result dict with matched_service_id, confidence,
            entities, reasoning. On any failure a null-match result with
            confidence 0.0 is returned instead of raising.
        """
        # Format services for prompt (keep it concise)
        services_formatted = []
        for s in services:
            service_entry = {
                "service_id": s.get("serviceId", s.get("service_id")),
                "name": s.get("name", "Unknown"),
                "description": s.get("description", ""),
                "required_entities": s.get("entities", []),
                "examples": s.get("examples", [])[:3],  # Top 3 examples
            }
            services_formatted.append(service_entry)

        services_json = json.dumps(services_formatted, ensure_ascii=False, indent=2)

        # Format conversation history (last 3 turns only, to bound prompt size)
        if conversation_history:
            history_lines = []
            for msg in conversation_history[-3:]:
                role = msg.get("authorRole", "unknown")
                content = msg.get("message", "")
                if content:
                    history_lines.append(f"{role}: {content}")
            history_text = "\n".join(history_lines) if history_lines else "(Empty)"
        else:
            history_text = "(No conversation history)"

        # Call DSPy detector with ChainOfThought
        result = None
        try:
            result = self.detector(
                user_query=user_query,
                available_services=services_json,
                conversation_context=history_text,
            )

            # Parse JSON response; strip Markdown fences first, since models
            # often wrap JSON in ``` blocks despite instructions.
            intent_data = json.loads(_strip_code_fences(result.intent_result))

            # Validate structure
            if not isinstance(intent_data, dict):
                raise ValueError("Intent result is not a dictionary")

            # Ensure required keys exist
            intent_data.setdefault("matched_service_id", None)
            intent_data.setdefault("entities", {})
            intent_data.setdefault("reasoning", "")

            # Coerce confidence to a float clamped to [0, 1] so downstream
            # threshold comparisons never fail on a string/None value.
            try:
                confidence = float(intent_data.get("confidence", 0.0))
            except (TypeError, ValueError):
                confidence = 0.0
            intent_data["confidence"] = max(0.0, min(1.0, confidence))

            return intent_data

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse intent JSON: {e}")
            if result:
                logger.error(f"Raw response: {result.intent_result}")
            return {
                "matched_service_id": None,
                "confidence": 0.0,
                "entities": {},
                "reasoning": f"JSON parse error: {e}",
            }
        except Exception as e:
            # loguru has no ``exc_info`` kwarg (that is stdlib logging);
            # logger.exception records the traceback.
            logger.exception(f"Intent detection forward failed: {e}")
            return {
                "matched_service_id": None,
                "confidence": 0.0,
                "entities": {},
                "reasoning": f"Detection error: {e}",
            }
class LLMServiceProtocol(Protocol):
    """Structural (duck-typed) interface for the orchestration service.

    Only the three members the service workflow actually uses are declared;
    any object providing them satisfies the protocol.
    """

    def create_embeddings_for_indexer(
        self,
        texts: List[str],
        environment: str = "production",
        connection_id: Optional[str] = None,
        batch_size: int = 10,
    ) -> Dict[str, Any]:
        """Create embeddings for text inputs using the configured embedding model.

        Args:
            texts: List of text strings to embed
            environment: Environment for model resolution
            connection_id: Optional connection ID for service selection
            batch_size: Number of texts to process in each batch

        Returns:
            Dictionary containing embeddings list and metadata
        """
        ...

    def _format_sse(self, chat_id: str, content: str) -> str:
        """Format content as SSE message.

        Args:
            chat_id: Chat/channel identifier
            content: Content to send (token, "END", error message, etc.)

        Returns:
            SSE-formatted string: "data: {json}\\n\\n"
        """
        ...

    def _log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None:
        """Log cost information for tracking.

        Args:
            costs_dict: Dictionary of costs per component
        """
        ...
    def __init__(
        self,
        llm_manager: Any,
        orchestration_service: Optional[LLMServiceProtocol] = None,
    ) -> None:
        """Initialize service workflow executor.

        Args:
            llm_manager: LLM manager used for DSPy-based intent detection.
            orchestration_service: Optional provider of embeddings, SSE
                formatting and cost logging (see LLMServiceProtocol). When
                None, semantic service search is unavailable.
        """
        # NOTE(review): BaseWorkflow.__init__ is not called here — confirm the
        # base class requires no initialization.
        self.llm_manager = llm_manager
        self.orchestration_service = orchestration_service
        # Lazily created shared Qdrant HTTP client; see _get_qdrant_client().
        self._qdrant_client: Optional[httpx.AsyncClient] = None

    async def _get_qdrant_client(self) -> httpx.AsyncClient:
        """Get or create Qdrant HTTP client (lazy initialization)."""
        if self._qdrant_client is None:
            qdrant_url = f"http://{QDRANT_HOST}:{QDRANT_PORT}"
            self._qdrant_client = httpx.AsyncClient(
                base_url=qdrant_url, timeout=QDRANT_TIMEOUT
            )
        return self._qdrant_client
= collection_info.json() + points_count = info.get("result", {}).get("points_count", 0) + if points_count == 0: + logger.error(f"[{chat_id}] Collection is empty") + return None + except Exception as e: + logger.warning(f"[{chat_id}] Could not verify collection: {e}") + + # Search Qdrant collection + client = await self._get_qdrant_client() + + search_payload = { + "vector": query_embedding, + "limit": top_k, + "score_threshold": SEMANTIC_SEARCH_THRESHOLD, + "with_payload": True, + } + + response = await client.post( + f"/collections/{QDRANT_COLLECTION}/points/search", + json=search_payload, + ) + + if response.status_code != 200: + logger.error( + f"[{chat_id}] Qdrant search failed: HTTP {response.status_code}" + ) + return None + + search_results = response.json() + points = search_results.get("result", []) + + if len(points) == 0: + logger.warning( + f"[{chat_id}] No services matched (threshold={SEMANTIC_SEARCH_THRESHOLD})" + ) + return None + + # Transform Qdrant results to service format + services: List[Dict[str, Any]] = [] + for point in points: + payload = point.get("payload", {}) + score = float(point.get("score", 0)) + + service = { + "serviceId": payload.get("service_id"), + "service_id": payload.get("service_id"), + "name": payload.get("name"), + "description": payload.get("description"), + "examples": payload.get("examples", []), + "entities": payload.get("entities", []), + # Note: endpoint not stored in intent_collections, + # will be resolved via database lookup if needed + "similarity_score": score, + } + services.append(service) + + logger.info( + f"[{chat_id}] Found {len(services)} services via semantic search" + ) + return services + + except Exception as e: + logger.error(f"[{chat_id}] Semantic search failed: {e}", exc_info=True) + return None + + async def _call_service_discovery(self, chat_id: str) -> Optional[Dict[str, Any]]: + """Call Ruuter endpoint to get services for intent detection.""" + endpoint = 
f"{RAG_SEARCH_RUUTER_PUBLIC}/services/get-services" + + try: + async with httpx.AsyncClient(timeout=SERVICE_DISCOVERY_TIMEOUT) as client: + response = await client.get(endpoint) + response.raise_for_status() + data = response.json() + return data + except httpx.TimeoutException: + logger.error(f"[{chat_id}] Service discovery timeout after 10s") + return None + except httpx.HTTPStatusError as e: + logger.error( + f"[{chat_id}] Service discovery HTTP error: {e.response.status_code}" + ) + return None + except Exception as e: + logger.error(f"[{chat_id}] Service discovery failed: {e}", exc_info=True) + return None + + async def _detect_service_intent( + self, + user_query: str, + services: List[Dict[str, Any]], + conversation_history: List[Any], + chat_id: str, + ) -> tuple[Optional[Dict[str, Any]], Dict[str, Any]]: + """Use DSPy + LLMManager to detect service intent and extract entities. + + Returns: + Tuple of (intent_result, usage_info): + - intent_result: Intent detection result dict (or None on error) + - usage_info: Cost and token usage information + """ + try: + # Ensure DSPy is configured with LLMManager + if self.llm_manager: + self.llm_manager.ensure_global_config() + else: + logger.error(f"[{chat_id}] LLM Manager not available") + return None, {} + + # Capture history length before LLM call for cost tracking + lm = dspy.settings.lm + history_length_before = ( + len(lm.history) if lm and hasattr(lm, "history") else 0 + ) + + # Create DSPy module + intent_module = IntentDetectionModule() + + # Convert conversation history to dict format + history_dicts = [ + {"authorRole": msg.authorRole, "message": msg.message} + for msg in conversation_history + if hasattr(msg, "authorRole") and hasattr(msg, "message") + ] + + # Call DSPy forward with task-local config + with self.llm_manager.use_task_local(): + intent_result = intent_module.forward( + user_query=user_query, + services=services, + conversation_history=history_dicts, + ) + + # Extract usage information after 
LLM call + usage_info = get_lm_usage_since(history_length_before) + + return intent_result, usage_info + + except Exception as e: + logger.error(f"[{chat_id}] Intent detection failed: {e}", exc_info=True) + return None, {} + + def _validate_detected_service( + self, + matched_service_id: str, + services: List[Dict[str, Any]], + chat_id: str, + ) -> Optional[Dict[str, Any]]: + """Validate that detected service exists in active services list.""" + for service in services: + service_id = service.get("serviceId", service.get("service_id")) + if service_id == matched_service_id: + return service + + logger.warning( + f"[{chat_id}] Service validation failed: '{matched_service_id}' not found" + ) + return None + + def _extract_service_metadata( + self, context: Dict[str, Any], chat_id: str + ) -> Optional[Dict[str, Any]]: + """Extract service and entity metadata from context.""" + # Check if service_id exists + service_id = context.get("service_id") + if not service_id: + logger.error(f"[{chat_id}] Missing service_id in context") + return None + + # Check if service_data exists + service_data = context.get("service_data") + if not service_data: + logger.error(f"[{chat_id}] Missing service_data in context") + return None + + # Extract entities dict from context (LLM extracted) + entities_dict = context.get("entities", {}) + + # Extract entity schema from service_data (expected order) + entity_schema = service_data.get("entities", []) + if entity_schema is None: + entity_schema = [] + + # Extract service name + service_name = service_data.get("name", service_id) + + # Extract HTTP method (ruuter_type) - defaults to GET if not specified + ruuter_type = service_data.get("ruuter_type", "GET") + + return { + "service_id": service_id, + "service_name": service_name, + "entities_dict": entities_dict, + "entity_schema": entity_schema, + "ruuter_type": ruuter_type, + "service_data": service_data, + } + + def _validate_entities( + self, + extracted_entities: Dict[str, str], + 
service_schema: List[str], + service_name: str, + chat_id: str, + ) -> Dict[str, Any]: """ - Execute service workflow in non-streaming mode. - - TODO: Implement service workflow logic: - 1. Extract service metadata from context (service_id, intent, entities) - 2. Validate service exists and is active in database - 3. Transform entities to array format for service call - 4. Call Ruuter endpoint: POST {RUUTER_BASE_URL}/services/active{ServiceName} - 5. Validate response with output guardrails - 6. Return OrchestrationResponse with service result - - Failure scenarios: - - No service_id in context → return None (fallback to Context) - - Service not found/inactive → return None (fallback to Context) - - Service call timeout → return error response - - Output guardrails blocked → return violation response or None + Validate extracted entities against service schema. Args: - request: Orchestration request with user query - context: Metadata with service_id, intent, entities + extracted_entities: Entity key-value pairs from LLM + service_schema: Expected entity keys from database + service_name: Service name for logging + chat_id: For logging Returns: - OrchestrationResponse with service result or None to fallback + Dict with validation results: + - is_valid: Overall validation status + - missing_entities: List of schema entities not extracted + - extra_entities: List of extracted entities not in schema + - validation_errors: List of error messages """ - logger.debug( - f"[{request.chatId}] Service workflow execute_async called " - f"(not implemented - returning None)" + missing_entities = [] + extra_entities = [] + validation_errors = [] + + # Check for missing entities (in schema but not extracted) + for schema_key in service_schema: + if schema_key not in extracted_entities: + missing_entities.append(schema_key) + elif extracted_entities[schema_key] == "": + # Entity extracted but value is empty + validation_errors.append(f"Entity '{schema_key}' has empty value") + + # 
Check for extra entities (extracted but not in schema) + for entity_key in extracted_entities: + if entity_key not in service_schema: + extra_entities.append(entity_key) + + # Determine overall validity + # We consider it valid even with missing entities (will send empty strings) + # Let the external service validate required parameters + is_valid = True # Always true - we proceed with partial entities + + return { + "is_valid": is_valid, + "missing_entities": missing_entities, + "extra_entities": extra_entities, + "validation_errors": validation_errors, + } + + def _transform_entities_to_array( + self, entities_dict: Dict[str, str], entity_order: List[str] + ) -> List[str]: + """Transform entity dictionary to ordered array based on service schema.""" + if not entity_order: + return [] + + # Transform to ordered array, filling missing with empty strings + return [entities_dict.get(key, "") for key in entity_order] + + def _construct_service_endpoint(self, service_name: str, chat_id: str) -> str: + """Construct the full service endpoint URL for Ruuter.""" + return f"{RUUTER_BASE_URL}/services/active{service_name}" + + def _format_debug_response( + self, + service_name: str, + endpoint_url: str, + http_method: str, + entities_array: List[str], + ) -> str: + """Format debug information for testing (temporary before Step 7 implementation).""" + entities_str = ", ".join(f'"{e}"' for e in entities_array) + return ( + f" Service Validated: {service_name}\n" + f" Endpoint URL: {endpoint_url}\n" + f" HTTP Method: {http_method}\n" + f" Extracted Entities: [{entities_str}]\n\n" ) - # TODO: Implement service workflow logic here - # For now, return None to trigger fallback to next layer - return None + async def _log_request_details( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + mode: str, + costs_dict: Dict[str, Dict[str, Any]], + ) -> None: + """Log request details and perform service discovery. 
+ + Args: + request: The orchestration request + context: Workflow context dictionary + mode: Execution mode ("streaming" or "non-streaming") + costs_dict: Dictionary to accumulate cost tracking information + """ + chat_id = request.chatId + logger.info(f"[{chat_id}] SERVICE WORKFLOW ({mode}): {request.message}") + + # Service Discovery + discovery_result = await self._call_service_discovery(chat_id) + + if discovery_result: + # Extract data from nested response structure + response_data = discovery_result.get("response", {}) + use_semantic = response_data.get("use_semantic_search", False) + service_count = response_data.get("service_count", 0) + + # Handle service_count if it's a string or NaN + if isinstance(service_count, str): + try: + service_count = int(service_count) + except (ValueError, TypeError): + service_count = 0 + + services_from_ruuter = response_data.get("services", []) + + # Use semantic search if count > threshold + if service_count > SERVICE_COUNT_THRESHOLD: + use_semantic = True + + if use_semantic: + # Use semantic search to find relevant services + services = await self._semantic_search_services( + query=request.message, + request=request, + chat_id=chat_id, + top_k=SEMANTIC_SEARCH_TOP_K, + ) + + if not services: + logger.warning(f"[{chat_id}] Semantic search failed") + + if services_from_ruuter: + services = services_from_ruuter + elif service_count <= MAX_SERVICES_FOR_LLM_CONTEXT: + fallback_result = await self._call_service_discovery(chat_id) + if fallback_result: + fallback_data = fallback_result.get("response", {}) + services = fallback_data.get("services", []) + else: + services = [] + else: + logger.error(f"[{chat_id}] Too many services ({service_count})") + services = [] + + if services: + intent_result, intent_usage = await self._detect_service_intent( + user_query=request.message, + services=services, + conversation_history=request.conversationHistory, + chat_id=chat_id, + ) + costs_dict["intent_detection"] = intent_usage + + if 
intent_result and intent_result.get("matched_service_id"): + service_id = intent_result["matched_service_id"] + logger.info(f"[{chat_id}] Matched: {service_id}") + + validated_service = self._validate_detected_service( + matched_service_id=service_id, + services=services, + chat_id=chat_id, + ) + + if validated_service: + context["service_id"] = service_id + context["confidence"] = intent_result.get("confidence", 0.0) + context["entities"] = intent_result.get("entities", {}) + context["service_data"] = validated_service + else: + services = response_data.get("services", []) + + if services: + intent_result, intent_usage = await self._detect_service_intent( + user_query=request.message, + services=services, + conversation_history=request.conversationHistory, + chat_id=chat_id, + ) + costs_dict["intent_detection"] = intent_usage + + if intent_result and intent_result.get("matched_service_id"): + service_id = intent_result["matched_service_id"] + logger.info(f"[{chat_id}] Matched: {service_id}") + + validated_service = self._validate_detected_service( + matched_service_id=service_id, + services=services, + chat_id=chat_id, + ) + + if validated_service: + context["service_id"] = service_id + context["confidence"] = intent_result.get("confidence", 0.0) + context["entities"] = intent_result.get("entities", {}) + context["service_data"] = validated_service + else: + logger.warning(f"[{chat_id}] Service discovery failed") + + async def execute_async( + self, + request: OrchestrationRequest, + context: Dict[str, Any], + ) -> Optional[OrchestrationResponse]: + """Execute service workflow in non-streaming mode.""" + chat_id = request.chatId + + # Create costs tracking dictionary (follows RAG workflow pattern) + costs_dict: Dict[str, Dict[str, Any]] = {} + + # Log comprehensive request details and perform service discovery + await self._log_request_details( + request, context, mode="non-streaming", costs_dict=costs_dict + ) + + # Check if service was detected and validated + 
if not context.get("service_id"): + logger.info( + f"[{chat_id}] No service detected or validated - " + f"returning None to fallback to next layer" + ) + return None + + # Entity Transformation & Validation + logger.info(f"[{chat_id}] Entity Transformation:") + + # Step 1: Extract service metadata from context + service_metadata = self._extract_service_metadata(context, chat_id) + if not service_metadata: + logger.error( + f"[{chat_id}] - Metadata extraction failed - " + f"returning None to fallback" + ) + return None + + logger.info(f"[{chat_id}] - Service: {service_metadata['service_name']}") + logger.info( + f"[{chat_id}] - Schema entities: {service_metadata['entity_schema']}" + ) + logger.info( + f"[{chat_id}] - Extracted entities: {service_metadata['entities_dict']}" + ) + + # Step 2: Validate entities against schema + validation_result = self._validate_entities( + extracted_entities=service_metadata["entities_dict"], + service_schema=service_metadata["entity_schema"], + service_name=service_metadata["service_name"], + chat_id=chat_id, + ) + + logger.info( + f"[{chat_id}] - Validation status: " + f"{'PASSED ✓' if validation_result['is_valid'] else 'FAILED ✗'}" + ) + + if validation_result["missing_entities"]: + logger.warning( + f"[{chat_id}] - Missing entities (will send empty strings): " + f"{validation_result['missing_entities']}" + ) + + if validation_result["extra_entities"]: + logger.info( + f"[{chat_id}] - Extra entities (ignored): " + f"{validation_result['extra_entities']}" + ) + + if validation_result["validation_errors"]: + for error in validation_result["validation_errors"]: + logger.warning(f"[{chat_id}] - Validation warning: {error}") + + # Step 3: Transform entities dict to ordered array + entities_array = self._transform_entities_to_array( + entities_dict=service_metadata["entities_dict"], + entity_order=service_metadata["entity_schema"], + ) + + context["entities_array"] = entities_array + context["validation_result"] = validation_result + + # 
Construct service endpoint URL + endpoint_url = self._construct_service_endpoint( + service_name=service_metadata["service_name"], chat_id=chat_id + ) + + context["endpoint_url"] = endpoint_url + context["http_method"] = service_metadata["ruuter_type"] + + logger.info(f"[{chat_id}] Service prepared: {endpoint_url}") + + # TODO: STEP 7 - Call Ruuter service endpoint and return response + # 1. Build payload: {"input": entities_array, "authorId": request.authorId, "chatId": request.chatId} + # 2. Call endpoint using http_method (POST/GET) with SERVICE_CALL_TIMEOUT + # 3. Parse Ruuter response and extract result + # 4. Return OrchestrationResponse with actual service result + # 5. Handle errors (timeout, HTTP errors, malformed JSON) + # 6. Remove debug response code below (lines 589-601) after implementation + + # STEP 6: Return debug response (temporary until Step 7 - Ruuter call implemented) + # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (START) + debug_content = self._format_debug_response( + service_name=service_metadata["service_name"], + endpoint_url=endpoint_url, + http_method=service_metadata["ruuter_type"], + entities_array=entities_array, + ) + + logger.info(f"[{chat_id}] Returning debug response (Step 7 pending)") + + # Log costs after service workflow completes (follows RAG workflow pattern) + if self.orchestration_service: + self.orchestration_service._log_costs(costs_dict) + + return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content=debug_content, + ) + # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (END) async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], ) -> Optional[AsyncIterator[str]]: - """ - Execute service workflow in streaming mode. - - TODO: Implement service workflow streaming: - 1. Execute service call (same as non-streaming) - 2. Get complete service response - 3. 
Validate with output guardrails (validation-first) - 4. If blocked: yield violation message + END - 5. If allowed: chunk response and stream token-by-token - 6. Simulate streaming for consistent UX with RAG - - Streaming approach (validation-first): - ```python - # Get complete response - service_response = await call_service(...) - - # Validate BEFORE streaming - is_safe = await guardrails.check_output_async(service_response) - if not is_safe: - yield format_sse(chatId, VIOLATION_MESSAGE) - yield format_sse(chatId, "END") - return - - # Stream validated response - for chunk in split_into_tokens(service_response, chunk_size=5): - yield format_sse(chatId, chunk) - await asyncio.sleep(0.01) - yield format_sse(chatId, "END") - ``` + """Execute service workflow in streaming mode.""" + chat_id = request.chatId - Args: - request: Orchestration request with user query - context: Metadata with service_id, intent, entities + # Create costs tracking dictionary (follows RAG workflow pattern) + costs_dict: Dict[str, Dict[str, Any]] = {} - Returns: - AsyncIterator yielding SSE strings or None to fallback - """ - logger.debug( - f"[{request.chatId}] Service workflow execute_streaming called " - f"(not implemented - returning None)" + # Log comprehensive request details and perform service discovery + await self._log_request_details( + request, context, mode="streaming", costs_dict=costs_dict ) - # TODO: Implement service streaming logic here - # For now, return None to trigger fallback to next layer - return None + # Check if service was detected and validated + if not context.get("service_id"): + logger.info( + f"[{chat_id}] No service detected or validated - " + f"returning None to fallback to next layer" + ) + return None + + # Entity Transformation & Validation + logger.info(f"[{chat_id}] Entity Transformation:") + + # Step 1: Extract service metadata from context + service_metadata = self._extract_service_metadata(context, chat_id) + if not service_metadata: + logger.error( 
+ f"[{chat_id}] - Metadata extraction failed - " + f"returning None to fallback" + ) + return None + + logger.info(f"[{chat_id}] - Service: {service_metadata['service_name']}") + logger.info( + f"[{chat_id}] - Schema entities: {service_metadata['entity_schema']}" + ) + logger.info( + f"[{chat_id}] - Extracted entities: {service_metadata['entities_dict']}" + ) + + # Step 2: Validate entities against schema + validation_result = self._validate_entities( + extracted_entities=service_metadata["entities_dict"], + service_schema=service_metadata["entity_schema"], + service_name=service_metadata["service_name"], + chat_id=chat_id, + ) + + logger.info( + f"[{chat_id}] - Validation status: " + f"{'PASSED ✓' if validation_result['is_valid'] else 'FAILED ✗'}" + ) + + if validation_result["missing_entities"]: + logger.warning( + f"[{chat_id}] - Missing entities (will send empty strings): " + f"{validation_result['missing_entities']}" + ) + + if validation_result["extra_entities"]: + logger.info( + f"[{chat_id}] - Extra entities (ignored): " + f"{validation_result['extra_entities']}" + ) + + if validation_result["validation_errors"]: + for error in validation_result["validation_errors"]: + logger.warning(f"[{chat_id}] - Validation warning: {error}") + + # Step 3: Transform entities dict to ordered array + entities_array = self._transform_entities_to_array( + entities_dict=service_metadata["entities_dict"], + entity_order=service_metadata["entity_schema"], + ) + + context["entities_array"] = entities_array + context["validation_result"] = validation_result + + # Construct service endpoint URL + endpoint_url = self._construct_service_endpoint( + service_name=service_metadata["service_name"], chat_id=chat_id + ) + + context["endpoint_url"] = endpoint_url + context["http_method"] = service_metadata["ruuter_type"] + + logger.info(f"[{chat_id}] Service prepared: {endpoint_url}") + + # TODO: STEP 7 - Call Ruuter service endpoint and stream response + # 1. 
Build payload: {"input": entities_array, "authorId": request.authorId, "chatId": request.chatId} + # 2. Call endpoint using http_method (POST/GET) with SERVICE_CALL_TIMEOUT + # 3. Parse Ruuter response and extract result + # 4. Format result as SSE and yield chunks + # 5. Handle errors (timeout, HTTP errors, malformed JSON) + # 6. Remove debug response code below (lines 697-709) after implementation + + # STEP 6: Return debug response as async iterator (temporary until Step 7) + # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (START) + debug_content = self._format_debug_response( + service_name=service_metadata["service_name"], + endpoint_url=endpoint_url, + http_method=service_metadata["ruuter_type"], + entities_array=entities_array, + ) + + logger.info(f"[{chat_id}] Streaming debug response (Step 7 pending)") + + if self.orchestration_service is None: + raise RuntimeError("Orchestration service not initialized for streaming") + + # Store reference for closure (helps type checker) + orchestration_service = self.orchestration_service + + async def debug_stream() -> AsyncIterator[str]: + yield orchestration_service._format_sse(chat_id, debug_content) + yield orchestration_service._format_sse(chat_id, "END") + + # Log costs after streaming completes (follows RAG workflow pattern) + # Must be inside generator because costs are accumulated during streaming + orchestration_service._log_costs(costs_dict) + + return debug_stream() + # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (END) From 864ad30682ecf88a858bbb6bbe11e4d120cd05bb Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 24 Feb 2026 17:08:07 +0530 Subject: [PATCH 06/27] fixed requested changes --- src/tool_classifier/constants.py | 3 - .../workflows/service_workflow.py | 238 +++++++++--------- 2 files changed, 125 insertions(+), 116 deletions(-) diff --git a/src/tool_classifier/constants.py b/src/tool_classifier/constants.py index a2a17f7..e967e3c 100644 --- a/src/tool_classifier/constants.py +++ 
b/src/tool_classifier/constants.py @@ -5,9 +5,6 @@ # Qdrant Vector Database Configuration # ============================================================================ -import os - - QDRANT_HOST = "qdrant" """Qdrant server hostname.""" diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index 9c7f83e..9cd8a76 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -84,16 +84,6 @@ def __init__( """Initialize service workflow executor.""" self.llm_manager = llm_manager self.orchestration_service = orchestration_service - self._qdrant_client: Optional[httpx.AsyncClient] = None - - async def _get_qdrant_client(self) -> httpx.AsyncClient: - """Get or create Qdrant HTTP client (lazy initialization).""" - if self._qdrant_client is None: - qdrant_url = f"http://{QDRANT_HOST}:{QDRANT_PORT}" - self._qdrant_client = httpx.AsyncClient( - base_url=qdrant_url, timeout=QDRANT_TIMEOUT - ) - return self._qdrant_client async def _semantic_search_services( self, @@ -102,7 +92,11 @@ async def _semantic_search_services( chat_id: str, top_k: int = SEMANTIC_SEARCH_TOP_K, ) -> Optional[List[Dict[str, Any]]]: - """Search services using semantic search via Qdrant.""" + """Search services using semantic search via Qdrant. + + Creates a new httpx.AsyncClient per request to ensure proper resource cleanup. + This is safe and efficient since semantic search is infrequent (only when many services exist). 
+ """ if not self.orchestration_service: logger.error( f"[{chat_id}] Semantic search unavailable: orchestration service not provided" @@ -125,73 +119,76 @@ async def _semantic_search_services( query_embedding = embeddings[0] - # Verify collection exists and has data - client = await self._get_qdrant_client() - - try: - collection_info = await client.get(f"/collections/{QDRANT_COLLECTION}") - if collection_info.status_code == 200: - info = collection_info.json() - points_count = info.get("result", {}).get("points_count", 0) - if points_count == 0: - logger.error(f"[{chat_id}] Collection is empty") - return None - except Exception as e: - logger.warning(f"[{chat_id}] Could not verify collection: {e}") - - # Search Qdrant collection - client = await self._get_qdrant_client() - - search_payload = { - "vector": query_embedding, - "limit": top_k, - "score_threshold": SEMANTIC_SEARCH_THRESHOLD, - "with_payload": True, - } - - response = await client.post( - f"/collections/{QDRANT_COLLECTION}/points/search", - json=search_payload, - ) + # Create Qdrant client with proper resource cleanup via context manager + qdrant_url = f"http://{QDRANT_HOST}:{QDRANT_PORT}" + async with httpx.AsyncClient( + base_url=qdrant_url, timeout=QDRANT_TIMEOUT + ) as client: + # Verify collection exists and has data + try: + collection_info = await client.get( + f"/collections/{QDRANT_COLLECTION}" + ) + if collection_info.status_code == 200: + info = collection_info.json() + points_count = info.get("result", {}).get("points_count", 0) + if points_count == 0: + logger.error(f"[{chat_id}] Collection is empty") + return None + except Exception as e: + logger.warning(f"[{chat_id}] Could not verify collection: {e}") + + # Search Qdrant collection + search_payload = { + "vector": query_embedding, + "limit": top_k, + "score_threshold": SEMANTIC_SEARCH_THRESHOLD, + "with_payload": True, + } - if response.status_code != 200: - logger.error( - f"[{chat_id}] Qdrant search failed: HTTP {response.status_code}" 
+ response = await client.post( + f"/collections/{QDRANT_COLLECTION}/points/search", + json=search_payload, ) - return None - search_results = response.json() - points = search_results.get("result", []) - - if len(points) == 0: - logger.warning( - f"[{chat_id}] No services matched (threshold={SEMANTIC_SEARCH_THRESHOLD})" - ) - return None + if response.status_code != 200: + logger.error( + f"[{chat_id}] Qdrant search failed: HTTP {response.status_code}" + ) + return None - # Transform Qdrant results to service format - services: List[Dict[str, Any]] = [] - for point in points: - payload = point.get("payload", {}) - score = float(point.get("score", 0)) - - service = { - "serviceId": payload.get("service_id"), - "service_id": payload.get("service_id"), - "name": payload.get("name"), - "description": payload.get("description"), - "examples": payload.get("examples", []), - "entities": payload.get("entities", []), - # Note: endpoint not stored in intent_collections, - # will be resolved via database lookup if needed - "similarity_score": score, - } - services.append(service) + search_results = response.json() + points = search_results.get("result", []) - logger.info( - f"[{chat_id}] Found {len(services)} services via semantic search" - ) - return services + if len(points) == 0: + logger.warning( + f"[{chat_id}] No services matched (threshold={SEMANTIC_SEARCH_THRESHOLD})" + ) + return None + + # Transform Qdrant results to service format + services: List[Dict[str, Any]] = [] + for point in points: + payload = point.get("payload", {}) + score = float(point.get("score", 0)) + + service = { + "serviceId": payload.get("service_id"), + "service_id": payload.get("service_id"), + "name": payload.get("name"), + "description": payload.get("description"), + "examples": payload.get("examples", []), + "entities": payload.get("entities", []), + # Note: endpoint not stored in intent_collections, + # will be resolved via database lookup if needed + "similarity_score": score, + } + 
services.append(service) + + logger.info( + f"[{chat_id}] Found {len(services)} services via semantic search" + ) + return services except Exception as e: logger.error(f"[{chat_id}] Semantic search failed: {e}", exc_info=True) @@ -291,6 +288,53 @@ def _validate_detected_service( ) return None + async def _process_intent_detection( + self, + services: List[Dict[str, Any]], + request: OrchestrationRequest, + chat_id: str, + context: Dict[str, Any], + costs_dict: Dict[str, Dict[str, Any]], + ) -> None: + """Detect intent, validate service, and populate context. + + This helper method encapsulates the common logic of: + 1. Calling intent detection (LLM) + 2. Tracking costs + 3. Validating matched service + 4. Populating context with service metadata + + Args: + services: List of services to match against + request: Orchestration request + chat_id: Chat ID for logging + context: Context dict to populate with results + costs_dict: Dictionary to track LLM costs + """ + intent_result, intent_usage = await self._detect_service_intent( + user_query=request.message, + services=services, + conversation_history=request.conversationHistory, + chat_id=chat_id, + ) + costs_dict["intent_detection"] = intent_usage + + if intent_result and intent_result.get("matched_service_id"): + service_id = intent_result["matched_service_id"] + logger.info(f"[{chat_id}] Matched: {service_id}") + + validated_service = self._validate_detected_service( + matched_service_id=service_id, + services=services, + chat_id=chat_id, + ) + + if validated_service: + context["service_id"] = service_id + context["confidence"] = intent_result.get("confidence", 0.0) + context["entities"] = intent_result.get("entities", {}) + context["service_data"] = validated_service + def _extract_service_metadata( self, context: Dict[str, Any], chat_id: str ) -> Optional[Dict[str, Any]]: @@ -478,56 +522,24 @@ async def _log_request_details( services = [] if services: - intent_result, intent_usage = await 
self._detect_service_intent( - user_query=request.message, + await self._process_intent_detection( services=services, - conversation_history=request.conversationHistory, + request=request, chat_id=chat_id, + context=context, + costs_dict=costs_dict, ) - costs_dict["intent_detection"] = intent_usage - - if intent_result and intent_result.get("matched_service_id"): - service_id = intent_result["matched_service_id"] - logger.info(f"[{chat_id}] Matched: {service_id}") - - validated_service = self._validate_detected_service( - matched_service_id=service_id, - services=services, - chat_id=chat_id, - ) - - if validated_service: - context["service_id"] = service_id - context["confidence"] = intent_result.get("confidence", 0.0) - context["entities"] = intent_result.get("entities", {}) - context["service_data"] = validated_service else: services = response_data.get("services", []) if services: - intent_result, intent_usage = await self._detect_service_intent( - user_query=request.message, + await self._process_intent_detection( services=services, - conversation_history=request.conversationHistory, + request=request, chat_id=chat_id, + context=context, + costs_dict=costs_dict, ) - costs_dict["intent_detection"] = intent_usage - - if intent_result and intent_result.get("matched_service_id"): - service_id = intent_result["matched_service_id"] - logger.info(f"[{chat_id}] Matched: {service_id}") - - validated_service = self._validate_detected_service( - matched_service_id=service_id, - services=services, - chat_id=chat_id, - ) - - if validated_service: - context["service_id"] = service_id - context["confidence"] = intent_result.get("confidence", 0.0) - context["entities"] = intent_result.get("entities", {}) - context["service_data"] = validated_service else: logger.warning(f"[{chat_id}] Service discovery failed") From 25f9614a7b451426b78ae03f23b6cb0ab65535e0 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 24 Feb 2026 18:43:21 +0530 Subject: [PATCH 07/27] fixed issues --- 
docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md | 5 +++-- src/tool_classifier/constants.py | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md index bb8ad44..6be3f8e 100644 --- a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md +++ b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md @@ -440,8 +440,9 @@ def _transform_entities_to_array( ### Endpoint Construction ```python -endpoint_url = f"{RUUTER_BASE_URL}{service_metadata['ruuter_url']}" -# Example: "http://ruuter:8080/currency/convert" +endpoint_url = f"{RUUTER_BASE_URL}/services/active{service_metadata['service_name']}" +# Example: "http://ruuter:8080/services/active/currency-conversion" +# (Note: service_name from service metadata, e.g., "/currency-conversion") ``` ### Payload Construction (Planned) diff --git a/src/tool_classifier/constants.py b/src/tool_classifier/constants.py index e967e3c..c885b52 100644 --- a/src/tool_classifier/constants.py +++ b/src/tool_classifier/constants.py @@ -8,7 +8,7 @@ QDRANT_HOST = "qdrant" """Qdrant server hostname.""" -QDRANT_PORT = int("6333") +QDRANT_PORT = 6333 """Qdrant server port.""" QDRANT_TIMEOUT = 10.0 @@ -38,7 +38,6 @@ """Base URL for Ruuter private service endpoints.""" RAG_SEARCH_RUUTER_PUBLIC = "http://ruuter-public:8086/rag-search" - """Public Ruuter endpoint for RAG search service discovery.""" SERVICE_CALL_TIMEOUT = 10 From 69c12799bd7853d6b4fbd9d4cf5f0fff15bad945 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 24 Feb 2026 20:17:04 +0530 Subject: [PATCH 08/27] protocol related requested changes --- docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md | 6 +- docs/TOOL_CLASSIFIER_SKELETON_USAGE.md | 4 +- src/llm_orchestration_service.py | 82 +++++++++---------- src/tool_classifier/workflows/ood_workflow.py | 4 +- src/tool_classifier/workflows/rag_workflow.py | 2 +- .../workflows/service_workflow.py | 12 +-- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git 
a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md index 6be3f8e..afd4303 100644 --- a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md +++ b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md @@ -246,7 +246,7 @@ intent_result = intent_module.forward(...) usage_info = get_lm_usage_since(history_length_before) costs_dict["intent_detection"] = usage_info -# Later: orchestration_service._log_costs(costs_dict) +# Later: orchestration_service.log_costs(costs_dict) ``` --- @@ -468,7 +468,7 @@ response = await httpx.post( # Streaming async with httpx.stream("POST", endpoint_url, json=payload) as stream: async for line in stream.aiter_lines(): - yield orchestration_service._format_sse(chat_id, line) + yield orchestration_service.format_sse(chat_id, line) ``` --- @@ -564,7 +564,7 @@ intent_result, intent_usage = await _detect_service_intent(...) costs_dict["intent_detection"] = intent_usage # Log costs after workflow completes -orchestration_service._log_costs(costs_dict) +orchestration_service.log_costs(costs_dict) ``` **Cost Breakdown Logged:** diff --git a/docs/TOOL_CLASSIFIER_SKELETON_USAGE.md b/docs/TOOL_CLASSIFIER_SKELETON_USAGE.md index 9dc87c8..38ce1f5 100644 --- a/docs/TOOL_CLASSIFIER_SKELETON_USAGE.md +++ b/docs/TOOL_CLASSIFIER_SKELETON_USAGE.md @@ -361,9 +361,9 @@ class MyCustomWorkflow(BaseWorkflow): # Stream result token-by-token async def stream_result(): for chunk in self._split_into_tokens(result): - yield self._format_sse(request.chatId, chunk) + yield self.format_sse(request.chatId, chunk) await asyncio.sleep(0.01) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, "END") return stream_result() ``` diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 3c059f5..e2eb0c9 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -274,7 +274,7 @@ async def process_orchestration_request( ) # Log final costs and return response - 
self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) # Update budget for the LLM connection @@ -331,7 +331,7 @@ async def process_orchestration_request( } ) langfuse.flush() - self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) # Update budget even on error @@ -405,8 +405,8 @@ async def stream_orchestration_response( ) # Yield SSE format error + END marker - yield self._format_sse(request.chatId, validation_msg) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, validation_msg) + yield self.format_sse(request.chatId, "END") return # Stop processing # Use StreamManager for centralized tracking and guaranteed cleanup @@ -441,11 +441,11 @@ async def stream_orchestration_response( f"[{request.chatId}] [{stream_ctx.stream_id}] Input blocked by guardrails: " f"{input_check_result.reason}" ) - yield self._format_sse( + yield self.format_sse( request.chatId, INPUT_GUARDRAIL_VIOLATION_MESSAGE ) - yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) + yield self.format_sse(request.chatId, "END") + self.log_costs(costs_dict) stream_ctx.mark_completed() return @@ -500,7 +500,7 @@ async def stream_orchestration_response( ) # Log costs and timings - self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) stream_ctx.mark_completed() return # Exit after successful classifier routing @@ -546,10 +546,10 @@ async def stream_orchestration_response( logger, error_id, "streaming_orchestration", request.chatId, e ) - yield self._format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) + yield self.format_sse(request.chatId, "END") - self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) # Update budget even on outer exception @@ 
-645,9 +645,9 @@ async def _stream_rag_pipeline( localized_msg = get_localized_message( OUT_OF_SCOPE_MESSAGES, detected_language ) - yield self._format_sse(request.chatId, localized_msg) - yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) + yield self.format_sse(request.chatId, localized_msg) + yield self.format_sse(request.chatId, "END") + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) stream_ctx.mark_completed() return @@ -659,9 +659,9 @@ async def _stream_rag_pipeline( localized_msg = get_localized_message( OUT_OF_SCOPE_MESSAGES, detected_language ) - yield self._format_sse(request.chatId, localized_msg) - yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) + yield self.format_sse(request.chatId, localized_msg) + yield self.format_sse(request.chatId, "END") + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) stream_ctx.mark_completed() return @@ -690,9 +690,9 @@ async def _stream_rag_pipeline( localized_msg = get_localized_message( OUT_OF_SCOPE_MESSAGES, detected_language ) - yield self._format_sse(request.chatId, localized_msg) - yield self._format_sse(request.chatId, "END") - self._log_costs(costs_dict) + yield self.format_sse(request.chatId, localized_msg) + yield self.format_sse(request.chatId, "END") + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) stream_ctx.mark_completed() return @@ -755,14 +755,14 @@ async def bot_response_generator() -> AsyncIterator[str]: f"[{request.chatId}] [{stream_ctx.stream_id}] Token limit exceeded: " f"{stream_ctx.token_count} > {StreamConfig.MAX_TOKENS_PER_STREAM}" ) - yield self._format_sse( + yield self.format_sse( request.chatId, STREAM_TOKEN_LIMIT_MESSAGE ) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, "END") usage_info = get_lm_usage_since(history_length_before) costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) + 
self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) stream_ctx.mark_completed() return @@ -784,20 +784,20 @@ async def bot_response_generator() -> AsyncIterator[str]: logger.warning( f"[{request.chatId}] [{stream_ctx.stream_id}] Guardrails violation detected" ) - yield self._format_sse( + yield self.format_sse( request.chatId, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE ) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, "END") usage_info = get_lm_usage_since(history_length_before) costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) stream_ctx.mark_completed() return # Yield the validated chunk to client - yield self._format_sse(request.chatId, validated_chunk) + yield self.format_sse(request.chatId, validated_chunk) except GeneratorExit: stream_ctx.mark_cancelled() logger.info( @@ -816,9 +816,9 @@ async def bot_response_generator() -> AsyncIterator[str]: f"{i + 1}. 
[{ref.document_url}]({ref.document_url})" for i, ref in enumerate(doc_references) ) - yield self._format_sse(request.chatId, refs_text) + yield self.format_sse(request.chatId, refs_text) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, "END") else: # No guardrails - stream directly @@ -837,14 +837,14 @@ async def bot_response_generator() -> AsyncIterator[str]: logger.error( f"[{request.chatId}] [{stream_ctx.stream_id}] Token limit exceeded (no guardrails)" ) - yield self._format_sse( + yield self.format_sse( request.chatId, STREAM_TOKEN_LIMIT_MESSAGE ) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, "END") stream_ctx.mark_completed() return - yield self._format_sse(request.chatId, token) + yield self.format_sse(request.chatId, token) # Send document references before END token doc_references = self._extract_document_references(relevant_chunks) @@ -853,9 +853,9 @@ async def bot_response_generator() -> AsyncIterator[str]: f"{i + 1}. 
[{ref.document_url}]({ref.document_url})" for i, ref in enumerate(doc_references) ) - yield self._format_sse(request.chatId, refs_text) + yield self.format_sse(request.chatId, refs_text) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, "END") # Extract usage information after streaming completes usage_info = get_lm_usage_since(history_length_before) @@ -872,7 +872,7 @@ async def bot_response_generator() -> AsyncIterator[str]: ) # Log costs and trace - self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) # Update budget @@ -935,7 +935,7 @@ async def bot_response_generator() -> AsyncIterator[str]: ) usage_info = get_lm_usage_since(history_length_before) costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) # Update budget even on client disconnect @@ -953,12 +953,12 @@ async def bot_response_generator() -> AsyncIterator[str]: request.chatId, stream_error, ) - yield self._format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) + yield self.format_sse(request.chatId, "END") usage_info = get_lm_usage_since(history_length_before) costs_dict["streaming_generation"] = usage_info - self._log_costs(costs_dict) + self.log_costs(costs_dict) log_step_timings(timing_dict, request.chatId) # Update budget even on streaming error @@ -966,7 +966,7 @@ async def bot_response_generator() -> AsyncIterator[str]: request.connection_id, costs_dict, request.environment ) - def _format_sse(self, chat_id: str, content: str) -> str: + def format_sse(self, chat_id: str, content: str) -> str: """ Format SSE message with exact specification. 
@@ -1885,7 +1885,7 @@ async def _check_output_guardrails( usage={}, ) - def _log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: + def log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: """ Log cost information for tracking. diff --git a/src/tool_classifier/workflows/ood_workflow.py b/src/tool_classifier/workflows/ood_workflow.py index fed467a..cd114f7 100644 --- a/src/tool_classifier/workflows/ood_workflow.py +++ b/src/tool_classifier/workflows/ood_workflow.py @@ -104,9 +104,9 @@ async def execute_streaming( # Stream message for UX consistency (no guardrails needed - fixed message) async def stream_ood_message(): for chunk in split_into_tokens(ood_message, chunk_size=5): - yield self._format_sse(request.chatId, chunk) + yield self.format_sse(request.chatId, chunk) await asyncio.sleep(0.01) - yield self._format_sse(request.chatId, "END") + yield self.format_sse(request.chatId, "END") return stream_ood_message() ``` diff --git a/src/tool_classifier/workflows/rag_workflow.py b/src/tool_classifier/workflows/rag_workflow.py index d83080a..6c58648 100644 --- a/src/tool_classifier/workflows/rag_workflow.py +++ b/src/tool_classifier/workflows/rag_workflow.py @@ -87,7 +87,7 @@ async def execute_async( ) # Log costs and timings - self.orchestration_service._log_costs(costs_dict) + self.orchestration_service.log_costs(costs_dict) from src.utils.time_tracker import log_step_timings log_step_timings(timing_dict, request.chatId) diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index 9cd8a76..97d9804 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -52,7 +52,7 @@ def create_embeddings_for_indexer( """ ... - def _format_sse(self, chat_id: str, content: str) -> str: + def format_sse(self, chat_id: str, content: str) -> str: """Format content as SSE message. 
Args: @@ -64,7 +64,7 @@ def _format_sse(self, chat_id: str, content: str) -> str: """ ... - def _log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: + def log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: """Log cost information for tracking. Args: @@ -656,7 +656,7 @@ async def execute_async( # Log costs after service workflow completes (follows RAG workflow pattern) if self.orchestration_service: - self.orchestration_service._log_costs(costs_dict) + self.orchestration_service.log_costs(costs_dict) return OrchestrationResponse( chatId=request.chatId, @@ -785,12 +785,12 @@ async def execute_streaming( orchestration_service = self.orchestration_service async def debug_stream() -> AsyncIterator[str]: - yield orchestration_service._format_sse(chat_id, debug_content) - yield orchestration_service._format_sse(chat_id, "END") + yield orchestration_service.format_sse(chat_id, debug_content) + yield orchestration_service.format_sse(chat_id, "END") # Log costs after streaming completes (follows RAG workflow pattern) # Must be inside generator because costs are accumulated during streaming - orchestration_service._log_costs(costs_dict) + orchestration_service.log_costs(costs_dict) return debug_stream() # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (END) From 07f2e0f1c6c1d4612f7e7d2b64eef3bec54af757 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 24 Feb 2026 20:25:56 +0530 Subject: [PATCH 09/27] fixed requested changes --- docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md | 2 +- src/tool_classifier/classifier.py | 6 ++++-- src/tool_classifier/workflows/service_workflow.py | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md index afd4303..15669e4 100644 --- a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md +++ b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md @@ -440,7 +440,7 @@ def _transform_entities_to_array( ### Endpoint Construction 
```python -endpoint_url = f"{RUUTER_BASE_URL}/services/active{service_metadata['service_name']}" +endpoint_url = f"{RUUTER_BASE_URL}/services/active{service_name}" # Example: "http://ruuter:8080/services/active/currency-conversion" # (Note: service_name from service metadata, e.g., "/currency-conversion") ``` diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index ec9dc95..c8bef8a 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -228,7 +228,8 @@ async def _execute_with_fallback_async( next_name = WORKFLOW_DISPLAY_NAMES.get(next_layer, next_layer.value) logger.info( - f"[{chat_id}] Falling back to {next_name} (Layer {current_index + 2})" + f"[{chat_id}] Falling back to {next_name} " + f"(Layer {WORKFLOW_LAYER_ORDER.index(next_layer) + 1})" ) result = await next_workflow.execute_async(request, {}) @@ -300,9 +301,10 @@ async def _execute_with_fallback_streaming( next_workflow = self._get_workflow_executor(next_layer) next_name = WORKFLOW_DISPLAY_NAMES.get(next_layer, next_layer.value) + layer_number = WORKFLOW_LAYER_ORDER.index(next_layer) + 1 logger.info( f"[{chat_id}] Falling back to {next_name} streaming " - f"(Layer {current_index + 2})" + f"(Layer {layer_number})" ) result = await next_workflow.execute_streaming(request, {}) diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index 97d9804..d71e2d9 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -205,7 +205,9 @@ async def _call_service_discovery(self, chat_id: str) -> Optional[Dict[str, Any] data = response.json() return data except httpx.TimeoutException: - logger.error(f"[{chat_id}] Service discovery timeout after 10s") + logger.error( + f"[{chat_id}] Service discovery timeout after {SERVICE_DISCOVERY_TIMEOUT}s" + ) return None except httpx.HTTPStatusError as e: logger.error( @@ -641,7 +643,6 @@ async 
def execute_async( # 3. Parse Ruuter response and extract result # 4. Return OrchestrationResponse with actual service result # 5. Handle errors (timeout, HTTP errors, malformed JSON) - # 6. Remove debug response code below (lines 589-601) after implementation # STEP 6: Return debug response (temporary until Step 7 - Ruuter call implemented) # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (START) @@ -765,7 +766,6 @@ async def execute_streaming( # 3. Parse Ruuter response and extract result # 4. Format result as SSE and yield chunks # 5. Handle errors (timeout, HTTP errors, malformed JSON) - # 6. Remove debug response code below (lines 697-709) after implementation # STEP 6: Return debug response as async iterator (temporary until Step 7) # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (START) From f63f777582c22bba3524e38a2d916096fe134380 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 25 Feb 2026 08:46:52 +0530 Subject: [PATCH 10/27] update time tracking --- src/llm_orchestration_service.py | 67 +++++++++++++++---- src/tool_classifier/base_workflow.py | 4 ++ src/tool_classifier/classifier.py | 38 +++++++++-- .../workflows/context_workflow.py | 4 ++ src/tool_classifier/workflows/ood_workflow.py | 3 + src/tool_classifier/workflows/rag_workflow.py | 20 ++++-- .../workflows/service_workflow.py | 38 +++++++++-- src/utils/time_tracker.py | 8 +++ 8 files changed, 151 insertions(+), 31 deletions(-) diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index e2eb0c9..2705a29 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -170,9 +170,11 @@ async def process_orchestration_request( f"authorId: {request.authorId}, environment: {request.environment}" ) - # STEP 0: Detect language from user message + # STEP 0: Detect language from user message (with timing) + start_time = time.time() detected_language = detect_language(request.message) language_name = get_language_name(detected_language) + 
timing_dict["language_detection"] = time.time() - start_time logger.info( f"[{request.chatId}] Detected language: {language_name} ({detected_language})" ) @@ -182,7 +184,9 @@ async def process_orchestration_request( setattr(request, "_detected_language", detected_language) # STEP 0.5: Basic Query Validation (before expensive component initialization) + start_time = time.time() validation_result = validate_query_basic(request.message) + timing_dict["query_validation"] = time.time() - start_time if not validation_result.is_valid: logger.info( f"[{request.chatId}] Query validation failed: {validation_result.rejection_reason}" @@ -210,8 +214,10 @@ async def process_orchestration_request( content=validation_msg, ) - # Initialize all service components (only for valid queries) + # Initialize all service components (only for valid queries, with timing) + start_time = time.time() components = self._initialize_service_components(request) + timing_dict["initialization"] = time.time() - start_time # TOOL CLASSIFIER INTEGRATION # Route through tool classifier if enabled, otherwise use existing RAG pipeline @@ -229,24 +235,29 @@ async def process_orchestration_request( ) logger.info("Tool classifier initialized") - # Classify query to determine workflow + # Classify query to determine workflow (with timing) + start_time = time.time() classification = await self.tool_classifier.classify( query=request.message, conversation_history=request.conversationHistory, language=detected_language, ) + timing_dict["classifier.classify"] = time.time() - start_time logger.info( f"[{request.chatId}] Classification: {classification.workflow.value} " f"(confidence: {classification.confidence:.2f})" ) - # Route to appropriate workflow + # Route to appropriate workflow (with timing) + start_time = time.time() response = await self.tool_classifier.route_to_workflow( classification=classification, request=request, is_streaming=False, + timing_dict=timing_dict, ) + timing_dict["classifier.route"] = 
time.time() - start_time except Exception as classifier_error: logger.error( @@ -382,9 +393,11 @@ async def stream_orchestration_response( costs_dict: Dict[str, Dict[str, Any]] = {} timing_dict: Dict[str, float] = {} - # STEP 0: Detect language from user message + # STEP 0: Detect language from user message (with timing) + start_time = time.time() detected_language = detect_language(request.message) language_name = get_language_name(detected_language) + timing_dict["language_detection"] = time.time() - start_time logger.info( f"[{request.chatId}] Streaming request - Detected language: {language_name} ({detected_language})" ) @@ -393,8 +406,10 @@ async def stream_orchestration_response( # Using setattr for type safety - adds dynamic attribute to Pydantic model instance setattr(request, "_detected_language", detected_language) - # Step 0.5: Basic Query Validation (before guardrails) + # Step 0.5: Basic Query Validation (before guardrails, with timing) + start_time = time.time() validation_result = validate_query_basic(request.message) + timing_dict["query_validation"] = time.time() - start_time if not validation_result.is_valid: logger.info( f"[{request.chatId}] Streaming - Query validation failed: {validation_result.rejection_reason}" @@ -419,8 +434,10 @@ async def stream_orchestration_response( f"(environment: {request.environment})" ) - # Initialize all service components + # Initialize all service components (with timing) + start_time = time.time() components = self._initialize_service_components(request) + timing_dict["initialization"] = time.time() - start_time # STEP 1: CHECK INPUT GUARDRAILS (blocking) logger.info( @@ -1114,8 +1131,17 @@ async def _execute_orchestration_pipeline( components: Dict[str, Any], costs_dict: Dict[str, Dict[str, Any]], timing_dict: Dict[str, float], + prefix: str = "", ) -> Union[OrchestrationResponse, TestOrchestrationResponse]: - """Execute the main orchestration pipeline with all components.""" + """Execute the main orchestration 
pipeline with all components. + + Args: + request: Orchestration request + components: Initialized service components + costs_dict: Dictionary for cost tracking + timing_dict: Dictionary for timing tracking + prefix: Optional prefix for timing keys (e.g., "rag" for workflow namespacing) + """ # Note: Query validation now happens in process_orchestration_request() # before component initialization for true early rejection @@ -1125,7 +1151,12 @@ async def _execute_orchestration_pipeline( input_blocked_response = await self.handle_input_guardrails( components["guardrails_adapter"], request, costs_dict ) - timing_dict["input_guardrails_check"] = time.time() - start_time + timing_key = ( + f"{prefix}.input_guardrails_check" + if prefix + else "input_guardrails_check" + ) + timing_dict[timing_key] = time.time() - start_time if input_blocked_response: return input_blocked_response @@ -1136,7 +1167,8 @@ async def _execute_orchestration_pipeline( original_message=request.message, conversation_history=request.conversationHistory, ) - timing_dict["prompt_refiner"] = time.time() - start_time + timing_key = f"{prefix}.prompt_refiner" if prefix else "prompt_refiner" + timing_dict[timing_key] = time.time() - start_time costs_dict["prompt_refiner"] = refiner_usage # Step 3: Retrieve relevant chunks using contextual retrieval @@ -1145,7 +1177,10 @@ async def _execute_orchestration_pipeline( relevant_chunks = await self._safe_retrieve_contextual_chunks( components["contextual_retriever"], refined_output, request ) - timing_dict["contextual_retrieval"] = time.time() - start_time + timing_key = ( + f"{prefix}.contextual_retrieval" if prefix else "contextual_retrieval" + ) + timing_dict[timing_key] = time.time() - start_time except ( ContextualRetrieverInitializationError, ContextualRetrievalFailureError, @@ -1168,7 +1203,10 @@ async def _execute_orchestration_pipeline( response_generator=components["response_generator"], costs_dict=costs_dict, ) - timing_dict["response_generation"] = 
time.time() - start_time + timing_key = ( + f"{prefix}.response_generation" if prefix else "response_generation" + ) + timing_dict[timing_key] = time.time() - start_time # Step 5: Output Guardrails Check # Apply guardrails to all response types for consistent safety across all environments @@ -1179,7 +1217,10 @@ async def _execute_orchestration_pipeline( request, costs_dict, ) - timing_dict["output_guardrails_check"] = time.time() - start_time + timing_key = ( + f"{prefix}.output_guardrails_check" if prefix else "output_guardrails_check" + ) + timing_dict[timing_key] = time.time() - start_time # Step 6: Store inference data (for production and testing environments) # Only store OrchestrationResponse (has chatId), not TestOrchestrationResponse diff --git a/src/tool_classifier/base_workflow.py b/src/tool_classifier/base_workflow.py index 50faf7a..45886fa 100644 --- a/src/tool_classifier/base_workflow.py +++ b/src/tool_classifier/base_workflow.py @@ -33,6 +33,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute workflow in non-streaming mode. @@ -43,6 +44,7 @@ async def execute_async( Args: request: The orchestration request containing user query and context context: Workflow-specific metadata from ClassificationResult.metadata + timing_dict: Optional dictionary for tracking step execution times Returns: OrchestrationResponse if workflow can handle this query @@ -68,6 +70,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute workflow in streaming mode (Server-Sent Events). 
@@ -78,6 +81,7 @@ async def execute_streaming( Args: request: The orchestration request containing user query and context context: Workflow-specific metadata from ClassificationResult.metadata + timing_dict: Optional dictionary for tracking step execution times Returns: AsyncIterator[str] yielding SSE-formatted strings if workflow can handle diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index c8bef8a..ab9e402 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -1,6 +1,6 @@ """Main tool classifier for workflow routing.""" -from typing import Any, AsyncIterator, Dict, List, Literal, Union, overload +from typing import Any, AsyncIterator, Dict, List, Literal, Optional, Union, overload from loguru import logger from models.request_models import ( @@ -106,6 +106,7 @@ async def route_to_workflow( classification: ClassificationResult, request: OrchestrationRequest, is_streaming: Literal[False] = False, + timing_dict: Optional[Dict[str, float]] = None, ) -> OrchestrationResponse: ... @overload @@ -114,6 +115,7 @@ async def route_to_workflow( classification: ClassificationResult, request: OrchestrationRequest, is_streaming: Literal[True], + timing_dict: Optional[Dict[str, float]] = None, ) -> AsyncIterator[str]: ... async def route_to_workflow( @@ -121,6 +123,7 @@ async def route_to_workflow( classification: ClassificationResult, request: OrchestrationRequest, is_streaming: bool = False, + timing_dict: Optional[Dict[str, float]] = None, ) -> Union[OrchestrationResponse, AsyncIterator[str]]: """ Route request to appropriate workflow based on classification. 
@@ -132,6 +135,7 @@ async def route_to_workflow( classification: Classification result from classify() request: Original orchestration request is_streaming: Whether to use streaming mode (for /orchestrate/stream) + timing_dict: Optional timing dictionary for workflow step tracking Returns: OrchestrationResponse for non-streaming mode @@ -162,6 +166,7 @@ async def route_to_workflow( request=request, context=classification.metadata, start_layer=classification.workflow, + timing_dict=timing_dict, ) else: # NON-STREAMING MODE: For /orchestrate and /orchestrate/test endpoints @@ -170,6 +175,7 @@ async def route_to_workflow( request=request, context=classification.metadata, start_layer=classification.workflow, + timing_dict=timing_dict, ) def _get_workflow_executor(self, workflow_type: WorkflowType) -> Any: @@ -188,6 +194,7 @@ async def _execute_with_fallback_async( request: OrchestrationRequest, context: Dict[str, Any], start_layer: WorkflowType, + timing_dict: Optional[Dict[str, float]] = None, ) -> OrchestrationResponse: """ Execute workflow with fallback to subsequent layers (non-streaming). @@ -197,6 +204,13 @@ async def _execute_with_fallback_async( 2. If returns None, try next layer in WORKFLOW_LAYER_ORDER 3. Continue until workflow returns non-None result 4. 
OOD workflow always returns result (never None) + + Args: + workflow: Primary workflow executor + request: Orchestration request + context: Workflow context/metadata + start_layer: Starting workflow type + timing_dict: Optional timing dictionary for tracking """ chat_id = request.chatId workflow_name = WORKFLOW_DISPLAY_NAMES.get(start_layer, start_layer.value) @@ -204,7 +218,7 @@ async def _execute_with_fallback_async( logger.info(f"[{chat_id}] Executing {workflow_name} (non-streaming)") try: - result = await workflow.execute_async(request, context) + result = await workflow.execute_async(request, context, timing_dict) if result is not None: logger.info(f"[{chat_id}] {workflow_name} handled successfully") @@ -232,7 +246,7 @@ async def _execute_with_fallback_async( f"(Layer {WORKFLOW_LAYER_ORDER.index(next_layer) + 1})" ) - result = await next_workflow.execute_async(request, {}) + result = await next_workflow.execute_async(request, {}, timing_dict) if result is not None: logger.info(f"[{chat_id}] {next_name} handled successfully") @@ -248,7 +262,7 @@ async def _execute_with_fallback_async( logger.error(f"[{chat_id}] Error executing {workflow_name}: {e}") # Fallback to RAG on error logger.info(f"[{chat_id}] Falling back to RAG due to error") - rag_result = await self.rag_workflow.execute_async(request, {}) + rag_result = await self.rag_workflow.execute_async(request, {}, timing_dict) if rag_result is not None: return rag_result else: @@ -260,6 +274,7 @@ async def _execute_with_fallback_streaming( request: OrchestrationRequest, context: Dict[str, Any], start_layer: WorkflowType, + timing_dict: Optional[Dict[str, float]] = None, ) -> AsyncIterator[str]: """ Execute workflow with fallback to subsequent layers (streaming). @@ -269,6 +284,13 @@ async def _execute_with_fallback_streaming( 2. If returns None, try next layer in WORKFLOW_LAYER_ORDER 3. Stream from the first workflow that returns non-None 4. 
OOD workflow always returns result (never None) + + Args: + workflow: Primary workflow executor + request: Orchestration request + context: Workflow context/metadata + start_layer: Starting workflow type + timing_dict: Optional timing dictionary for tracking """ chat_id = request.chatId workflow_name = WORKFLOW_DISPLAY_NAMES.get(start_layer, start_layer.value) @@ -276,7 +298,7 @@ async def _execute_with_fallback_streaming( logger.info(f"[{chat_id}] Executing {workflow_name} (streaming)") try: - result = await workflow.execute_streaming(request, context) + result = await workflow.execute_streaming(request, context, timing_dict) if result is not None: logger.info(f"[{chat_id}] {workflow_name} streaming started") @@ -307,7 +329,7 @@ async def _execute_with_fallback_streaming( f"(Layer {layer_number})" ) - result = await next_workflow.execute_streaming(request, {}) + result = await next_workflow.execute_streaming(request, {}, timing_dict) if result is not None: logger.info(f"[{chat_id}] {next_name} streaming started") @@ -325,7 +347,9 @@ async def _execute_with_fallback_streaming( logger.error(f"[{chat_id}] Error executing {workflow_name} streaming: {e}") # Fallback to RAG on error logger.info(f"[{chat_id}] Falling back to RAG streaming due to error") - streaming_result = await self.rag_workflow.execute_streaming(request, {}) + streaming_result = await self.rag_workflow.execute_streaming( + request, {}, timing_dict + ) if streaming_result is not None: async for chunk in streaming_result: yield chunk diff --git a/src/tool_classifier/workflows/context_workflow.py b/src/tool_classifier/workflows/context_workflow.py index 88212ef..4039b23 100644 --- a/src/tool_classifier/workflows/context_workflow.py +++ b/src/tool_classifier/workflows/context_workflow.py @@ -35,6 +35,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute context workflow 
in non-streaming mode. @@ -45,6 +46,7 @@ async def execute_async( Args: request: Orchestration request with user query and history context: Metadata with is_greeting, can_answer_from_history flags + timing_dict: Optional timing dictionary for future timing tracking Returns: OrchestrationResponse with context-based answer or None to fallback @@ -62,6 +64,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute context workflow in streaming mode. @@ -72,6 +75,7 @@ async def execute_streaming( Args: request: Orchestration request with user query and history context: Metadata with is_greeting, can_answer_from_history flags + timing_dict: Optional timing dictionary for future timing tracking Returns: AsyncIterator yielding SSE strings or None to fallback diff --git a/src/tool_classifier/workflows/ood_workflow.py b/src/tool_classifier/workflows/ood_workflow.py index cd114f7..c3f9215 100644 --- a/src/tool_classifier/workflows/ood_workflow.py +++ b/src/tool_classifier/workflows/ood_workflow.py @@ -39,6 +39,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute OOD workflow in non-streaming mode. @@ -68,6 +69,7 @@ async def execute_async( Args: request: Orchestration request with user query context: Unused (OOD doesn't need metadata) + timing_dict: Optional timing dictionary for future timing tracking Returns: OrchestrationResponse with OOD message @@ -86,6 +88,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute OOD workflow in streaming mode. 
diff --git a/src/tool_classifier/workflows/rag_workflow.py b/src/tool_classifier/workflows/rag_workflow.py index 6c58648..5c8cd05 100644 --- a/src/tool_classifier/workflows/rag_workflow.py +++ b/src/tool_classifier/workflows/rag_workflow.py @@ -50,6 +50,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute RAG workflow in non-streaming mode. @@ -64,6 +65,7 @@ async def execute_async( Args: request: Orchestration request with user query context: Unused (RAG doesn't need classification metadata) + timing_dict: Optional timing dictionary from parent (for unified tracking) Returns: OrchestrationResponse with RAG-generated answer @@ -73,24 +75,24 @@ async def execute_async( # Initialize components needed for RAG pipeline costs_dict: Dict[str, Any] = {} - timing_dict: Dict[str, float] = {} + # Use parent timing_dict or create new one + if timing_dict is None: + timing_dict = {} # Initialize service components components = self.orchestration_service._initialize_service_components(request) - # Call existing RAG pipeline + # Call existing RAG pipeline with "rag" prefix for namespacing response = await self.orchestration_service._execute_orchestration_pipeline( request=request, components=components, costs_dict=costs_dict, timing_dict=timing_dict, + prefix="rag", ) - # Log costs and timings + # Log costs (timing is logged by parent orchestration service) self.orchestration_service.log_costs(costs_dict) - from src.utils.time_tracker import log_step_timings - - log_step_timings(timing_dict, request.chatId) return response @@ -98,6 +100,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute RAG workflow in streaming mode. 
@@ -116,6 +119,7 @@ async def execute_streaming( Args: request: Orchestration request with user query context: Unused (RAG doesn't need classification metadata) + timing_dict: Optional timing dictionary from parent (for unified tracking) Returns: AsyncIterator yielding SSE-formatted strings @@ -125,7 +129,9 @@ async def execute_streaming( # Initialize tracking dictionaries costs_dict: Dict[str, Any] = {} - timing_dict: Dict[str, float] = {} + # Use parent timing_dict or create new one + if timing_dict is None: + timing_dict = {} # Get components from context if provided, otherwise initialize components = context.get("components") diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index d71e2d9..bed97dd 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -549,17 +549,31 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: - """Execute service workflow in non-streaming mode.""" + """Execute service workflow in non-streaming mode. 
+ + Args: + request: Orchestration request + context: Workflow context + timing_dict: Optional timing dictionary for unified tracking + """ + import time + chat_id = request.chatId # Create costs tracking dictionary (follows RAG workflow pattern) costs_dict: Dict[str, Dict[str, Any]] = {} + # Use parent timing_dict or create new one + if timing_dict is None: + timing_dict = {} - # Log comprehensive request details and perform service discovery + # Service discovery with timing + start_time = time.time() await self._log_request_details( request, context, mode="non-streaming", costs_dict=costs_dict ) + timing_dict["service.discovery"] = time.time() - start_time # Check if service was detected and validated if not context.get("service_id"): @@ -573,6 +587,7 @@ async def execute_async( logger.info(f"[{chat_id}] Entity Transformation:") # Step 1: Extract service metadata from context + start_time = time.time() service_metadata = self._extract_service_metadata(context, chat_id) if not service_metadata: logger.error( @@ -596,6 +611,7 @@ async def execute_async( service_name=service_metadata["service_name"], chat_id=chat_id, ) + timing_dict["service.entity_validation"] = time.time() - start_time logger.info( f"[{chat_id}] - Validation status: " @@ -672,17 +688,31 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], + timing_dict: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: - """Execute service workflow in streaming mode.""" + """Execute service workflow in streaming mode. 
+ + Args: + request: Orchestration request + context: Workflow context + timing_dict: Optional timing dictionary for unified tracking + """ + import time + chat_id = request.chatId # Create costs tracking dictionary (follows RAG workflow pattern) costs_dict: Dict[str, Dict[str, Any]] = {} + # Use parent timing_dict or create new one + if timing_dict is None: + timing_dict = {} - # Log comprehensive request details and perform service discovery + # Service discovery with timing + start_time = time.time() await self._log_request_details( request, context, mode="streaming", costs_dict=costs_dict ) + timing_dict["service.discovery"] = time.time() - start_time # Check if service was detected and validated if not context.get("service_id"): diff --git a/src/utils/time_tracker.py b/src/utils/time_tracker.py index 5b6d8de..606e530 100644 --- a/src/utils/time_tracker.py +++ b/src/utils/time_tracker.py @@ -17,11 +17,19 @@ def log_step_timings( if not timing_dict: return + # Parent/composite timings that should be hidden from logs + # These are aggregate timings that already include their sub-steps + PARENT_TIMINGS = {"classifier.route"} + prefix = f"[{chat_id}] " if chat_id else "" logger.info(f"{prefix}STEP EXECUTION TIMES:") total_time = 0.0 for step_name, elapsed_time in timing_dict.items(): + # Skip parent/composite timings entirely + if step_name in PARENT_TIMINGS: + continue + # Special handling for inline streaming guardrails if step_name == "output_guardrails" and elapsed_time < 0.001: logger.info(f" {step_name:25s}: (inline during streaming)") From 5429bc05dc510dc6e2d33d16ac813abf26bca643 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 25 Feb 2026 13:09:16 +0530 Subject: [PATCH 11/27] added time tracking and reloacate input guardrail before toolclassifiier --- ...ces.sql => mock-count-active-services.sql} | 0 ...s.sql => mock-get-all-active-services.sql} | 0 ...e-by-id.sql => mock-get-service-by-id.sql} | 0 .../rag-search/GET/services/get-services.yml | 4 +- 
src/llm_orchestration_service.py | 125 +++++++++++++----- 5 files changed, 96 insertions(+), 33 deletions(-) rename DSL/Resql/rag-search/POST/{count-active-services.sql => mock-count-active-services.sql} (100%) rename DSL/Resql/rag-search/POST/{get-all-active-services.sql => mock-get-all-active-services.sql} (100%) rename DSL/Resql/rag-search/POST/{get-service-by-id.sql => mock-get-service-by-id.sql} (100%) diff --git a/DSL/Resql/rag-search/POST/count-active-services.sql b/DSL/Resql/rag-search/POST/mock-count-active-services.sql similarity index 100% rename from DSL/Resql/rag-search/POST/count-active-services.sql rename to DSL/Resql/rag-search/POST/mock-count-active-services.sql diff --git a/DSL/Resql/rag-search/POST/get-all-active-services.sql b/DSL/Resql/rag-search/POST/mock-get-all-active-services.sql similarity index 100% rename from DSL/Resql/rag-search/POST/get-all-active-services.sql rename to DSL/Resql/rag-search/POST/mock-get-all-active-services.sql diff --git a/DSL/Resql/rag-search/POST/get-service-by-id.sql b/DSL/Resql/rag-search/POST/mock-get-service-by-id.sql similarity index 100% rename from DSL/Resql/rag-search/POST/get-service-by-id.sql rename to DSL/Resql/rag-search/POST/mock-get-service-by-id.sql diff --git a/DSL/Ruuter.public/rag-search/GET/services/get-services.yml b/DSL/Ruuter.public/rag-search/GET/services/get-services.yml index d1ed395..01356d9 100644 --- a/DSL/Ruuter.public/rag-search/GET/services/get-services.yml +++ b/DSL/Ruuter.public/rag-search/GET/services/get-services.yml @@ -10,7 +10,7 @@ declaration: count_services: call: http.post args: - url: "[#RAG_SEARCH_RESQL]/count-active-services" + url: "[#RAG_SEARCH_RESQL]/mock-count-active-services" body: {} result: count_result next: check_service_count @@ -41,7 +41,7 @@ return_semantic_search_response: fetch_all_services: call: http.post args: - url: "[#RAG_SEARCH_RESQL]/get-all-active-services" + url: "[#RAG_SEARCH_RESQL]/mock-get-all-active-services" body: {} result: services_result 
next: return_all_services diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 2705a29..30d2edd 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -134,9 +134,46 @@ def __init__(self) -> None: # This allows components to be initialized per-request with proper context self.tool_classifier = None + # Initialize shared guardrails adapter at startup + self.shared_guardrails_adapter = self._initialize_shared_guardrails_at_startup() + # Log feature flag configuration FeatureFlags.log_configuration() + def _initialize_shared_guardrails_at_startup(self) -> Optional[NeMoRailsAdapter]: + """ + Initialize shared guardrails at startup. + + Returns: + NeMoRailsAdapter if successful, None on failure (graceful degradation) + """ + try: + logger.info(" Initializing shared guardrails at startup...") + start_time = time.time() + + # Initialize with production environment and no specific connection + # This creates a shared guardrails instance using default/production config + guardrails_adapter = self._initialize_guardrails( + environment="production", + connection_id=None, # Shared configuration, not user-specific + ) + + elapsed_time = time.time() - start_time + logger.info( + f" Shared guardrails initialized successfully in {elapsed_time:.3f}s" + ) + + return guardrails_adapter + + except Exception as e: + logger.error(f" Failed to initialize shared guardrails at startup: {e}") + logger.error( + " Service will continue without guardrails (graceful degradation)" + ) + # Return None - service continues without guardrails + # Per-request fallback will be attempted if needed + return None + @observe(name="orchestration_request", as_type="agent") async def process_orchestration_request( self, request: OrchestrationRequest @@ -219,6 +256,26 @@ async def process_orchestration_request( components = self._initialize_service_components(request) timing_dict["initialization"] = time.time() - start_time + if 
components["guardrails_adapter"]: + start_time = time.time() + input_blocked_response = await self.handle_input_guardrails( + components["guardrails_adapter"], request, {} + ) + timing_dict["input_guardrails_check"] = time.time() - start_time + + if input_blocked_response: + logger.warning( + f"[{request.chatId}] Input blocked before classifier - " + f"saved expensive service discovery" + ) + log_step_timings(timing_dict, request.chatId) + return input_blocked_response + else: + logger.info( + f"[{request.chatId}] Guardrails not available - " + f"proceeding without input validation" + ) + # TOOL CLASSIFIER INTEGRATION # Route through tool classifier if enabled, otherwise use existing RAG pipeline if FeatureFlags.TOOL_CLASSIFIER_ENABLED: @@ -439,9 +496,12 @@ async def stream_orchestration_response( components = self._initialize_service_components(request) timing_dict["initialization"] = time.time() - start_time - # STEP 1: CHECK INPUT GUARDRAILS (blocking) + # PRIORITY 1 OPTIMIZATION: Input Guardrails Check BEFORE Classifier + # This implements fail-fast principle - block malicious/policy-violating inputs + # before expensive operations (service discovery, LLM calls, streaming setup) + # Saves 6.4s + $0.002 per blocked request! logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Step 1: Checking input guardrails" + f"[{request.chatId}] [{stream_ctx.stream_id}] Checking input guardrails (before classifier)" ) if components["guardrails_adapter"]: @@ -455,19 +515,26 @@ async def stream_orchestration_response( if not input_check_result.allowed: logger.warning( - f"[{request.chatId}] [{stream_ctx.stream_id}] Input blocked by guardrails: " - f"{input_check_result.reason}" + f"[{request.chatId}] [{stream_ctx.stream_id}] Input blocked before classifier - " + f"saved expensive service discovery. 
Reason: {input_check_result.reason}" ) yield self.format_sse( request.chatId, INPUT_GUARDRAIL_VIOLATION_MESSAGE ) yield self.format_sse(request.chatId, "END") self.log_costs(costs_dict) + # Log timings before returning (for visibility) + log_step_timings(timing_dict, request.chatId) stream_ctx.mark_completed() return + else: + logger.info( + f"[{request.chatId}] [{stream_ctx.stream_id}] Guardrails not available - " + f"proceeding without input validation" + ) logger.info( - f"[{request.chatId}] [{stream_ctx.stream_id}] Input guardrails passed " + f"[{request.chatId}] [{stream_ctx.stream_id}] Input guardrails passed" ) # TOOL CLASSIFIER INTEGRATION (STREAMING) @@ -1015,10 +1082,20 @@ def _initialize_service_components( environment=request.environment, connection_id=request.connection_id ) - # Initialize Guardrails Adapter (optional) - components["guardrails_adapter"] = self._safe_initialize_guardrails( - request.environment, request.connection_id - ) + # Use shared guardrails adapter (initialized at startup) + # Falls back to per-request initialization if shared instance unavailable + if self.shared_guardrails_adapter is not None: + logger.debug( + f"Using shared guardrails adapter (startup-initialized, zero overhead)" + ) + components["guardrails_adapter"] = self.shared_guardrails_adapter + else: + logger.warning( + f"Shared guardrails unavailable, initializing per-request (slower)" + ) + components["guardrails_adapter"] = self._safe_initialize_guardrails( + request.environment, request.connection_id + ) # Initialize Contextual Retriever (replaces hybrid retriever) components["contextual_retriever"] = self._safe_initialize_contextual_retriever( @@ -1142,25 +1219,11 @@ async def _execute_orchestration_pipeline( timing_dict: Dictionary for timing tracking prefix: Optional prefix for timing keys (e.g., "rag" for workflow namespacing) """ - # Note: Query validation now happens in process_orchestration_request() - # before component initialization for true early 
rejection - - # Step 1: Input Guardrails Check - if components["guardrails_adapter"]: - start_time = time.time() - input_blocked_response = await self.handle_input_guardrails( - components["guardrails_adapter"], request, costs_dict - ) - timing_key = ( - f"{prefix}.input_guardrails_check" - if prefix - else "input_guardrails_check" - ) - timing_dict[timing_key] = time.time() - start_time - if input_blocked_response: - return input_blocked_response + # Note: Query validation AND input guardrails check now happen at orchestration level + # (in process_orchestration_request) BEFORE classifier routing for true early rejection. + # This saves ~3.5s on blocked requests by failing fast before expensive workflow operations. - # Step 2: Refine user prompt + # Step 1: Refine user prompt start_time = time.time() refined_output, refiner_usage = self._refine_user_prompt( llm_manager=components["llm_manager"], @@ -1171,7 +1234,7 @@ async def _execute_orchestration_pipeline( timing_dict[timing_key] = time.time() - start_time costs_dict["prompt_refiner"] = refiner_usage - # Step 3: Retrieve relevant chunks using contextual retrieval + # Step 2: Retrieve relevant chunks using contextual retrieval try: start_time = time.time() relevant_chunks = await self._safe_retrieve_contextual_chunks( @@ -1193,7 +1256,7 @@ async def _execute_orchestration_pipeline( logger.info("No relevant chunks found - returning out-of-scope response") return self._create_out_of_scope_response(request) - # Step 4: Generate response + # Step 3: Generate response start_time = time.time() generated_response = self._generate_rag_response( llm_manager=components["llm_manager"], @@ -1208,7 +1271,7 @@ async def _execute_orchestration_pipeline( ) timing_dict[timing_key] = time.time() - start_time - # Step 5: Output Guardrails Check + # Step 4: Output Guardrails Check # Apply guardrails to all response types for consistent safety across all environments start_time = time.time() output_guardrails_response = await 
self.handle_output_guardrails( @@ -1222,7 +1285,7 @@ async def _execute_orchestration_pipeline( ) timing_dict[timing_key] = time.time() - start_time - # Step 6: Store inference data (for production and testing environments) + # Step 5: Store inference data (for production and testing environments) # Only store OrchestrationResponse (has chatId), not TestOrchestrationResponse if request.environment in [ PRODUCTION_DEPLOYMENT_ENVIRONMENT, From 721263a44cf34bb6bcece1228b4bb73436b3fb3d Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 25 Feb 2026 16:36:08 +0530 Subject: [PATCH 12/27] fixed issue --- src/llm_orchestration_service.py | 9 +++------ src/tool_classifier/intent_detector.py | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 30d2edd..7432957 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -26,7 +26,6 @@ from src.response_generator.response_generate import ResponseGeneratorAgent from src.response_generator.response_generate import stream_response_native from src.llm_orchestrator_config.llm_ochestrator_constants import ( - OUT_OF_SCOPE_MESSAGE, OUT_OF_SCOPE_MESSAGES, TECHNICAL_ISSUE_MESSAGE, TECHNICAL_ISSUE_MESSAGES, @@ -67,7 +66,7 @@ class LangfuseConfig: """Configuration for Langfuse integration.""" - def __init__(self): + def __init__(self) -> None: self.langfuse_client: Optional[Langfuse] = None self._initialize_langfuse() @@ -496,10 +495,8 @@ async def stream_orchestration_response( components = self._initialize_service_components(request) timing_dict["initialization"] = time.time() - start_time - # PRIORITY 1 OPTIMIZATION: Input Guardrails Check BEFORE Classifier # This implements fail-fast principle - block malicious/policy-violating inputs # before expensive operations (service discovery, LLM calls, streaming setup) - # Saves 6.4s + $0.002 per blocked request! 
logger.info( f"[{request.chatId}] [{stream_ctx.stream_id}] Checking input guardrails (before classifier)" ) @@ -1086,12 +1083,12 @@ def _initialize_service_components( # Falls back to per-request initialization if shared instance unavailable if self.shared_guardrails_adapter is not None: logger.debug( - f"Using shared guardrails adapter (startup-initialized, zero overhead)" + "Using shared guardrails adapter (startup-initialized, zero overhead)" ) components["guardrails_adapter"] = self.shared_guardrails_adapter else: logger.warning( - f"Shared guardrails unavailable, initializing per-request (slower)" + "Shared guardrails unavailable, initializing per-request (slower)" ) components["guardrails_adapter"] = self._safe_initialize_guardrails( request.environment, request.connection_id diff --git a/src/tool_classifier/intent_detector.py b/src/tool_classifier/intent_detector.py index 24c1538..a2abb74 100644 --- a/src/tool_classifier/intent_detector.py +++ b/src/tool_classifier/intent_detector.py @@ -42,9 +42,9 @@ class IntentDetectionModule(dspy.Module): """DSPy Module for service intent detection.""" def __init__(self) -> None: - """Initialize intent detection module with ChainOfThought.""" + """Initialize intent detection module with Predict (direct prediction).""" super().__init__() - self.detector = dspy.ChainOfThought(ServiceIntentDetector) + self.detector = dspy.Predict(ServiceIntentDetector) def forward( self, From f8a82b6c1f6237013ade045b5842f9c9158f8115 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 25 Feb 2026 17:58:53 +0530 Subject: [PATCH 13/27] fixed issue --- src/llm_orchestration_service.py | 90 ++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 7432957..0224a53 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -133,45 +133,69 @@ def __init__(self) -> None: # This allows components to be 
initialized per-request with proper context self.tool_classifier = None - # Initialize shared guardrails adapter at startup - self.shared_guardrails_adapter = self._initialize_shared_guardrails_at_startup() + # Initialize shared guardrails adapters at startup (production and testing) + self.shared_guardrails_adapters = ( + self._initialize_shared_guardrails_at_startup() + ) # Log feature flag configuration FeatureFlags.log_configuration() - def _initialize_shared_guardrails_at_startup(self) -> Optional[NeMoRailsAdapter]: + def _initialize_shared_guardrails_at_startup(self) -> Dict[str, NeMoRailsAdapter]: """ - Initialize shared guardrails at startup. + Initialize shared guardrails adapters at startup for production and testing environments. Returns: - NeMoRailsAdapter if successful, None on failure (graceful degradation) + Dictionary mapping environment names to NeMoRailsAdapter instances. + Empty dict on failure (graceful degradation). """ - try: - logger.info(" Initializing shared guardrails at startup...") - start_time = time.time() + adapters: Dict[str, NeMoRailsAdapter] = {} - # Initialize with production environment and no specific connection - # This creates a shared guardrails instance using default/production config - guardrails_adapter = self._initialize_guardrails( - environment="production", - connection_id=None, # Shared configuration, not user-specific - ) + # Initialize adapters for commonly-used environments + environments_to_initialize = ["production", "testing"] - elapsed_time = time.time() - start_time - logger.info( - f" Shared guardrails initialized successfully in {elapsed_time:.3f}s" - ) + logger.info(" Initializing shared guardrails at startup...") + total_start_time = time.time() - return guardrails_adapter + for env in environments_to_initialize: + try: + logger.info(f" Initializing guardrails for environment: {env}") + start_time = time.time() - except Exception as e: - logger.error(f" Failed to initialize shared guardrails at startup: 
{e}") + # Initialize with specific environment and no connection (shared config) + guardrails_adapter = self._initialize_guardrails( + environment=env, + connection_id=None, # Shared configuration, not user-specific + ) + + elapsed_time = time.time() - start_time + adapters[env] = guardrails_adapter + logger.info( + f" Guardrails for '{env}' initialized successfully in {elapsed_time:.3f}s" + ) + + except Exception as e: + logger.error(f" Failed to initialize guardrails for '{env}': {e}") + logger.warning( + f" Service will fall back to per-request initialization for '{env}' environment" + ) + # Continue with other environments - partial success is acceptable + continue + + total_elapsed = time.time() - total_start_time + + if adapters: + logger.info( + f" Shared guardrails initialized for {len(adapters)} environment(s) " + f"in {total_elapsed:.3f}s total" + ) + else: logger.error( - " Service will continue without guardrails (graceful degradation)" + " Failed to initialize any shared guardrails - " + "service will use per-request initialization (slower)" ) - # Return None - service continues without guardrails - # Per-request fallback will be attempted if needed - return None + + return adapters @observe(name="orchestration_request", as_type="agent") async def process_orchestration_request( @@ -1079,16 +1103,18 @@ def _initialize_service_components( environment=request.environment, connection_id=request.connection_id ) - # Use shared guardrails adapter (initialized at startup) - # Falls back to per-request initialization if shared instance unavailable - if self.shared_guardrails_adapter is not None: - logger.debug( - "Using shared guardrails adapter (startup-initialized, zero overhead)" + if request.environment in self.shared_guardrails_adapters: + logger.info( + f" Using shared guardrails adapter for environment='{request.environment}' " + f"(startup-initialized, zero overhead)" ) - components["guardrails_adapter"] = self.shared_guardrails_adapter + 
components["guardrails_adapter"] = self.shared_guardrails_adapters[ + request.environment + ] else: logger.warning( - "Shared guardrails unavailable, initializing per-request (slower)" + f" Shared guardrails unavailable for environment='{request.environment}', " + f"initializing per-request (slower)" ) components["guardrails_adapter"] = self._safe_initialize_guardrails( request.environment, request.connection_id From 3b89fba35cf3b053c2d4fc9153224473ca0bda1a Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Thu, 26 Feb 2026 12:01:34 +0530 Subject: [PATCH 14/27] added hybrid search for the service detection --- docker-compose.yml | 1 + src/intent_data_enrichment/constants.py | 4 + src/intent_data_enrichment/main_enrichment.py | 151 ++++++-- src/intent_data_enrichment/models.py | 21 +- src/intent_data_enrichment/qdrant_manager.py | 274 ++++++++++---- src/tool_classifier/classifier.py | 350 ++++++++++++++++-- src/tool_classifier/constants.py | 19 + src/tool_classifier/sparse_encoder.py | 82 ++++ .../workflows/service_workflow.py | 156 +++++++- 9 files changed, 916 insertions(+), 142 deletions(-) create mode 100644 src/tool_classifier/sparse_encoder.py diff --git a/docker-compose.yml b/docker-compose.yml index 1fec54b..976e27f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -178,6 +178,7 @@ services: - ./DSL/CronManager/DSL:/DSL - ./DSL/CronManager/script:/app/scripts - ./src/vector_indexer:/app/src/vector_indexer + - ./src/tool_classifier:/app/src/tool_classifier - ./src/intent_data_enrichment:/app/src/intent_data_enrichment - ./src/utils/decrypt_vault_secrets.py:/app/src/utils/decrypt_vault_secrets.py:ro # Decryption utility (read-only) - cron_data:/app/data diff --git a/src/intent_data_enrichment/constants.py b/src/intent_data_enrichment/constants.py index f1f35f3..f506880 100644 --- a/src/intent_data_enrichment/constants.py +++ b/src/intent_data_enrichment/constants.py @@ -24,6 +24,10 @@ class EnrichmentConstants: VECTOR_SIZE = 3072 # Azure 
text-embedding-3-large dimension DISTANCE_METRIC = "Cosine" + # Named Vector Configuration (for hybrid search) + DENSE_VECTOR_NAME = "dense" + SPARSE_VECTOR_NAME = "sparse" + # Context Generation CONTEXT_TEMPLATE = """ {full_service_info} diff --git a/src/intent_data_enrichment/main_enrichment.py b/src/intent_data_enrichment/main_enrichment.py index d718678..d82358b 100644 --- a/src/intent_data_enrichment/main_enrichment.py +++ b/src/intent_data_enrichment/main_enrichment.py @@ -3,19 +3,61 @@ Service Data Enrichment Script This script receives service data, enriches it with LLM-generated context, -creates embeddings, and stores in Qdrant intent_collections. +creates embeddings (dense + sparse per example), and stores in Qdrant intent_collections. + +Indexing strategy: +- One 'example' point per example query (dense + sparse vectors of the example text) +- One 'summary' point per service (dense + sparse vectors of name + description + context) """ import sys import json import argparse import asyncio +from typing import List from loguru import logger from intent_data_enrichment.models import ServiceData, EnrichedService, EnrichmentResult from intent_data_enrichment.api_client import LLMAPIClient from intent_data_enrichment.qdrant_manager import QdrantManager +# Import sparse encoder from tool_classifier (shared module) +sys.path.insert(0, "/app/src") +try: + from tool_classifier.sparse_encoder import compute_sparse_vector +except ImportError: + # Fallback for local development + try: + from src.tool_classifier.sparse_encoder import compute_sparse_vector + except ImportError: + logger.warning( + "Could not import sparse_encoder from tool_classifier, " + "attempting direct import" + ) + import importlib.util + import os + + # Try to find the module relative to this file + module_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "tool_classifier", + "sparse_encoder.py", + ) + if os.path.exists(module_path): + spec = 
importlib.util.spec_from_file_location("sparse_encoder", module_path) + if spec is not None and spec.loader is not None: + sparse_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(sparse_module) + compute_sparse_vector = sparse_module.compute_sparse_vector + else: + raise ImportError( + f"Cannot load spec or loader for sparse_encoder.py at {module_path}" + ) from None + else: + raise ImportError( + f"Cannot find sparse_encoder.py at {module_path}" + ) from None + def parse_arguments() -> ServiceData: """Parse command line arguments into ServiceData model.""" @@ -76,7 +118,8 @@ def parse_arguments() -> ServiceData: async def enrich_service(service_data: ServiceData) -> EnrichmentResult: """ - Main enrichment pipeline: generate context, create embedding, store in Qdrant. + Main enrichment pipeline: generate context, create per-example embeddings, + store in Qdrant with hybrid vectors (dense + sparse). Args: service_data: Service data to enrich @@ -85,14 +128,51 @@ async def enrich_service(service_data: ServiceData) -> EnrichmentResult: EnrichmentResult with success/failure information """ try: - # Step 1: Generate rich context using LLM + # Step 1: Generate rich context using LLM (unchanged from original) logger.info("Step 1: Generating rich context with LLM") async with LLMAPIClient() as api_client: context = await api_client.generate_context(service_data) logger.success(f"Context generated: {len(context)} characters") - # Step 2: Combine generated context with original metadata for embedding - logger.info("Step 2: Combining context with original service metadata") + # Step 2: Create per-example points (dense + sparse vectors) + logger.info( + f"Step 2: Creating per-example embeddings for " + f"{len(service_data.examples)} examples" + ) + enriched_points: List[EnrichedService] = [] + + for i, example in enumerate(service_data.examples): + logger.info( + f" Creating embeddings for example {i + 1}/{len(service_data.examples)}: " + 
f"'{example[:80]}...'" if len(example) > 80 else + f" Creating embeddings for example {i + 1}/{len(service_data.examples)}: " + f"'{example}'" + ) + + # Dense: embed the individual example + dense_embedding = await api_client.create_embedding(example) + + # Sparse: BM25-style term frequencies for the example + sparse_vec = compute_sparse_vector(example) + + enriched_points.append( + EnrichedService( + id=service_data.service_id, + name=service_data.name, + description=service_data.description, + examples=service_data.examples, + entities=service_data.entities, + context=context, + embedding=dense_embedding, + sparse_indices=sparse_vec.indices, + sparse_values=sparse_vec.values, + example_text=example, + point_type="example", + ) + ) + + # Step 3: Create summary point (combined name + description + context) + logger.info("Step 3: Creating summary embedding") combined_text_parts = [ f"Service Name: {service_data.name}", f"Description: {service_data.description}", @@ -108,35 +188,44 @@ async def enrich_service(service_data: ServiceData) -> EnrichmentResult: f"Required Entities: {', '.join(service_data.entities)}" ) - # Add generated context last (enriched understanding) combined_text_parts.append(f"Enriched Context: {context}") - combined_text = "\n".join(combined_text_parts) - logger.info(f"Combined text length: {len(combined_text)} characters") - - # Step 3: Create embedding for combined text - logger.info("Step 3: Creating embedding vector for combined text") - embedding = await api_client.create_embedding(combined_text) - logger.success(f"Embedding created: {len(embedding)}-dimensional vector") - - # Step 4: Prepare enriched service - enriched_service = EnrichedService( - id=service_data.service_id, - name=service_data.name, - description=service_data.description, - examples=service_data.examples, - entities=service_data.entities, - context=context, - embedding=embedding, - ) - # Step 5: Store in Qdrant - logger.info("Step 5: Storing in Qdrant") + 
summary_embedding = await api_client.create_embedding(combined_text) + summary_sparse = compute_sparse_vector(combined_text) + + enriched_points.append( + EnrichedService( + id=service_data.service_id, + name=service_data.name, + description=service_data.description, + examples=service_data.examples, + entities=service_data.entities, + context=context, + embedding=summary_embedding, + sparse_indices=summary_sparse.indices, + sparse_values=summary_sparse.values, + example_text=None, + point_type="summary", + ) + ) + + # Step 4: Delete existing points for this service (idempotent update) + logger.info("Step 4: Removing existing points for idempotent update") qdrant = QdrantManager() try: qdrant.connect() qdrant.ensure_collection() - success = qdrant.upsert_service(enriched_service) + + # Delete old points before inserting new ones + qdrant.delete_service_points(service_data.service_id) + + # Step 5: Bulk upsert all points (examples + summary) + logger.info( + f"Step 5: Storing {len(enriched_points)} points in Qdrant " + f"({len(service_data.examples)} examples + 1 summary)" + ) + success = qdrant.upsert_service_points(enriched_points) finally: qdrant.close() @@ -144,9 +233,13 @@ async def enrich_service(service_data: ServiceData) -> EnrichmentResult: return EnrichmentResult( success=True, service_id=service_data.service_id, - message=f"Service '{service_data.name}' enriched and indexed successfully", + message=( + f"Service '{service_data.name}' enriched and indexed successfully " + f"({len(enriched_points)} points: " + f"{len(service_data.examples)} examples + 1 summary)" + ), context_length=len(context), - embedding_dimension=len(embedding), + embedding_dimension=len(summary_embedding), error=None, ) else: diff --git a/src/intent_data_enrichment/models.py b/src/intent_data_enrichment/models.py index eb0ef64..9390e73 100644 --- a/src/intent_data_enrichment/models.py +++ b/src/intent_data_enrichment/models.py @@ -20,7 +20,12 @@ class ServiceData(BaseModel): class 
EnrichedService(BaseModel): - """Enriched service data ready for storage.""" + """Enriched service data ready for storage. + + Each service produces multiple points in Qdrant: + - One 'example' point per example query (for precise matching) + - One 'summary' point for the combined service description + context + """ id: str = Field(..., description="Service ID (maps to service_id)") name: str = Field(..., description="Service name") @@ -28,7 +33,19 @@ class EnrichedService(BaseModel): examples: List[str] = Field(..., description="Example queries") entities: List[str] = Field(..., description="Expected entity names") context: str = Field(..., description="Generated rich context") - embedding: List[float] = Field(..., description="Context embedding vector") + embedding: List[float] = Field(..., description="Dense embedding vector") + sparse_indices: List[int] = Field( + default_factory=list, description="Sparse vector indices" + ) + sparse_values: List[float] = Field( + default_factory=list, description="Sparse vector values" + ) + example_text: Optional[str] = Field( + default=None, description="The specific example this point represents" + ) + point_type: str = Field( + default="summary", description="Point type: 'example' or 'summary'" + ) class EnrichmentResult(BaseModel): diff --git a/src/intent_data_enrichment/qdrant_manager.py b/src/intent_data_enrichment/qdrant_manager.py index 5024e23..579357a 100644 --- a/src/intent_data_enrichment/qdrant_manager.py +++ b/src/intent_data_enrichment/qdrant_manager.py @@ -1,10 +1,21 @@ -"""Qdrant manager for intent collections.""" +"""Qdrant manager for intent collections with hybrid search support.""" import uuid -from typing import Optional +from typing import Optional, List from loguru import logger from qdrant_client import QdrantClient -from qdrant_client.models import Distance, VectorParams, PointStruct +from qdrant_client.models import ( + Distance, + VectorParams, + PointStruct, + SparseVectorParams, + 
SparseIndexParams, + SparseVector, + Filter, + FieldCondition, + MatchValue, + FilterSelector, +) from intent_data_enrichment.constants import EnrichmentConstants from intent_data_enrichment.models import EnrichedService @@ -14,7 +25,7 @@ class QdrantManager: - """Manages Qdrant operations for intent collections.""" + """Manages Qdrant operations for intent collections with hybrid search.""" def __init__( self, @@ -44,7 +55,12 @@ def connect(self) -> None: raise def ensure_collection(self) -> None: - """Ensure the intent_collections collection exists with correct vector size.""" + """Ensure the intent_collections collection exists with hybrid vector config. + + The collection uses named vectors: + - 'dense': 3072-dim cosine similarity vectors for semantic matching + - 'sparse': BM25-style sparse vectors for keyword matching + """ try: if not self.client: raise RuntimeError(_CLIENT_NOT_INITIALIZED) @@ -53,48 +69,60 @@ def ensure_collection(self) -> None: collection_names = [col.name for col in collections] if self.collection_name in collection_names: - # Check if existing collection has correct vector size collection_info = self.client.get_collection(self.collection_name) - - # Qdrant vectors config is a dict - get the default vector config vectors_config = collection_info.config.params.vectors - existing_vector_size: Optional[int] = None + # Check if collection has the expected named vector configuration if isinstance(vectors_config, dict): - # Get first vector config (usually the default/unnamed one) - if vectors_config: - vector_params = next(iter(vectors_config.values())) - existing_vector_size = vector_params.size + if EnrichmentConstants.DENSE_VECTOR_NAME in vectors_config: + existing_vector_size = vectors_config[ + EnrichmentConstants.DENSE_VECTOR_NAME + ].size + if existing_vector_size != EnrichmentConstants.VECTOR_SIZE: + logger.error( + f"Collection '{self.collection_name}' has incompatible vector size: " + f"{existing_vector_size} (expected 
{EnrichmentConstants.VECTOR_SIZE})" + ) + raise RuntimeError( + f"Collection '{self.collection_name}' has incompatible vector size " + f"({existing_vector_size} vs expected {EnrichmentConstants.VECTOR_SIZE}). " + "To recreate the collection, manually delete it first using: " + f"qdrant.client.delete_collection('{self.collection_name}') or via Qdrant UI/API." + ) + logger.info( + f"Collection '{self.collection_name}' already exists " + f"with correct hybrid vector config (dense: {existing_vector_size}d + sparse)" + ) + else: + # Old collection format (unnamed/single vector) — needs migration + logger.error( + f"Collection '{self.collection_name}' exists but uses old single-vector format. " + "Migration to named vectors (dense + sparse) required." + ) + raise RuntimeError( + f"Collection '{self.collection_name}' uses old single-vector format. " + "Please delete the collection and re-index all services. " + f"Delete with: qdrant.client.delete_collection('{self.collection_name}') " + "or via Qdrant UI/API." + ) elif vectors_config is not None: - # Direct VectorParams object (older API) - existing_vector_size = vectors_config.size - - if existing_vector_size is None: + # Direct VectorParams object (old single-vector format) logger.error( - f"Collection '{self.collection_name}' exists but vector size cannot be determined" + f"Collection '{self.collection_name}' exists but uses old single-vector format." ) raise RuntimeError( - f"Collection '{self.collection_name}' exists but vector size cannot be determined. " - "This may indicate a Qdrant API issue or unexpected collection configuration. " - "Manual intervention required: verify Qdrant health, inspect collection config, " - "or manually delete the collection if recreating is intended." + f"Collection '{self.collection_name}' uses old single-vector format. " + "Please delete the collection and re-index all services. " + f"Delete with: qdrant.client.delete_collection('{self.collection_name}') " + "or via Qdrant UI/API." 
) - elif existing_vector_size != EnrichmentConstants.VECTOR_SIZE: + else: logger.error( - f"Collection '{self.collection_name}' has incompatible vector size: " - f"{existing_vector_size} (expected {EnrichmentConstants.VECTOR_SIZE})" + f"Collection '{self.collection_name}' exists but vector config cannot be determined" ) raise RuntimeError( - f"Collection '{self.collection_name}' has incompatible vector size " - f"({existing_vector_size} vs expected {EnrichmentConstants.VECTOR_SIZE}). " - "This prevents automatic deletion to avoid accidental data loss. " - "To recreate the collection, manually delete it first using: " - f"qdrant.client.delete_collection('{self.collection_name}') or via Qdrant UI/API." - ) - else: - logger.info( - f"Collection '{self.collection_name}' already exists " - f"with correct vector size ({existing_vector_size})" + f"Collection '{self.collection_name}' exists but vector config cannot be determined. " + "Manual intervention required." ) else: self._create_collection() @@ -104,77 +132,175 @@ def ensure_collection(self) -> None: raise def _create_collection(self) -> None: - """Create the collection with correct vector configuration.""" + """Create the collection with hybrid vector configuration (dense + sparse).""" if not self.client: raise RuntimeError(_CLIENT_NOT_INITIALIZED) logger.info( f"Creating collection '{self.collection_name}' " - f"with vector size {EnrichmentConstants.VECTOR_SIZE}" + f"with hybrid vectors (dense: {EnrichmentConstants.VECTOR_SIZE}d + sparse)" ) self.client.create_collection( collection_name=self.collection_name, - vectors_config=VectorParams( - size=EnrichmentConstants.VECTOR_SIZE, - distance=Distance.COSINE, - ), + vectors_config={ + EnrichmentConstants.DENSE_VECTOR_NAME: VectorParams( + size=EnrichmentConstants.VECTOR_SIZE, + distance=Distance.COSINE, + ), + }, + sparse_vectors_config={ + EnrichmentConstants.SPARSE_VECTOR_NAME: SparseVectorParams( + index=SparseIndexParams(on_disk=False), + ), + }, ) 
logger.success(f"Collection '{self.collection_name}' created successfully") - def upsert_service(self, enriched_service: EnrichedService) -> bool: - """ - Upsert enriched service to Qdrant (update if exists, insert if new). + def delete_service_points(self, service_id: str) -> bool: + """Delete all points belonging to a service. + + Used before re-indexing to ensure idempotent updates, and when + a service is deactivated. Args: - enriched_service: EnrichedService instance containing the embedding and - associated metadata to upsert into Qdrant. + service_id: Service identifier to delete all points for Returns: True if successful, False otherwise """ try: if not self.client: - raise RuntimeError("Qdrant client not initialized") - - logger.info(f"Upserting service '{enriched_service.id}' to Qdrant") - - # Convert service_id to UUID for Qdrant compatibility - # Qdrant requires point IDs to be either integers or UUIDs - point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, enriched_service.id)) - - # Prepare payload (all metadata except embedding) - payload = { - "service_id": enriched_service.id, # Store original ID in payload - "name": enriched_service.name, - "description": enriched_service.description, - "examples": enriched_service.examples, - "entities": enriched_service.entities, - "context": enriched_service.context, - } - - # Create point with UUID - point = PointStruct( - id=point_id, # ✓ Now using UUID string - vector=enriched_service.embedding, - payload=payload, + raise RuntimeError(_CLIENT_NOT_INITIALIZED) + + logger.info( + f"Deleting existing points for service '{service_id}' from Qdrant" + ) + + self.client.delete( + collection_name=self.collection_name, + points_selector=FilterSelector( + filter=Filter( + must=[ + FieldCondition( + key="service_id", + match=MatchValue(value=service_id), + ) + ] + ) + ), + ) + + logger.success( + f"Successfully deleted points for service '{service_id}'" + ) + return True + + except Exception as e: + logger.error( + f"Failed to 
delete points for service '{service_id}': {e}" ) + return False + + def upsert_service_points( + self, enriched_points: List[EnrichedService] + ) -> bool: + """Upsert multiple enriched service points to Qdrant. + + Each point contains both dense and sparse vectors for hybrid search. + Points are identified by a deterministic UUID based on service_id + point_index. + + Args: + enriched_points: List of EnrichedService instances (examples + summary) - # Upsert to Qdrant + Returns: + True if all points upserted successfully, False otherwise + """ + try: + if not self.client: + raise RuntimeError(_CLIENT_NOT_INITIALIZED) + + if not enriched_points: + logger.warning("No points to upsert") + return True + + service_id = enriched_points[0].id + logger.info( + f"Upserting {len(enriched_points)} points for service '{service_id}'" + ) + + + from typing import Any, Dict + points: List[PointStruct] = [] + for idx, enriched_service in enumerate(enriched_points): + # Deterministic UUID based on service_id + index + point_id_source = f"{enriched_service.id}_{idx}" + point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, point_id_source)) + + # Prepare payload + payload = { + "service_id": enriched_service.id, + "name": enriched_service.name, + "description": enriched_service.description, + "examples": enriched_service.examples, + "entities": enriched_service.entities, + "context": enriched_service.context, + "point_type": enriched_service.point_type, + } + + # Add example_text for example points + if enriched_service.example_text: + payload["example_text"] = enriched_service.example_text + + # Build named vectors (dense always, sparse if present) + vectors: Dict[str, Any] = { + EnrichmentConstants.DENSE_VECTOR_NAME: enriched_service.embedding, + } + if enriched_service.sparse_indices: + vectors[EnrichmentConstants.SPARSE_VECTOR_NAME] = SparseVector( + indices=enriched_service.sparse_indices, + values=enriched_service.sparse_values, + ) + + point = PointStruct( + id=point_id, + 
vector=vectors, + payload=payload, + ) + + points.append(point) + + # Bulk upsert self.client.upsert( collection_name=self.collection_name, - points=[point], + points=points, ) logger.success( - f"Successfully upserted service '{enriched_service.id}' " - f"({len(enriched_service.embedding)}-dim vector)" + f"Successfully upserted {len(points)} points for service '{service_id}' " + f"({sum(1 for p in enriched_points if p.point_type == 'example')} examples + " + f"{sum(1 for p in enriched_points if p.point_type == 'summary')} summary)" ) return True except Exception as e: - logger.error(f"Failed to upsert service '{enriched_service.id}': {e}") + logger.error( + f"Failed to upsert service points: {e}" + ) return False + def upsert_service(self, enriched_service: EnrichedService) -> bool: + """Upsert a single enriched service to Qdrant. + + Backward-compatible wrapper that delegates to upsert_service_points. + + Args: + enriched_service: EnrichedService instance + + Returns: + True if successful, False otherwise + """ + return self.upsert_service_points([enriched_service]) + def close(self) -> None: """Close Qdrant connection.""" if self.client: diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index ab9e402..2313b94 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -1,6 +1,7 @@ -"""Main tool classifier for workflow routing.""" +"""Main tool classifier for workflow routing with hybrid search classification.""" from typing import Any, AsyncIterator, Dict, List, Literal, Optional, Union, overload +import httpx from loguru import logger from models.request_models import ( @@ -10,6 +11,17 @@ ) from tool_classifier.enums import WorkflowType, WORKFLOW_DISPLAY_NAMES from tool_classifier.models import ClassificationResult +from tool_classifier.constants import ( + QDRANT_HOST, + QDRANT_PORT, + QDRANT_COLLECTION, + QDRANT_TIMEOUT, + HYBRID_SEARCH_TOP_K, + HYBRID_SEARCH_MIN_THRESHOLD, + 
SCORE_RATIO_THRESHOLD, + SCORE_GAP_THRESHOLD, +) +from tool_classifier.sparse_encoder import compute_sparse_vector from tool_classifier.workflows import ( ServiceWorkflowExecutor, ContextWorkflowExecutor, @@ -22,19 +34,16 @@ class ToolClassifier: """ Main classifier that determines which workflow should handle user queries. + Uses Qdrant hybrid search (dense + sparse + RRF fusion) to classify queries: + - High-confidence service match → SERVICE workflow (skip discovery + intent detection) + - Ambiguous match → SERVICE workflow with LLM confirmation + - No match → CONTEXT/RAG workflow (skip SERVICE entirely) + Implements a layer-wise filtering approach: Layer 1: Service Workflow → External API calls Layer 2: Context Workflow → Conversation history/greetings Layer 3: RAG Workflow → Knowledge base retrieval Layer 4: OOD Workflow → Out-of-domain fallback - - Each layer is tried in sequence. If a layer cannot handle the query - (returns None), the classifier falls back to the next layer. - - Architecture: - - Strategy Pattern: Each workflow is a pluggable strategy - - Chain of Responsibility: Layers form a fallback chain - - Dependency Injection: LLM manager and connections injected from main service """ def __init__( @@ -52,6 +61,17 @@ def __init__( self.llm_manager = llm_manager self.orchestration_service = orchestration_service + # Shared httpx client for Qdrant queries (connection pooling) + self._qdrant_base_url = f"http://{QDRANT_HOST}:{QDRANT_PORT}" + self._qdrant_client = httpx.AsyncClient( + base_url=self._qdrant_base_url, + timeout=QDRANT_TIMEOUT, + limits=httpx.Limits( + max_connections=20, + max_keepalive_connections=10, + ), + ) + # Initialize workflow executors self.service_workflow = ServiceWorkflowExecutor( llm_manager=llm_manager, @@ -65,7 +85,10 @@ def __init__( ) self.ood_workflow = OODWorkflowExecutor() - logger.info("Tool classifier initialized with all workflow executors") + logger.info( + "Tool classifier initialized with hybrid search 
classification " + f"(Qdrant: {self._qdrant_base_url})" + ) async def classify( self, @@ -74,13 +97,16 @@ async def classify( language: str, ) -> ClassificationResult: """ - Classify a user query to determine which workflow should handle it. + Classify a user query using Qdrant hybrid search (dense + sparse + RRF). - Implements layer-wise classification logic with fallback chain: - 1. SERVICE workflow (external API calls) - 2. CONTEXT workflow (greetings/conversation history) - 3. RAG workflow (knowledge base retrieval) - 4. OOD workflow (out-of-domain) + Classification flow: + 1. Generate dense embedding for the query + 2. Generate sparse vector for the query (BM25-style) + 3. Run hybrid search on intent_collections (prefetch dense + sparse → RRF fusion) + 4. Apply score-gap analysis: + - Clear winner (high ratio + gap) → SERVICE with high confidence + - Ambiguous (scores exist but close) → SERVICE with LLM confirmation flag + - No match (low/no scores) → CONTEXT (skip SERVICE entirely) Args: query: User's query string @@ -92,13 +118,291 @@ async def classify( """ logger.info(f"Classifying query: {query[:100]}...") - logger.info("Starting layer-wise fallback: ") - return ClassificationResult( - workflow=WorkflowType.SERVICE, - confidence=1.0, - metadata={}, - reasoning="Start with Service workflow - will cascade through layers", - ) + try: + # Step 1: Generate dense embedding for query + query_embedding = self._get_query_embedding(query) + if query_embedding is None: + logger.warning("Failed to generate query embedding, falling back to CONTEXT/RAG") + return ClassificationResult( + workflow=WorkflowType.CONTEXT, + confidence=1.0, + metadata={"reason": "embedding_generation_failed"}, + reasoning="Could not generate embedding - skip to Context/RAG", + ) + + # Step 2: Generate sparse vector for query + query_sparse = compute_sparse_vector(query) + + # Step 3: Qdrant hybrid search with RRF fusion + results = await self._hybrid_search( + dense_vector=query_embedding, + 
sparse_vector=query_sparse, + top_k=HYBRID_SEARCH_TOP_K, + ) + + if not results: + logger.info("No hybrid search results - routing to CONTEXT/RAG") + return ClassificationResult( + workflow=WorkflowType.CONTEXT, + confidence=1.0, + metadata={"reason": "no_service_match"}, + reasoning="No services matched the query", + ) + + # Step 4: Score-gap analysis + top = results[0] + top_score = top.get("rrf_score", 0.0) + top_service_id = top.get("service_id", "unknown") + top_service_name = top.get("name", "unknown") + + second_score = results[1].get("rrf_score", 0.0) if len(results) > 1 else 0.0 + + score_ratio = top_score / max(second_score, 0.0001) + score_gap = top_score - second_score + + logger.info( + f"Hybrid search results - " + f"top: {top_service_name} (score={top_score:.6f}), " + f"second: {results[1].get('name', 'none') if len(results) > 1 else 'none'} " + f"(score={second_score:.6f}), " + f"ratio={score_ratio:.2f}, gap={score_gap:.6f}" + ) + + # High confidence: clear winner → SERVICE (skip discovery + intent detection) + if score_ratio > SCORE_RATIO_THRESHOLD and score_gap > SCORE_GAP_THRESHOLD: + logger.info( + f"High-confidence service match: {top_service_name} " + f"(ratio={score_ratio:.2f}, gap={score_gap:.6f})" + ) + return ClassificationResult( + workflow=WorkflowType.SERVICE, + confidence=min(score_ratio / 5.0, 1.0), + metadata={ + "matched_service_id": top_service_id, + "matched_service_name": top_service_name, + "rrf_score": top_score, + "score_gap": score_gap, + "score_ratio": score_ratio, + "needs_llm_confirmation": False, + "top_results": results[:3], + }, + reasoning=( + f"High-confidence match: {top_service_name} " + f"(ratio={score_ratio:.2f}, gap={score_gap:.6f})" + ), + ) + + # Medium confidence: ambiguous → SERVICE with LLM confirmation + if top_score > HYBRID_SEARCH_MIN_THRESHOLD: + logger.info( + f"Ambiguous service match: {top_service_name} " + f"(score={top_score:.6f}, ratio={score_ratio:.2f}) - needs LLM confirmation" + ) + return 
ClassificationResult( + workflow=WorkflowType.SERVICE, + confidence=0.5, + metadata={ + "matched_service_id": top_service_id, + "matched_service_name": top_service_name, + "rrf_score": top_score, + "score_gap": score_gap, + "score_ratio": score_ratio, + "needs_llm_confirmation": True, + "top_results": results[:3], + }, + reasoning=( + f"Ambiguous match: {top_service_name} " + f"(score={top_score:.6f}) - LLM confirmation needed" + ), + ) + + # No confidence: skip SERVICE entirely → CONTEXT/RAG + logger.info( + f"No service match (top_score={top_score:.6f} below threshold " + f"{HYBRID_SEARCH_MIN_THRESHOLD}) - routing to CONTEXT/RAG" + ) + return ClassificationResult( + workflow=WorkflowType.CONTEXT, + confidence=1.0, + metadata={"reason": "below_threshold", "top_score": top_score}, + reasoning=f"Top score {top_score:.6f} below threshold - skip to Context/RAG", + ) + + except Exception as e: + logger.error(f"Hybrid classification failed: {e}", exc_info=True) + # Fallback: route to CONTEXT/RAG on any error + return ClassificationResult( + workflow=WorkflowType.CONTEXT, + confidence=1.0, + metadata={"reason": "classification_error", "error": str(e)}, + reasoning=f"Classification error - falling back to Context/RAG: {e}", + ) + + def _get_query_embedding(self, query: str) -> Optional[List[float]]: + """Generate dense embedding for a query using the orchestration service. 
+ + Args: + query: Query text to embed + + Returns: + List of floats representing the dense embedding, or None on failure + """ + try: + if not self.orchestration_service: + logger.error("Orchestration service not available for embedding") + return None + + result = self.orchestration_service.create_embeddings_for_indexer( + texts=[query], + environment="production", + batch_size=1, + ) + + embeddings = result.get("embeddings", []) + if embeddings and len(embeddings) > 0: + return embeddings[0] + + logger.error("No embedding returned for query") + return None + + except Exception as e: + logger.error(f"Failed to generate query embedding: {e}") + return None + + async def _hybrid_search( + self, + dense_vector: List[float], + sparse_vector: Any, + top_k: int = HYBRID_SEARCH_TOP_K, + ) -> List[Dict[str, Any]]: + """Execute hybrid search on Qdrant using prefetch + RRF fusion. + + Sends both dense and sparse vectors in a single Qdrant query, + using the prefetch API for parallel retrieval and RRF for fusion. 
+ + Args: + dense_vector: Dense embedding vector (3072-dim) + sparse_vector: SparseVector with indices and values + top_k: Number of results to return + + Returns: + List of result dicts with service metadata and rrf_score + """ + try: + # Check if collection exists and has data + try: + collection_info = await self._qdrant_client.get( + f"/collections/{QDRANT_COLLECTION}" + ) + if collection_info.status_code == 200: + info = collection_info.json() + points_count = info.get("result", {}).get("points_count", 0) + if points_count == 0: + logger.info("Intent collection is empty - no services indexed") + return [] + else: + logger.warning( + f"Could not verify collection: HTTP {collection_info.status_code}" + ) + return [] + except Exception as e: + logger.warning(f"Could not verify intent collection: {e}") + return [] + + # Build hybrid search payload with prefetch + RRF + search_payload: Dict[str, Any] = { + "prefetch": [ + { + "query": dense_vector, + "using": "dense", + "limit": top_k * 2, + }, + ], + "query": {"fusion": "rrf"}, + "limit": top_k, + "with_payload": True, + } + + # Add sparse prefetch only if sparse vector is non-empty + if not sparse_vector.is_empty(): + search_payload["prefetch"].append( + { + "query": sparse_vector.to_dict(), + "using": "sparse", + "limit": top_k * 2, + } + ) + + response = await self._qdrant_client.post( + f"/collections/{QDRANT_COLLECTION}/points/query", + json=search_payload, + ) + + if response.status_code != 200: + logger.error( + f"Qdrant hybrid search failed: HTTP {response.status_code} - " + f"{response.text}" + ) + return [] + + search_results = response.json() + points = search_results.get("result", {}).get("points", []) + + if not points: + logger.info("No results from hybrid search") + return [] + + # Parse and deduplicate results (group by service_id, keep best score) + service_results: Dict[str, Dict[str, Any]] = {} + for point in points: + payload = point.get("payload", {}) + score = float(point.get("score", 0)) + 
service_id = payload.get("service_id", "unknown") + + if service_id not in service_results or score > service_results[service_id].get("rrf_score", 0): + service_results[service_id] = { + "service_id": service_id, + "name": payload.get("name", ""), + "description": payload.get("description", ""), + "examples": payload.get("examples", []), + "entities": payload.get("entities", []), + "context": payload.get("context", ""), + "point_type": payload.get("point_type", "unknown"), + "example_text": payload.get("example_text"), + "rrf_score": score, + } + + # Sort by RRF score descending + sorted_results = sorted( + service_results.values(), + key=lambda x: x["rrf_score"], + reverse=True, + ) + + logger.info( + f"Hybrid search found {len(sorted_results)} unique services " + f"from {len(points)} points" + ) + + for i, r in enumerate(sorted_results[:3]): + logger.debug( + f" Rank {i + 1}: {r['name']} " + f"(service_id={r['service_id']}, " + f"rrf_score={r['rrf_score']:.6f}, " + f"type={r['point_type']})" + ) + + return sorted_results + + except httpx.TimeoutException: + logger.error( + f"Qdrant hybrid search timeout after {QDRANT_TIMEOUT}s" + ) + return [] + except Exception as e: + logger.error(f"Hybrid search failed: {e}", exc_info=True) + return [] + @overload async def route_to_workflow( diff --git a/src/tool_classifier/constants.py b/src/tool_classifier/constants.py index c885b52..7db6aa9 100644 --- a/src/tool_classifier/constants.py +++ b/src/tool_classifier/constants.py @@ -58,3 +58,22 @@ SERVICE_COUNT_THRESHOLD = 10 """Threshold for triggering semantic search. 
If service count > this value, semantic search is used instead of sending all services to LLM.""" + + +# ============================================================================ +# Hybrid Search Classification Thresholds +# ============================================================================ + +HYBRID_SEARCH_TOP_K = 5 +"""Number of top results from hybrid search for classification.""" + +HYBRID_SEARCH_MIN_THRESHOLD = 0.01 +"""Minimum RRF score to consider a result as a potential match.""" + +SCORE_RATIO_THRESHOLD = 2.0 +"""Score ratio (top/second) for confident service classification. +If the top result's RRF score is > 2x the second result, it's a high-confidence match.""" + +SCORE_GAP_THRESHOLD = 0.005 +"""Absolute score gap for confident classification. +Prevents false positives when both scores are very low.""" diff --git a/src/tool_classifier/sparse_encoder.py b/src/tool_classifier/sparse_encoder.py new file mode 100644 index 0000000..0d0dc3f --- /dev/null +++ b/src/tool_classifier/sparse_encoder.py @@ -0,0 +1,82 @@ +""" +Sparse vector encoder for BM25-style term frequency vectors. + +Shared module used by both: +- intent_data_enrichment (indexing time) — to create sparse vectors for service examples +- tool_classifier (query time) — to create sparse vectors for user queries + +Uses hash-based indexing compatible with Qdrant's sparse vector format. +""" + +import re +from collections import Counter +from dataclasses import dataclass, field +from typing import List + + +# Hash space for sparse vector indices +# Larger = fewer collisions but more memory; 50K is a good balance for intent classification +SPARSE_VOCAB_SIZE = 50_000 + +# Simple word tokenizer matching the pattern used in contextual_retrieval/bm25_search.py +TOKENIZER_PATTERN = re.compile(r"\w+") + + +@dataclass +class SparseVector: + """Sparse vector representation for Qdrant. 
+ + Attributes: + indices: Sorted list of non-zero dimension indices + values: Corresponding values for each index + """ + + indices: List[int] = field(default_factory=list) + values: List[float] = field(default_factory=list) + + def to_dict(self) -> dict: + """Convert to Qdrant API format.""" + return {"indices": self.indices, "values": self.values} + + def is_empty(self) -> bool: + """Check if the sparse vector has no entries.""" + return len(self.indices) == 0 + + +def compute_sparse_vector(text: str) -> SparseVector: + """Convert text to a sparse vector using term-frequency hashing. + + Tokenizes the input text, counts term frequencies, and maps each token + to a hash-based index in the sparse vector space. This creates a + BM25-compatible representation that Qdrant can use for sparse search. + + Args: + text: Input text to vectorize + + Returns: + SparseVector with hash-based indices and term frequency values + """ + if not text or not text.strip(): + return SparseVector() + + # Tokenize: lowercase and extract word tokens + tokens = TOKENIZER_PATTERN.findall(text.lower()) + if not tokens: + return SparseVector() + + # Count term frequencies + token_counts = Counter(tokens) + + # Hash-based indexing: map each token to an index in [0, SPARSE_VOCAB_SIZE) + # Collisions are handled by summing values at the same index + hash_counts: dict[int, float] = {} + for token, count in token_counts.items(): + idx = hash(token) % SPARSE_VOCAB_SIZE + # Handle hash collisions by accumulating + hash_counts[idx] = hash_counts.get(idx, 0) + float(count) + + # Sort indices for consistent representation (Qdrant requirement) + sorted_indices = sorted(hash_counts.keys()) + sorted_values = [hash_counts[i] for i in sorted_indices] + + return SparseVector(indices=sorted_indices, values=sorted_values) diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index bed97dd..747c987 100644 --- 
a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -553,9 +553,14 @@ async def execute_async( ) -> Optional[OrchestrationResponse]: """Execute service workflow in non-streaming mode. + Uses classification metadata from hybrid search: + - needs_llm_confirmation=False: Skip discovery + intent detection, use matched service + - needs_llm_confirmation=True: Run LLM intent detection on candidate services only + - No metadata: Fall back to original discovery flow + Args: request: Orchestration request - context: Workflow context + context: Workflow context (contains classification metadata) timing_dict: Optional timing dictionary for unified tracking """ import time @@ -568,12 +573,77 @@ async def execute_async( if timing_dict is None: timing_dict = {} - # Service discovery with timing - start_time = time.time() - await self._log_request_details( - request, context, mode="non-streaming", costs_dict=costs_dict - ) - timing_dict["service.discovery"] = time.time() - start_time + # Check if classifier provided hybrid search metadata + needs_llm_confirmation = context.get("needs_llm_confirmation") + + if needs_llm_confirmation is False: + # HIGH CONFIDENCE PATH: Classifier matched a service with high confidence + # Skip service discovery — use hybrid search match directly + matched_service_id = context.get("matched_service_id") + matched_service_name = context.get("matched_service_name") + rrf_score = context.get("rrf_score", 0) + + logger.info( + f"[{chat_id}] HIGH-CONFIDENCE SERVICE MATCH (non-streaming): " + f"{matched_service_name} (rrf_score={rrf_score:.6f}) - " + f"skipping discovery" + ) + + # Get service details from top_results (already retrieved by classifier) + top_results = context.get("top_results", []) + if top_results: + matched = top_results[0] + + # Run entity extraction via LLM (DSPy) for this single service + start_time = time.time() + await self._process_intent_detection( + services=[matched], + 
request=request, + chat_id=chat_id, + context=context, + costs_dict=costs_dict, + ) + timing_dict["service.intent_detection"] = time.time() - start_time + + # Ensure service_data is populated from hybrid match + # _process_intent_detection may not set it if DSPy returns + # a different service_id format, so we populate it explicitly + if not context.get("service_data"): + context["service_id"] = matched.get("service_id") + context["service_data"] = matched + logger.info( + f"[{chat_id}] Populated service_data from hybrid match: " + f"{matched.get('name')}" + ) + + elif needs_llm_confirmation is True: + # AMBIGUOUS PATH: Multiple services scored similarly + # Run LLM intent detection only on candidate services (not all services) + top_results = context.get("top_results", []) + logger.info( + f"[{chat_id}] AMBIGUOUS SERVICE MATCH (non-streaming): " + f"running LLM intent detection on {len(top_results)} candidates" + ) + + start_time = time.time() + if top_results: + await self._process_intent_detection( + services=top_results, + request=request, + chat_id=chat_id, + context=context, + costs_dict=costs_dict, + ) + timing_dict["service.discovery"] = time.time() - start_time + + else: + # LEGACY PATH: No hybrid search metadata (classifier disabled or error) + # Full service discovery + intent detection (original behavior) + start_time = time.time() + await self._log_request_details( + request, context, mode="non-streaming", costs_dict=costs_dict + ) + timing_dict["service.discovery"] = time.time() - start_time # Check if service was detected and validated if not context.get("service_id"): @@ -692,9 +762,11 @@ async def execute_streaming( ) -> Optional[AsyncIterator[str]]: """Execute service workflow in streaming mode. + Uses classification metadata from hybrid search (same as execute_async). 
+ Args: request: Orchestration request - context: Workflow context + context: Workflow context (contains classification metadata) timing_dict: Optional timing dictionary for unified tracking """ import time @@ -707,12 +779,68 @@ async def execute_streaming( if timing_dict is None: timing_dict = {} - # Service discovery with timing - start_time = time.time() - await self._log_request_details( - request, context, mode="streaming", costs_dict=costs_dict - ) - timing_dict["service.discovery"] = time.time() - start_time + # Check if classifier provided hybrid search metadata + needs_llm_confirmation = context.get("needs_llm_confirmation") + + if needs_llm_confirmation is False: + # HIGH CONFIDENCE PATH: Skip discovery, use matched service + matched_service_name = context.get("matched_service_name") + rrf_score = context.get("rrf_score", 0) + + logger.info( + f"[{chat_id}] HIGH-CONFIDENCE SERVICE MATCH (streaming): " + f"{matched_service_name} (rrf_score={rrf_score:.6f})" + ) + + top_results = context.get("top_results", []) + if top_results: + matched = top_results[0] + + start_time = time.time() + await self._process_intent_detection( + services=[matched], + request=request, + chat_id=chat_id, + context=context, + costs_dict=costs_dict, + ) + timing_dict["service.intent_detection"] = time.time() - start_time + + # Ensure service_data is populated from hybrid match + if not context.get("service_data"): + context["service_id"] = matched.get("service_id") + context["service_data"] = matched + logger.info( + f"[{chat_id}] Populated service_data from hybrid match: " + f"{matched.get('name')}" + ) + + elif needs_llm_confirmation is True: + # AMBIGUOUS PATH: Run LLM intent detection on candidates + top_results = context.get("top_results", []) + logger.info( + f"[{chat_id}] AMBIGUOUS SERVICE MATCH (streaming): " + f"{len(top_results)} candidates" + ) + + start_time = time.time() + if top_results: + await self._process_intent_detection( + services=top_results, + request=request, 
+ chat_id=chat_id, + context=context, + costs_dict=costs_dict, + ) + timing_dict["service.discovery"] = time.time() - start_time + + else: + # LEGACY PATH: Full service discovery (original behavior) + start_time = time.time() + await self._log_request_details( + request, context, mode="streaming", costs_dict=costs_dict + ) + timing_dict["service.discovery"] = time.time() - start_time # Check if service was detected and validated if not context.get("service_id"): From 789f062e3fdd6aca000ce1551fc8f411328020d8 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Sun, 1 Mar 2026 10:38:40 +0530 Subject: [PATCH 15/27] update tool classifier --- docs/HYBRID_SEARCH_CLASSIFICATION.md | 380 +++++++++++++++++++++++++++ src/tool_classifier/classifier.py | 263 ++++++++++++------ src/tool_classifier/constants.py | 24 +- 3 files changed, 579 insertions(+), 88 deletions(-) create mode 100644 docs/HYBRID_SEARCH_CLASSIFICATION.md diff --git a/docs/HYBRID_SEARCH_CLASSIFICATION.md b/docs/HYBRID_SEARCH_CLASSIFICATION.md new file mode 100644 index 0000000..3e29b99 --- /dev/null +++ b/docs/HYBRID_SEARCH_CLASSIFICATION.md @@ -0,0 +1,380 @@ +# Hybrid Search Classification & Intent Data Enrichment + +> Updated architecture for the Tool Classifier using hybrid search (dense + sparse + RRF) with per-example indexing. +> Replaces the single-embedding approach documented in `TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md`. + +--- + +## Table of Contents + +1. [Architecture Overview](#architecture-overview) +2. [Intent Data Enrichment (Indexing)](#intent-data-enrichment-indexing) +3. [Classification Flow (Query Time)](#classification-flow-query-time) +4. [Intent Detection & Entity Extraction](#intent-detection--entity-extraction) +5. [Thresholds & Configuration](#thresholds--configuration) + +--- + +## Architecture Overview + +The system has two phases: + +1. **Indexing (offline):** For each service, create multiple Qdrant points with dense + sparse vectors +2. 
**Classification (query time):** Two-step search to route queries — dense for relevance, hybrid for service identification + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ INDEXING (Offline) │ +│ │ +│ service_enrichment.sh → main_enrichment.py │ +│ ├─ LLM context generation │ +│ ├─ Per-example: dense embedding + sparse BM25 vector │ +│ ├─ Summary: dense embedding + sparse BM25 vector │ +│ └─ Qdrant upsert (N examples + 1 summary = N+1 points) │ +├─────────────────────────────────────────────────────────────────────┤ +│ CLASSIFICATION (Query Time) │ +│ │ +│ User Query │ +│ ├─ Step 1: Dense search → cosine similarity (relevance check) │ +│ ├─ Step 2: Hybrid search → RRF fusion (service identification) │ +│ └─ Route: HIGH-CONFIDENCE / AMBIGUOUS / CONTEXT-RAG │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Intent Data Enrichment (Indexing) + +### Source Files + +| File | Role | +|------|------| +| `DSL/CronManager/script/service_enrichment.sh` | Entry point — sets environment, runs Python script | +| `src/intent_data_enrichment/main_enrichment.py` | Orchestrates per-example and summary point creation | +| `src/intent_data_enrichment/qdrant_manager.py` | Qdrant collection management, upsert, and deletion | +| `src/intent_data_enrichment/api_client.py` | LLM API calls (context generation, embeddings) | +| `src/intent_data_enrichment/models.py` | `EnrichedService` data model | +| `src/tool_classifier/sparse_encoder.py` | BM25-style sparse vector computation | + +### What Changed: Single Embedding → Per-Example Indexing + +**Before (old):** One point per service from concatenated text. + +**After (new):** N+1 points per service — one per example query, plus one summary. 
+ +Example for a service with 3 examples: +``` +Service "Valuutakursid" → 4 Qdrant points + + Point 0 (example): "Mis suhe on euro ja usd vahel" + dense: 3072-dim embedding of this exact text + sparse: BM25 vector → {euro: 1.0, usd: 1.0, suhe: 1.0, ...} + + Point 1 (example): "Mis on euro ja btc vahetuskurss?" + dense: 3072-dim embedding of this exact text + sparse: BM25 vector → {euro: 1.0, btc: 1.0, vahetuskurss: 1.0, ...} + + Point 2 (example): "euro ja gbp vaheline kurss" + dense: 3072-dim embedding of this exact text + sparse: BM25 vector → {euro: 1.0, gbp: 1.0, kurss: 1.0, ...} + + Point 3 (summary): "Valuutakursid - Kasutaja soovib infot..." + dense: 3072-dim embedding of name + description + LLM context + sparse: BM25 vector of combined text +``` + +### Why Per-Example Indexing? + +- Each example gets its own embedding, matching diverse user phrasings better +- Short example queries aren't diluted by long descriptions +- More examples = wider coverage "net" for query matching +- Sparse vectors enable keyword matching ("EUR", "USD") alongside semantic search + +### Dense vs Sparse Vectors + +| Type | Generation | Strength | +|------|-----------|----------| +| **Dense** (3072-dim) | `text-embedding-3-large` via Azure OpenAI | Semantic similarity — matches paraphrases, cross-language | +| **Sparse** (BM25) | Term frequency hashing (`sparse_encoder.py`) | Keyword overlap — exact token matching ("EUR", "USD", "THB") | + +### Sparse Vector Generation + +```python +# sparse_encoder.py +text = "Mis suhe on euro ja usd vahel" +tokens = re.findall(r"\w+", text.lower()) # ["mis", "suhe", "on", "euro", ...] 
+# Each token → hashed to index in [0, VOCAB_SIZE), value = term frequency +# Output: SparseVector(indices=[hash("mis"), hash("euro"), ...], values=[1.0, 1.0, ...]) +``` + +### Qdrant Collection Schema + +```python +# Collection: "intent_collections" +vectors_config = { + "dense": VectorParams(size=3072, distance=Distance.COSINE) +} +sparse_vectors_config = { + "sparse": SparseVectorParams(index=SparseIndexParams()) +} +``` + +Each point payload: +```json +{ + "service_id": "common_service_exchange_rate", + "name": "Valuutakursid", + "description": "Kasutaja soovib infot valuutade kohta", + "examples": ["Mis suhe on euro ja usd vahel", "..."], + "entities": ["currency_from", "currency_to"], + "context": "LLM-generated enriched context...", + "point_type": "example", + "example_text": "Mis suhe on euro ja usd vahel", + "point_index": 0 +} +``` + +### Enrichment Pipeline Flow + +``` +service_enrichment.sh + │ + ├─ Parse args: service_id, name, description, examples, entities + │ + ├─ Step 1: LLM context generation (enriched description) + │ + ├─ Step 2: For each example query: + │ ├─ Generate dense embedding (text-embedding-3-large) + │ └─ Generate sparse vector (BM25 term hashing) + │ + ├─ Step 3: Summary point (name + description + LLM context): + │ ├─ Generate dense embedding + │ └─ Generate sparse vector + │ + ├─ Step 4: Delete existing points for this service (idempotent) + │ + └─ Step 5: Bulk upsert N+1 points to Qdrant +``` + +### Service Deletion + +When a service is deactivated, all its points are removed: +```python +qdrant_manager.delete_service_points(service_id) +# Uses payload filter: {"service_id": service_id} +``` + +--- + +## Classification Flow (Query Time) + +### Source Files + +| File | Role | +|------|------| +| `src/tool_classifier/classifier.py` | Two-step search + routing decisions | +| `src/tool_classifier/constants.py` | All thresholds and configuration | +| `src/tool_classifier/sparse_encoder.py` | Query sparse vector generation | +| 
`src/tool_classifier/workflows/service_workflow.py` | Service execution with 3 routing paths | + +### Step 1: Dense Search — "Is This a Service Query?" + +Queries Qdrant using only the dense vector to get **actual cosine similarity scores** (0.0 – 1.0). + +```python +# classifier.py → _dense_search() +POST /collections/intent_collections/points/query +{ + "query": [0.023, -0.041, ...], # 3072-dim dense vector + "using": "dense", + "limit": 6, + "with_payload": true +} +``` + +Results are deduplicated by `service_id` (best score per service). + +**Why not use RRF scores?** +Qdrant's RRF uses `1/(1+rank)`, producing fixed scores (0.50, 0.33, 0.25) regardless of actual relevance. A perfect match and a random query both get 0.50 for rank 1. Cosine similarity reflects true semantic closeness. + +### Step 2: Hybrid Search — "Which Service?" + +Only runs if cosine ≥ `DENSE_MIN_THRESHOLD`. Combines dense + sparse search with RRF fusion. + +```python +# classifier.py → _hybrid_search() +POST /collections/intent_collections/points/query +{ + "prefetch": [ + {"query": dense_vector, "using": "dense", "limit": 20}, + {"query": {"indices": [...], "values": [...]}, "using": "sparse", "limit": 20} + ], + "query": {"fusion": "rrf"}, + "limit": 5, + "with_payload": true +} +``` + +### Routing Decision + +``` +Dense cosine score + gap + │ + ├─ cosine < 0.20 → PATH 1: Skip SERVICE → CONTEXT/RAG + │ + ├─ cosine ≥ 0.40 AND → PATH 2: HIGH-CONFIDENCE SERVICE + │ gap ≥ 0.05 (skip discovery, entity extraction only) + │ + └─ else (0.20 ≤ cosine < 0.40 → PATH 3: AMBIGUOUS SERVICE + OR gap < 0.05) (LLM intent detection on candidates) +``` + +### Path 1: Non-Service Query → CONTEXT/RAG + +Top cosine score below minimum threshold. The query has no meaningful similarity to any indexed service. + +``` +Query: "Tere, kuidas läheb?" 
+Dense: top cosine=0.15 → below 0.20 → skip SERVICE +→ Routes directly to CONTEXT → RAG (saves ~2-4s) +``` + +### Path 2: HIGH-CONFIDENCE Service Match + +One service clearly stands out with high cosine and large gap to second result. + +``` +Query: "Palju saan 1 EUR eest THBdes?" +Dense: Valuutakursid (cosine=0.5511), gap=0.2371 +→ 0.5511 ≥ 0.40 AND 0.2371 ≥ 0.05 → HIGH-CONFIDENCE +→ Skips service discovery +→ Runs entity extraction on matched service only +→ Entities: {currency_from: EUR, currency_to: THB} +→ Validation: PASSED ✓ +``` + +### Path 3: AMBIGUOUS Service Match → LLM Confirmation + +Multiple services score similarly or cosine is in the medium range. + +``` +Query: "Mis on täna ilm?" +Dense: Ilmapäring (cosine=0.35), gap=0.02 +→ 0.35 ≥ 0.20 but 0.35 < 0.40 → AMBIGUOUS +→ Runs LLM Intent Detection on top 3 candidates +→ LLM confirms or rejects → falls back to RAG if rejected +``` + +### Fallback Chain + +Each workflow returns a response or `None` (fallback to next): + +``` +SERVICE (Layer 1) → CONTEXT (Layer 2) → RAG (Layer 3) → OOD (Layer 4) +``` + +--- + +## Intent Detection & Entity Extraction + +### When Does It Run? 
+ +| Path | Intent Detection | Entity Extraction | +|------|-----------------|-------------------| +| HIGH-CONFIDENCE | On 1 service (matched) | Yes — from LLM output | +| AMBIGUOUS | On 2-3 candidates | Yes — if LLM matches | +| Non-service | Not run | Not run | + +### Intent Detection Module (DSPy) + +**File:** `src/tool_classifier/intent_detector.py` + +The DSPy `IntentDetectionModule` receives: +- User query +- Candidate services (formatted as JSON) +- Conversation history (last 3 turns) + +It returns: +```json +{ + "matched_service_id": "common_service_exchange_rate", + "confidence": 0.92, + "entities": { + "currency_from": "EUR", + "currency_to": "THB" + }, + "reasoning": "User wants EUR to THB exchange rate" +} +``` + +### Entity Validation + +**File:** `src/tool_classifier/workflows/service_workflow.py` → `_validate_entities()` + +Extracted entities are validated against the service's schema: + +``` +Schema: ["currency_from", "currency_to"] +Extracted: {"currency_from": "EUR", "currency_to": "THB"} +Result: PASSED ✓ +``` + +- **Missing entities** → sent as empty strings (service validates) +- **Extra entities** → ignored +- **Validation is lenient** — always proceeds, lets the service endpoint validate + +### Entity Transformation + +Entities dict → ordered array matching service schema: + +```python +# Schema: ["currency_from", "currency_to"] +# Dict: {"currency_from": "EUR", "currency_to": "THB"} +# Array: ["EUR", "THB"] +``` + +--- + +## Thresholds & Configuration + +All defined in `src/tool_classifier/constants.py`. + +### Classification Thresholds + +| Constant | Value | Description | +|----------|-------|-------------| +| `DENSE_MIN_THRESHOLD` | `0.20` | Minimum cosine to consider any service match. Below → skip SERVICE entirely. Set low because multilingual (Estonian) queries yield lower cosine (0.25–0.55). | +| `DENSE_HIGH_CONFIDENCE_THRESHOLD` | `0.40` | Cosine for HIGH-CONFIDENCE path. 
Service queries with correct match score > 0.40 (observed: 0.55). Non-service score 0.27–0.35. | +| `DENSE_SCORE_GAP_THRESHOLD` | `0.05` | Required gap between top two services. Prevents false positives when multiple services score similarly. Service gaps: ~0.24, non-service gaps: ~0.01. | + +### Search Configuration + +| Constant | Value | Description | +|----------|-------|-------------| +| `DENSE_SEARCH_TOP_K` | `3` | Unique services from dense search | +| `HYBRID_SEARCH_TOP_K` | `5` | Results from hybrid RRF search | + +### Observed Score Distributions + +Based on real Estonian query testing: + +| Metric | Service Query | Non-Service Query | +|--------|:------------:|:-----------------:| +| Top cosine | **0.55** | 0.27 – 0.35 | +| Cosine gap | **0.24** | 0.005 – 0.017 | +| Decision | HIGH-CONFIDENCE | AMBIGUOUS → LLM reject | + +### Performance by Path + +| Path | Latency | LLM Calls | Cost | +|------|:-------:|:---------:|:----:| +| Non-service (below threshold) | ~0.3s | 0 | $0 | +| HIGH-CONFIDENCE service | ~2.0s | 1 | ~$0.002 | +| AMBIGUOUS service | ~3.5s | 1-2 | ~$0.002–0.004 | +| Legacy (no classifier) | ~4.0s | 2+ | ~$0.004+ | + +### Tuning Recommendations + +- **Adding more services:** Score distributions improve naturally — service queries score higher, non-service score lower. +- **Adding more examples per service:** Diverse phrasings expand the embedding coverage. Aim for 5-8 examples per service covering formal + informal + different word orders. +- **Adjusting thresholds:** Monitor the logs (`Dense search: top=... cosine=...`) and adjust if real-world scores differ from test data. 
diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index 2313b94..181b6cf 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -9,7 +9,7 @@ OrchestrationRequest, OrchestrationResponse, ) -from tool_classifier.enums import WorkflowType, WORKFLOW_DISPLAY_NAMES +from tool_classifier.enums import WorkflowType, WORKFLOW_DISPLAY_NAMES, WORKFLOW_LAYER_ORDER from tool_classifier.models import ClassificationResult from tool_classifier.constants import ( QDRANT_HOST, @@ -17,9 +17,10 @@ QDRANT_COLLECTION, QDRANT_TIMEOUT, HYBRID_SEARCH_TOP_K, - HYBRID_SEARCH_MIN_THRESHOLD, - SCORE_RATIO_THRESHOLD, - SCORE_GAP_THRESHOLD, + DENSE_SEARCH_TOP_K, + DENSE_MIN_THRESHOLD, + DENSE_HIGH_CONFIDENCE_THRESHOLD, + DENSE_SCORE_GAP_THRESHOLD, ) from tool_classifier.sparse_encoder import compute_sparse_vector from tool_classifier.workflows import ( @@ -34,7 +35,11 @@ class ToolClassifier: """ Main classifier that determines which workflow should handle user queries. - Uses Qdrant hybrid search (dense + sparse + RRF fusion) to classify queries: + Uses a two-step search approach for classification: + 1. Dense-only search → real cosine similarity scores for relevance check + 2. Hybrid search (dense + sparse + RRF) → best service identification + + Routing decisions: - High-confidence service match → SERVICE workflow (skip discovery + intent detection) - Ambiguous match → SERVICE workflow with LLM confirmation - No match → CONTEXT/RAG workflow (skip SERVICE entirely) @@ -97,16 +102,15 @@ async def classify( language: str, ) -> ClassificationResult: """ - Classify a user query using Qdrant hybrid search (dense + sparse + RRF). + Classify a user query using a two-step search approach. + + Step 1: Dense-only search → cosine similarity for relevance check + Step 2: Hybrid search (dense + sparse + RRF) → service identification - Classification flow: - 1. Generate dense embedding for the query - 2. 
Generate sparse vector for the query (BM25-style) - 3. Run hybrid search on intent_collections (prefetch dense + sparse → RRF fusion) - 4. Apply score-gap analysis: - - Clear winner (high ratio + gap) → SERVICE with high confidence - - Ambiguous (scores exist but close) → SERVICE with LLM confirmation flag - - No match (low/no scores) → CONTEXT (skip SERVICE entirely) + Routing: + - cosine < DENSE_MIN_THRESHOLD → CONTEXT/RAG (skip SERVICE) + - cosine ≥ HIGH_CONFIDENCE + large gap → SERVICE (no LLM needed) + - else → SERVICE with LLM confirmation Args: query: User's query string @@ -130,107 +134,126 @@ async def classify( reasoning="Could not generate embedding - skip to Context/RAG", ) - # Step 2: Generate sparse vector for query - query_sparse = compute_sparse_vector(query) - - # Step 3: Qdrant hybrid search with RRF fusion - results = await self._hybrid_search( + # Step 2: Dense-only search → get actual cosine similarity scores + dense_results = await self._dense_search( dense_vector=query_embedding, - sparse_vector=query_sparse, - top_k=HYBRID_SEARCH_TOP_K, + top_k=DENSE_SEARCH_TOP_K, ) - if not results: - logger.info("No hybrid search results - routing to CONTEXT/RAG") + if not dense_results: + logger.info("No dense search results - routing to CONTEXT/RAG") return ClassificationResult( workflow=WorkflowType.CONTEXT, confidence=1.0, metadata={"reason": "no_service_match"}, - reasoning="No services matched the query", + reasoning="No services matched the query (dense search empty)", ) - # Step 4: Score-gap analysis - top = results[0] - top_score = top.get("rrf_score", 0.0) - top_service_id = top.get("service_id", "unknown") - top_service_name = top.get("name", "unknown") - - second_score = results[1].get("rrf_score", 0.0) if len(results) > 1 else 0.0 - - score_ratio = top_score / max(second_score, 0.0001) - score_gap = top_score - second_score + top_cosine = dense_results[0].get("cosine_score", 0.0) + top_service_name = dense_results[0].get("name", "unknown") + 
second_cosine = dense_results[1].get("cosine_score", 0.0) if len(dense_results) > 1 else 0.0 + cosine_gap = top_cosine - second_cosine logger.info( - f"Hybrid search results - " - f"top: {top_service_name} (score={top_score:.6f}), " - f"second: {results[1].get('name', 'none') if len(results) > 1 else 'none'} " - f"(score={second_score:.6f}), " - f"ratio={score_ratio:.2f}, gap={score_gap:.6f}" + f"Dense search: top={top_service_name} " + f"(cosine={top_cosine:.4f}), " + f"second={dense_results[1].get('name', 'none') if len(dense_results) > 1 else 'none'} " + f"(cosine={second_cosine:.4f}), " + f"gap={cosine_gap:.4f}" ) - # High confidence: clear winner → SERVICE (skip discovery + intent detection) - if score_ratio > SCORE_RATIO_THRESHOLD and score_gap > SCORE_GAP_THRESHOLD: + # Decision: Is this a service query at all? + if top_cosine < DENSE_MIN_THRESHOLD: logger.info( - f"High-confidence service match: {top_service_name} " - f"(ratio={score_ratio:.2f}, gap={score_gap:.6f})" + f"Low relevance (cosine={top_cosine:.4f} < {DENSE_MIN_THRESHOLD}) " + f"- routing to CONTEXT/RAG, skipping SERVICE" ) return ClassificationResult( - workflow=WorkflowType.SERVICE, - confidence=min(score_ratio / 5.0, 1.0), + workflow=WorkflowType.CONTEXT, + confidence=1.0, metadata={ - "matched_service_id": top_service_id, - "matched_service_name": top_service_name, - "rrf_score": top_score, - "score_gap": score_gap, - "score_ratio": score_ratio, - "needs_llm_confirmation": False, - "top_results": results[:3], + "reason": "below_dense_threshold", + "top_cosine": top_cosine, + "top_service": top_service_name, }, reasoning=( - f"High-confidence match: {top_service_name} " - f"(ratio={score_ratio:.2f}, gap={score_gap:.6f})" + f"Dense cosine {top_cosine:.4f} below threshold " + f"{DENSE_MIN_THRESHOLD} - skip to Context/RAG" ), ) - # Medium confidence: ambiguous → SERVICE with LLM confirmation - if top_score > HYBRID_SEARCH_MIN_THRESHOLD: + # Step 3: Hybrid search → identify best service using RRF 
+ query_sparse = compute_sparse_vector(query) + hybrid_results = await self._hybrid_search( + dense_vector=query_embedding, + sparse_vector=query_sparse, + top_k=HYBRID_SEARCH_TOP_K, + ) + + # Use hybrid results for service identification, dense scores for confidence + if not hybrid_results: + # Dense matched but hybrid didn't — use dense results + hybrid_results = dense_results + + top_result = hybrid_results[0] + top_service_id = top_result.get("service_id", "unknown") + top_service_name_hybrid = top_result.get("name", "unknown") + + logger.info( + f"Hybrid search: best service={top_service_name_hybrid} " + f"(service_id={top_service_id})" + ) + + # High confidence: cosine is high AND clear gap to second result + if ( + top_cosine >= DENSE_HIGH_CONFIDENCE_THRESHOLD + and cosine_gap >= DENSE_SCORE_GAP_THRESHOLD + ): logger.info( - f"Ambiguous service match: {top_service_name} " - f"(score={top_score:.6f}, ratio={score_ratio:.2f}) - needs LLM confirmation" + f"HIGH-CONFIDENCE match: {top_service_name_hybrid} " + f"(cosine={top_cosine:.4f}, gap={cosine_gap:.4f})" ) return ClassificationResult( workflow=WorkflowType.SERVICE, - confidence=0.5, + confidence=min(top_cosine, 1.0), metadata={ "matched_service_id": top_service_id, - "matched_service_name": top_service_name, - "rrf_score": top_score, - "score_gap": score_gap, - "score_ratio": score_ratio, - "needs_llm_confirmation": True, - "top_results": results[:3], + "matched_service_name": top_service_name_hybrid, + "cosine_score": top_cosine, + "cosine_gap": cosine_gap, + "needs_llm_confirmation": False, + "top_results": hybrid_results[:3], }, reasoning=( - f"Ambiguous match: {top_service_name} " - f"(score={top_score:.6f}) - LLM confirmation needed" + f"High-confidence match: {top_service_name_hybrid} " + f"(cosine={top_cosine:.4f}, gap={cosine_gap:.4f})" ), ) - # No confidence: skip SERVICE entirely → CONTEXT/RAG + # Medium confidence: above min threshold but ambiguous logger.info( - f"No service match 
(top_score={top_score:.6f} below threshold " - f"{HYBRID_SEARCH_MIN_THRESHOLD}) - routing to CONTEXT/RAG" + f"AMBIGUOUS match: {top_service_name_hybrid} " + f"(cosine={top_cosine:.4f}, gap={cosine_gap:.4f}) - needs LLM confirmation" ) return ClassificationResult( - workflow=WorkflowType.CONTEXT, - confidence=1.0, - metadata={"reason": "below_threshold", "top_score": top_score}, - reasoning=f"Top score {top_score:.6f} below threshold - skip to Context/RAG", + workflow=WorkflowType.SERVICE, + confidence=0.5, + metadata={ + "matched_service_id": top_service_id, + "matched_service_name": top_service_name_hybrid, + "cosine_score": top_cosine, + "cosine_gap": cosine_gap, + "needs_llm_confirmation": True, + "top_results": hybrid_results[:3], + }, + reasoning=( + f"Ambiguous match: {top_service_name_hybrid} " + f"(cosine={top_cosine:.4f}) - LLM confirmation needed" + ), ) except Exception as e: logger.error(f"Hybrid classification failed: {e}", exc_info=True) - # Fallback: route to CONTEXT/RAG on any error return ClassificationResult( workflow=WorkflowType.CONTEXT, confidence=1.0, @@ -269,6 +292,92 @@ def _get_query_embedding(self, query: str) -> Optional[List[float]]: logger.error(f"Failed to generate query embedding: {e}") return None + async def _dense_search( + self, + dense_vector: List[float], + top_k: int = DENSE_SEARCH_TOP_K, + ) -> List[Dict[str, Any]]: + """Execute dense-only search on Qdrant to get actual cosine similarity scores. + + This is used as a pre-filter: the cosine scores tell us HOW RELEVANT + the top results actually are, unlike RRF scores which are purely rank-based. 
+ + Args: + dense_vector: Dense embedding vector (3072-dim) + top_k: Number of results to return + + Returns: + List of result dicts with service metadata and cosine_score, + deduplicated by service_id (best score per service) + """ + try: + search_payload = { + "query": dense_vector, + "using": "dense", + "limit": top_k * 2, # Get more to allow dedup by service + "with_payload": True, + } + + response = await self._qdrant_client.post( + f"/collections/{QDRANT_COLLECTION}/points/query", + json=search_payload, + ) + + if response.status_code != 200: + logger.error( + f"Qdrant dense search failed: HTTP {response.status_code} - " + f"{response.text}" + ) + return [] + + search_results = response.json() + points = search_results.get("result", {}).get("points", []) + + if not points: + logger.info("No results from dense search") + return [] + + # Deduplicate by service_id (keep best cosine score per service) + service_results: Dict[str, Dict[str, Any]] = {} + for point in points: + payload = point.get("payload", {}) + score = float(point.get("score", 0)) + service_id = payload.get("service_id", "unknown") + + if service_id not in service_results or score > service_results[service_id].get("cosine_score", 0): + service_results[service_id] = { + "service_id": service_id, + "name": payload.get("name", ""), + "description": payload.get("description", ""), + "examples": payload.get("examples", []), + "entities": payload.get("entities", []), + "context": payload.get("context", ""), + "point_type": payload.get("point_type", "unknown"), + "example_text": payload.get("example_text"), + "cosine_score": score, + } + + # Sort by cosine score descending + sorted_results = sorted( + service_results.values(), + key=lambda x: x["cosine_score"], + reverse=True, + ) + + logger.info( + f"Dense search found {len(sorted_results)} unique services " + f"(top cosine: {sorted_results[0]['cosine_score']:.4f})" + ) + + return sorted_results + + except httpx.TimeoutException: + 
logger.error(f"Qdrant dense search timeout after {QDRANT_TIMEOUT}s") + return [] + except Exception as e: + logger.error(f"Dense search failed: {e}", exc_info=True) + return [] + async def _hybrid_search( self, dense_vector: List[float], @@ -535,7 +644,6 @@ async def _execute_with_fallback_async( ) # Get the layer order starting from current layer - from tool_classifier.enums import WORKFLOW_LAYER_ORDER current_index = WORKFLOW_LAYER_ORDER.index(start_layer) remaining_layers = WORKFLOW_LAYER_ORDER[current_index + 1 :] @@ -557,7 +665,6 @@ async def _execute_with_fallback_async( return result logger.info(f"[{chat_id}] {next_name} returned None, continuing...") - current_index += 1 # This should never happen since RAG/OOD should always return result raise RuntimeError("All workflows returned None (unexpected)") @@ -617,7 +724,6 @@ async def _execute_with_fallback_streaming( ) # Get the layer order starting from current layer - from tool_classifier.enums import WORKFLOW_LAYER_ORDER current_index = WORKFLOW_LAYER_ORDER.index(start_layer) remaining_layers = WORKFLOW_LAYER_ORDER[current_index + 1 :] @@ -642,7 +748,6 @@ async def _execute_with_fallback_streaming( return logger.info(f"[{chat_id}] {next_name} returned None, continuing...") - current_index += 1 # This should never happen raise RuntimeError("All workflows returned None in streaming (unexpected)") diff --git a/src/tool_classifier/constants.py b/src/tool_classifier/constants.py index 7db6aa9..9c4adf8 100644 --- a/src/tool_classifier/constants.py +++ b/src/tool_classifier/constants.py @@ -65,15 +65,21 @@ # ============================================================================ HYBRID_SEARCH_TOP_K = 5 -"""Number of top results from hybrid search for classification.""" +"""Number of top results from hybrid search for service identification.""" -HYBRID_SEARCH_MIN_THRESHOLD = 0.01 -"""Minimum RRF score to consider a result as a potential match.""" +DENSE_SEARCH_TOP_K = 3 +"""Number of top results from dense-only 
search for relevance scoring.""" -SCORE_RATIO_THRESHOLD = 2.0 -"""Score ratio (top/second) for confident service classification. -If the top result's RRF score is > 2x the second result, it's a high-confidence match.""" +DENSE_MIN_THRESHOLD = 0.20 +"""Minimum dense cosine similarity to consider a result as a potential match. +Below this → skip SERVICE entirely, go to CONTEXT/RAG. +Note: Multilingual embeddings (Estonian/short queries) typically yield +lower cosine scores (0.25-0.40) than English. Tune based on observed scores.""" -SCORE_GAP_THRESHOLD = 0.005 -"""Absolute score gap for confident classification. -Prevents false positives when both scores are very low.""" +DENSE_HIGH_CONFIDENCE_THRESHOLD = 0.40 +"""Dense cosine similarity for high-confidence service classification. +Above this AND score gap is large → SERVICE without LLM confirmation.""" + +DENSE_SCORE_GAP_THRESHOLD = 0.05 +"""Cosine score gap (top - second) for high-confidence classification. +Ensures the top result is significantly better than the runner-up.""" From 609e6d583cd2662803770656f8b4127f2c10ce3d Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Sun, 1 Mar 2026 10:46:15 +0530 Subject: [PATCH 16/27] fixing merge conflicts --- src/llm_orchestration_service.py | 314 ++++++++++-------- src/tool_classifier/base_workflow.py | 10 +- src/tool_classifier/classifier.py | 34 +- .../workflows/context_workflow.py | 10 +- src/tool_classifier/workflows/ood_workflow.py | 8 +- src/tool_classifier/workflows/rag_workflow.py | 36 +- .../workflows/service_workflow.py | 61 ++-- src/utils/time_tracker.py | 10 +- 8 files changed, 255 insertions(+), 228 deletions(-) diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 7432957..5095893 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -133,45 +133,69 @@ def __init__(self) -> None: # This allows components to be initialized per-request with proper context self.tool_classifier = None - # Initialize 
shared guardrails adapter at startup - self.shared_guardrails_adapter = self._initialize_shared_guardrails_at_startup() + # Initialize shared guardrails adapters at startup (production and testing) + self.shared_guardrails_adapters = ( + self._initialize_shared_guardrails_at_startup() + ) # Log feature flag configuration FeatureFlags.log_configuration() - def _initialize_shared_guardrails_at_startup(self) -> Optional[NeMoRailsAdapter]: + def _initialize_shared_guardrails_at_startup(self) -> Dict[str, NeMoRailsAdapter]: """ - Initialize shared guardrails at startup. + Initialize shared guardrails adapters at startup for production and testing environments. Returns: - NeMoRailsAdapter if successful, None on failure (graceful degradation) + Dictionary mapping environment names to NeMoRailsAdapter instances. + Empty dict on failure (graceful degradation). """ - try: - logger.info(" Initializing shared guardrails at startup...") - start_time = time.time() + adapters: Dict[str, NeMoRailsAdapter] = {} - # Initialize with production environment and no specific connection - # This creates a shared guardrails instance using default/production config - guardrails_adapter = self._initialize_guardrails( - environment="production", - connection_id=None, # Shared configuration, not user-specific - ) + # Initialize adapters for commonly-used environments + environments_to_initialize = ["production", "testing"] - elapsed_time = time.time() - start_time - logger.info( - f" Shared guardrails initialized successfully in {elapsed_time:.3f}s" - ) + logger.info(" Initializing shared guardrails at startup...") + total_start_time = time.time() - return guardrails_adapter + for env in environments_to_initialize: + try: + logger.info(f" Initializing guardrails for environment: {env}") + start_time = time.time() - except Exception as e: - logger.error(f" Failed to initialize shared guardrails at startup: {e}") + # Initialize with specific environment and no connection (shared config) + 
guardrails_adapter = self._initialize_guardrails( + environment=env, + connection_id=None, # Shared configuration, not user-specific + ) + + elapsed_time = time.time() - start_time + adapters[env] = guardrails_adapter + logger.info( + f" Guardrails for '{env}' initialized successfully in {elapsed_time:.3f}s" + ) + + except Exception as e: + logger.error(f" Failed to initialize guardrails for '{env}': {e}") + logger.warning( + f" Service will fall back to per-request initialization for '{env}' environment" + ) + # Continue with other environments - partial success is acceptable + continue + + total_elapsed = time.time() - total_start_time + + if adapters: + logger.info( + f" Shared guardrails initialized for {len(adapters)} environment(s) " + f"in {total_elapsed:.3f}s total" + ) + else: logger.error( - " Service will continue without guardrails (graceful degradation)" + " Failed to initialize any shared guardrails - " + "service will use per-request initialization (slower)" ) - # Return None - service continues without guardrails - # Per-request fallback will be attempted if needed - return None + + return adapters @observe(name="orchestration_request", as_type="agent") async def process_orchestration_request( @@ -197,8 +221,8 @@ async def process_orchestration_request( Raises: Exception: For any processing errors """ - costs_dict: Dict[str, Dict[str, Any]] = {} - timing_dict: Dict[str, float] = {} + costs_metric: Dict[str, Dict[str, Any]] = {} + time_metric: Dict[str, float] = {} try: logger.info( @@ -210,7 +234,7 @@ async def process_orchestration_request( start_time = time.time() detected_language = detect_language(request.message) language_name = get_language_name(detected_language) - timing_dict["language_detection"] = time.time() - start_time + time_metric["language_detection"] = time.time() - start_time logger.info( f"[{request.chatId}] Detected language: {language_name} ({detected_language})" ) @@ -222,7 +246,7 @@ async def process_orchestration_request( # 
STEP 0.5: Basic Query Validation (before expensive component initialization) start_time = time.time() validation_result = validate_query_basic(request.message) - timing_dict["query_validation"] = time.time() - start_time + time_metric["query_validation"] = time.time() - start_time if not validation_result.is_valid: logger.info( f"[{request.chatId}] Query validation failed: {validation_result.rejection_reason}" @@ -253,21 +277,21 @@ async def process_orchestration_request( # Initialize all service components (only for valid queries, with timing) start_time = time.time() components = self._initialize_service_components(request) - timing_dict["initialization"] = time.time() - start_time + time_metric["initialization"] = time.time() - start_time if components["guardrails_adapter"]: start_time = time.time() input_blocked_response = await self.handle_input_guardrails( components["guardrails_adapter"], request, {} ) - timing_dict["input_guardrails_check"] = time.time() - start_time + time_metric["input_guardrails_check"] = time.time() - start_time if input_blocked_response: logger.warning( f"[{request.chatId}] Input blocked before classifier - " f"saved expensive service discovery" ) - log_step_timings(timing_dict, request.chatId) + log_step_timings(time_metric, request.chatId) return input_blocked_response else: logger.info( @@ -298,7 +322,7 @@ async def process_orchestration_request( conversation_history=request.conversationHistory, language=detected_language, ) - timing_dict["classifier.classify"] = time.time() - start_time + time_metric["classifier.classify"] = time.time() - start_time logger.info( f"[{request.chatId}] Classification: {classification.workflow.value} " @@ -311,9 +335,9 @@ async def process_orchestration_request( classification=classification, request=request, is_streaming=False, - timing_dict=timing_dict, + time_metric=time_metric, ) - timing_dict["classifier.route"] = time.time() - start_time + time_metric["classifier.route"] = time.time() - 
start_time except Exception as classifier_error: logger.error( @@ -327,7 +351,7 @@ async def process_orchestration_request( ) # Execute existing RAG pipeline as fallback response = await self._execute_orchestration_pipeline( - request, components, costs_dict, timing_dict + request, components, costs_metric, time_metric ) else: raise @@ -337,27 +361,27 @@ async def process_orchestration_request( f"[{request.chatId}] Tool classifier disabled - using RAG pipeline" ) response = await self._execute_orchestration_pipeline( - request, components, costs_dict, timing_dict + request, components, costs_metric, time_metric ) # Log final costs and return response - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) # Update budget for the LLM connection self._update_connection_budget( - request.connection_id, costs_dict, request.environment + request.connection_id, costs_metric, request.environment ) if self.langfuse_config.langfuse_client: langfuse = self.langfuse_config.langfuse_client - total_costs = calculate_total_costs(costs_dict) + total_costs = calculate_total_costs(costs_metric) total_input_tokens = sum( - c.get("total_prompt_tokens", 0) for c in costs_dict.values() + c.get("total_prompt_tokens", 0) for c in costs_metric.values() ) total_output_tokens = sum( - c.get("total_completion_tokens", 0) for c in costs_dict.values() + c.get("total_completion_tokens", 0) for c in costs_metric.values() ) langfuse.update_current_generation( @@ -374,7 +398,7 @@ async def process_orchestration_request( }, metadata={ "total_calls": total_costs.get("total_calls", 0), - "cost_breakdown": costs_dict, + "cost_breakdown": costs_metric, "chat_id": request.chatId, "author_id": request.authorId, "environment": request.environment, @@ -398,12 +422,12 @@ async def process_orchestration_request( } ) langfuse.flush() - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + 
self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) # Update budget even on error self._update_connection_budget( - request.connection_id, costs_dict, request.environment + request.connection_id, costs_metric, request.environment ) return self._create_error_response(request) @@ -446,14 +470,14 @@ async def stream_orchestration_response( """ # Track costs after streaming completes - costs_dict: Dict[str, Dict[str, Any]] = {} - timing_dict: Dict[str, float] = {} + costs_metric: Dict[str, Dict[str, Any]] = {} + time_metric: Dict[str, float] = {} # STEP 0: Detect language from user message (with timing) start_time = time.time() detected_language = detect_language(request.message) language_name = get_language_name(detected_language) - timing_dict["language_detection"] = time.time() - start_time + time_metric["language_detection"] = time.time() - start_time logger.info( f"[{request.chatId}] Streaming request - Detected language: {language_name} ({detected_language})" ) @@ -465,7 +489,7 @@ async def stream_orchestration_response( # Step 0.5: Basic Query Validation (before guardrails, with timing) start_time = time.time() validation_result = validate_query_basic(request.message) - timing_dict["query_validation"] = time.time() - start_time + time_metric["query_validation"] = time.time() - start_time if not validation_result.is_valid: logger.info( f"[{request.chatId}] Streaming - Query validation failed: {validation_result.rejection_reason}" @@ -493,7 +517,7 @@ async def stream_orchestration_response( # Initialize all service components (with timing) start_time = time.time() components = self._initialize_service_components(request) - timing_dict["initialization"] = time.time() - start_time + time_metric["initialization"] = time.time() - start_time # This implements fail-fast principle - block malicious/policy-violating inputs # before expensive operations (service discovery, LLM calls, streaming setup) @@ -506,9 +530,9 @@ async def 
stream_orchestration_response( input_check_result = await self._check_input_guardrails_async( guardrails_adapter=components["guardrails_adapter"], user_message=request.message, - costs_dict=costs_dict, + costs_metric=costs_metric, ) - timing_dict["input_guardrails_check"] = time.time() - start_time + time_metric["input_guardrails_check"] = time.time() - start_time if not input_check_result.allowed: logger.warning( @@ -519,9 +543,9 @@ async def stream_orchestration_response( request.chatId, INPUT_GUARDRAIL_VIOLATION_MESSAGE ) yield self.format_sse(request.chatId, "END") - self.log_costs(costs_dict) + self.log_costs(costs_metric) # Log timings before returning (for visibility) - log_step_timings(timing_dict, request.chatId) + log_step_timings(time_metric, request.chatId) stream_ctx.mark_completed() return else: @@ -581,8 +605,8 @@ async def stream_orchestration_response( ) # Log costs and timings - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) stream_ctx.mark_completed() return # Exit after successful classifier routing @@ -612,8 +636,8 @@ async def stream_orchestration_response( request=request, components=components, stream_ctx=stream_ctx, - costs_dict=costs_dict, - timing_dict=timing_dict, + costs_metric=costs_metric, + time_metric=time_metric, ): yield sse_chunk @@ -630,12 +654,12 @@ async def stream_orchestration_response( yield self.format_sse(request.chatId, TECHNICAL_ISSUE_MESSAGE) yield self.format_sse(request.chatId, "END") - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) # Update budget even on outer exception self._update_connection_budget( - request.connection_id, costs_dict, request.environment + request.connection_id, costs_metric, request.environment ) if self.langfuse_config.langfuse_client: @@ -656,8 +680,8 @@ async def 
_stream_rag_pipeline( request: OrchestrationRequest, components: Dict[str, Any], stream_ctx: Any, - costs_dict: Dict[str, Dict[str, Any]], - timing_dict: Dict[str, float], + costs_metric: Dict[str, Dict[str, Any]], + time_metric: Dict[str, float], ) -> AsyncIterator[str]: """ Core RAG streaming pipeline without classifier routing. @@ -675,8 +699,8 @@ async def _stream_rag_pipeline( request: Orchestration request components: Initialized service components (LLM, retriever, generator, guardrails) stream_ctx: Stream context for tracking - costs_dict: Dictionary to accumulate costs - timing_dict: Dictionary to accumulate timings + costs_metric: Dictionary to accumulate costs + time_metric: Dictionary to accumulate timings Yields: SSE-formatted strings @@ -695,8 +719,8 @@ async def _stream_rag_pipeline( original_message=request.message, conversation_history=request.conversationHistory, ) - timing_dict["prompt_refiner"] = time.time() - start_time - costs_dict["prompt_refiner"] = refiner_usage + time_metric["prompt_refiner"] = time.time() - start_time + costs_metric["prompt_refiner"] = refiner_usage logger.info( f"[{request.chatId}] [{stream_ctx.stream_id}] Prompt refinement complete" @@ -712,7 +736,7 @@ async def _stream_rag_pipeline( relevant_chunks = await self._safe_retrieve_contextual_chunks( components["contextual_retriever"], refined_output, request ) - timing_dict["contextual_retrieval"] = time.time() - start_time + time_metric["contextual_retrieval"] = time.time() - start_time except ( ContextualRetrieverInitializationError, ContextualRetrievalFailureError, @@ -728,8 +752,8 @@ async def _stream_rag_pipeline( ) yield self.format_sse(request.chatId, localized_msg) yield self.format_sse(request.chatId, "END") - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) stream_ctx.mark_completed() return @@ -742,8 +766,8 @@ async def _stream_rag_pipeline( ) yield 
self.format_sse(request.chatId, localized_msg) yield self.format_sse(request.chatId, "END") - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) stream_ctx.mark_completed() return @@ -762,7 +786,7 @@ async def _stream_rag_pipeline( chunks=relevant_chunks, max_blocks=ResponseGenerationConstants.DEFAULT_MAX_BLOCKS, ) - timing_dict["scope_check"] = time.time() - start_time + time_metric["scope_check"] = time.time() - start_time if is_out_of_scope: logger.info( @@ -773,8 +797,8 @@ async def _stream_rag_pipeline( ) yield self.format_sse(request.chatId, localized_msg) yield self.format_sse(request.chatId, "END") - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) stream_ctx.mark_completed() return @@ -842,9 +866,9 @@ async def bot_response_generator() -> AsyncIterator[str]: yield self.format_sse(request.chatId, "END") usage_info = get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + costs_metric["streaming_generation"] = usage_info + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) stream_ctx.mark_completed() return @@ -871,9 +895,9 @@ async def bot_response_generator() -> AsyncIterator[str]: yield self.format_sse(request.chatId, "END") usage_info = get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + costs_metric["streaming_generation"] = usage_info + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) stream_ctx.mark_completed() return @@ -940,11 +964,11 @@ async def bot_response_generator() -> AsyncIterator[str]: # Extract usage information after streaming completes usage_info = 
get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info + costs_metric["streaming_generation"] = usage_info # Record timings - timing_dict["streaming_generation"] = time.time() - streaming_step_start - timing_dict["output_guardrails"] = 0.0 # Inline during streaming + time_metric["streaming_generation"] = time.time() - streaming_step_start + time_metric["output_guardrails"] = 0.0 # Inline during streaming # Calculate streaming duration streaming_duration = (datetime.now() - streaming_start_time).total_seconds() @@ -953,18 +977,18 @@ async def bot_response_generator() -> AsyncIterator[str]: ) # Log costs and trace - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) # Update budget self._update_connection_budget( - request.connection_id, costs_dict, request.environment + request.connection_id, costs_metric, request.environment ) # Langfuse tracking if self.langfuse_config.langfuse_client: langfuse = self.langfuse_config.langfuse_client - total_costs = calculate_total_costs(costs_dict) + total_costs = calculate_total_costs(costs_metric) langfuse.update_current_generation( model=components["llm_manager"] @@ -980,7 +1004,7 @@ async def bot_response_generator() -> AsyncIterator[str]: "streaming": True, "streaming_duration_seconds": streaming_duration, "chunks_streamed": chunk_count, - "cost_breakdown": costs_dict, + "cost_breakdown": costs_metric, "chat_id": request.chatId, "environment": request.environment, "stream_id": stream_ctx.stream_id, @@ -1015,13 +1039,13 @@ async def bot_response_generator() -> AsyncIterator[str]: f"[{request.chatId}] [{stream_ctx.stream_id}] Client disconnected" ) usage_info = get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + costs_metric["streaming_generation"] = usage_info + 
self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) # Update budget even on client disconnect self._update_connection_budget( - request.connection_id, costs_dict, request.environment + request.connection_id, costs_metric, request.environment ) raise except Exception as stream_error: @@ -1038,13 +1062,13 @@ async def bot_response_generator() -> AsyncIterator[str]: yield self.format_sse(request.chatId, "END") usage_info = get_lm_usage_since(history_length_before) - costs_dict["streaming_generation"] = usage_info - self.log_costs(costs_dict) - log_step_timings(timing_dict, request.chatId) + costs_metric["streaming_generation"] = usage_info + self.log_costs(costs_metric) + log_step_timings(time_metric, request.chatId) # Update budget even on streaming error self._update_connection_budget( - request.connection_id, costs_dict, request.environment + request.connection_id, costs_metric, request.environment ) def format_sse(self, chat_id: str, content: str) -> str: @@ -1079,16 +1103,18 @@ def _initialize_service_components( environment=request.environment, connection_id=request.connection_id ) - # Use shared guardrails adapter (initialized at startup) - # Falls back to per-request initialization if shared instance unavailable - if self.shared_guardrails_adapter is not None: - logger.debug( - "Using shared guardrails adapter (startup-initialized, zero overhead)" + if request.environment in self.shared_guardrails_adapters: + logger.info( + f" Using shared guardrails adapter for environment='{request.environment}' " + f"(startup-initialized, zero overhead)" ) - components["guardrails_adapter"] = self.shared_guardrails_adapter + components["guardrails_adapter"] = self.shared_guardrails_adapters[ + request.environment + ] else: logger.warning( - "Shared guardrails unavailable, initializing per-request (slower)" + f" Shared guardrails unavailable for environment='{request.environment}', " + f"initializing per-request (slower)" ) 
components["guardrails_adapter"] = self._safe_initialize_guardrails( request.environment, request.connection_id @@ -1203,8 +1229,8 @@ async def _execute_orchestration_pipeline( self, request: OrchestrationRequest, components: Dict[str, Any], - costs_dict: Dict[str, Dict[str, Any]], - timing_dict: Dict[str, float], + costs_metric: Dict[str, Dict[str, Any]], + time_metric: Dict[str, float], prefix: str = "", ) -> Union[OrchestrationResponse, TestOrchestrationResponse]: """Execute the main orchestration pipeline with all components. @@ -1212,8 +1238,8 @@ async def _execute_orchestration_pipeline( Args: request: Orchestration request components: Initialized service components - costs_dict: Dictionary for cost tracking - timing_dict: Dictionary for timing tracking + costs_metric: Dictionary for cost tracking + time_metric: Dictionary for timing tracking prefix: Optional prefix for timing keys (e.g., "rag" for workflow namespacing) """ # Note: Query validation AND input guardrails check now happen at orchestration level @@ -1228,8 +1254,8 @@ async def _execute_orchestration_pipeline( conversation_history=request.conversationHistory, ) timing_key = f"{prefix}.prompt_refiner" if prefix else "prompt_refiner" - timing_dict[timing_key] = time.time() - start_time - costs_dict["prompt_refiner"] = refiner_usage + time_metric[timing_key] = time.time() - start_time + costs_metric["prompt_refiner"] = refiner_usage # Step 2: Retrieve relevant chunks using contextual retrieval try: @@ -1240,7 +1266,7 @@ async def _execute_orchestration_pipeline( timing_key = ( f"{prefix}.contextual_retrieval" if prefix else "contextual_retrieval" ) - timing_dict[timing_key] = time.time() - start_time + time_metric[timing_key] = time.time() - start_time except ( ContextualRetrieverInitializationError, ContextualRetrievalFailureError, @@ -1261,12 +1287,12 @@ async def _execute_orchestration_pipeline( refined_output=refined_output, relevant_chunks=relevant_chunks, 
response_generator=components["response_generator"], - costs_dict=costs_dict, + costs_metric=costs_metric, ) timing_key = ( f"{prefix}.response_generation" if prefix else "response_generation" ) - timing_dict[timing_key] = time.time() - start_time + time_metric[timing_key] = time.time() - start_time # Step 4: Output Guardrails Check # Apply guardrails to all response types for consistent safety across all environments @@ -1275,12 +1301,12 @@ async def _execute_orchestration_pipeline( components["guardrails_adapter"], generated_response, request, - costs_dict, + costs_metric, ) timing_key = ( f"{prefix}.output_guardrails_check" if prefix else "output_guardrails_check" ) - timing_dict[timing_key] = time.time() - start_time + time_metric[timing_key] = time.time() - start_time # Step 5: Store inference data (for production and testing environments) # Only store OrchestrationResponse (has chatId), not TestOrchestrationResponse @@ -1353,13 +1379,13 @@ async def handle_input_guardrails( self, guardrails_adapter: NeMoRailsAdapter, request: OrchestrationRequest, - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], ) -> Union[OrchestrationResponse, TestOrchestrationResponse, None]: """Check input guardrails and return blocked response if needed.""" input_check_result = await self._check_input_guardrails_async( guardrails_adapter=guardrails_adapter, user_message=request.message, - costs_dict=costs_dict, + costs_metric=costs_metric, ) if not input_check_result.allowed: @@ -1479,7 +1505,7 @@ async def handle_output_guardrails( guardrails_adapter: Optional[NeMoRailsAdapter], generated_response: Union[OrchestrationResponse, TestOrchestrationResponse], request: OrchestrationRequest, - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], ) -> Union[OrchestrationResponse, TestOrchestrationResponse]: """Check output guardrails and handle blocked responses for both response types.""" # Determine if we should run guardrails 
(same logic for both response types) @@ -1495,7 +1521,7 @@ async def handle_output_guardrails( output_check_result = await self._check_output_guardrails( guardrails_adapter=guardrails_adapter, assistant_message=generated_response.content, - costs_dict=costs_dict, + costs_metric=costs_metric, ) if not output_check_result.allowed: @@ -1772,7 +1798,7 @@ async def _check_input_guardrails_async( self, guardrails_adapter: NeMoRailsAdapter, user_message: str, - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], ) -> GuardrailCheckResult: """ Check user input against guardrails and track costs (async version). @@ -1780,7 +1806,7 @@ async def _check_input_guardrails_async( Args: guardrails_adapter: The guardrails adapter instance user_message: The user message to check - costs_dict: Dictionary to store cost information + costs_metric: Dictionary to store cost information Returns: GuardrailCheckResult: Result of the guardrail check @@ -1792,7 +1818,7 @@ async def _check_input_guardrails_async( result = await guardrails_adapter.check_input_async(user_message) # Store guardrail costs - costs_dict["input_guardrails"] = result.usage + costs_metric["input_guardrails"] = result.usage if self.langfuse_config.langfuse_client: langfuse = self.langfuse_config.langfuse_client langfuse.update_current_generation( @@ -1845,7 +1871,7 @@ def _check_input_guardrails( self, guardrails_adapter: NeMoRailsAdapter, user_message: str, - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], ) -> GuardrailCheckResult: """ Check user input against guardrails and track costs (sync version for non-streaming). 
@@ -1853,7 +1879,7 @@ def _check_input_guardrails( Args: guardrails_adapter: The guardrails adapter instance user_message: The user message to check - costs_dict: Dictionary to store cost information + costs_metric: Dictionary to store cost information Returns: GuardrailCheckResult: Result of the guardrail check @@ -1864,7 +1890,7 @@ def _check_input_guardrails( result = guardrails_adapter.check_input(user_message) # Store guardrail costs - costs_dict["input_guardrails"] = result.usage + costs_metric["input_guardrails"] = result.usage if self.langfuse_config.langfuse_client: langfuse = self.langfuse_config.langfuse_client langfuse.update_current_generation( @@ -1917,7 +1943,7 @@ async def _check_output_guardrails( self, guardrails_adapter: NeMoRailsAdapter, assistant_message: str, - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], ) -> GuardrailCheckResult: """ Check assistant output against guardrails and track costs. @@ -1925,7 +1951,7 @@ async def _check_output_guardrails( Args: guardrails_adapter: The guardrails adapter instance assistant_message: The assistant message to check - costs_dict: Dictionary to store cost information + costs_metric: Dictionary to store cost information Returns: GuardrailCheckResult: Result of the guardrail check @@ -1936,7 +1962,7 @@ async def _check_output_guardrails( result = await guardrails_adapter.check_output_async(assistant_message) # Store guardrail costs - costs_dict["output_guardrails"] = result.usage + costs_metric["output_guardrails"] = result.usage if self.langfuse_config.langfuse_client: langfuse = self.langfuse_config.langfuse_client langfuse.update_current_generation( @@ -1986,22 +2012,22 @@ async def _check_output_guardrails( usage={}, ) - def log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: + def log_costs(self, costs_metric: Dict[str, Dict[str, Any]]) -> None: """ Log cost information for tracking. 
Args: - costs_dict: Dictionary of costs per component + costs_metric: Dictionary of costs per component """ try: - if not costs_dict: + if not costs_metric: return - total_costs = calculate_total_costs(costs_dict) + total_costs = calculate_total_costs(costs_metric) logger.info("LLM USAGE COSTS BREAKDOWN:") - for component, costs in costs_dict.items(): + for component, costs in costs_metric.items(): logger.info( f" {component:20s}: ${costs.get('total_cost', 0):.6f} " f"({costs.get('num_calls', 0)} calls, " @@ -2055,7 +2081,7 @@ def log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: def _update_connection_budget( self, connection_id: Optional[str], - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], environment: str = "development", ) -> None: """ @@ -2064,7 +2090,7 @@ def _update_connection_budget( Args: connection_id: The LLM connection ID (optional) - costs_dict: Dictionary of costs per component + costs_metric: Dictionary of costs per component environment: The deployment environment (production/testing/development) """ try: @@ -2092,7 +2118,9 @@ def _update_connection_budget( f"Error fetching production connection ID: {str(fetch_error)}" ) - result = budget_tracker.update_budget_from_costs(connection_id, costs_dict) + result = budget_tracker.update_budget_from_costs( + connection_id, costs_metric + ) if result.get("success"): if result.get("budget_exceeded"): @@ -2447,7 +2475,7 @@ def _generate_rag_response( refined_output: PromptRefinerOutput, relevant_chunks: List[Dict[str, Union[str, float, Dict[str, Any]]]], response_generator: Optional[ResponseGeneratorAgent] = None, - costs_dict: Optional[Dict[str, Dict[str, Any]]] = None, + costs_metric: Optional[Dict[str, Dict[str, Any]]] = None, ) -> Union[OrchestrationResponse, TestOrchestrationResponse]: """ Generate response using retrieved chunks and ResponseGeneratorAgent only. 
@@ -2455,8 +2483,8 @@ def _generate_rag_response( """ logger.info("Starting RAG response generation") - if costs_dict is None: - costs_dict = {} + if costs_metric is None: + costs_metric = {} # If response generator is not available -> standardized technical issue if response_generator is None: @@ -2514,7 +2542,7 @@ def _generate_rag_response( "num_calls": 0, }, ) - costs_dict["response_generator"] = generator_usage + costs_metric["response_generator"] = generator_usage if self.langfuse_config.langfuse_client: langfuse = self.langfuse_config.langfuse_client langfuse.update_current_generation( @@ -2794,4 +2822,4 @@ def _get_config_loader(self): self._config_loader = ConfigurationLoader() logger.debug("Lazy initialized ConfigurationLoader for vector indexer") - return self._config_loader + return self._config_loader \ No newline at end of file diff --git a/src/tool_classifier/base_workflow.py b/src/tool_classifier/base_workflow.py index 45886fa..8f07076 100644 --- a/src/tool_classifier/base_workflow.py +++ b/src/tool_classifier/base_workflow.py @@ -33,7 +33,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute workflow in non-streaming mode. 
@@ -44,7 +44,7 @@ async def execute_async( Args: request: The orchestration request containing user query and context context: Workflow-specific metadata from ClassificationResult.metadata - timing_dict: Optional dictionary for tracking step execution times + time_metric: Optional dictionary for tracking step execution times Returns: OrchestrationResponse if workflow can handle this query @@ -70,7 +70,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute workflow in streaming mode (Server-Sent Events). @@ -81,7 +81,7 @@ async def execute_streaming( Args: request: The orchestration request containing user query and context context: Workflow-specific metadata from ClassificationResult.metadata - timing_dict: Optional dictionary for tracking step execution times + time_metric: Optional dictionary for tracking step execution times Returns: AsyncIterator[str] yielding SSE-formatted strings if workflow can handle @@ -119,4 +119,4 @@ async def stream_response(): return stream_response() """ - pass + pass \ No newline at end of file diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index ab9e402..f9d83e5 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -106,7 +106,7 @@ async def route_to_workflow( classification: ClassificationResult, request: OrchestrationRequest, is_streaming: Literal[False] = False, - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> OrchestrationResponse: ... @overload @@ -115,7 +115,7 @@ async def route_to_workflow( classification: ClassificationResult, request: OrchestrationRequest, is_streaming: Literal[True], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> AsyncIterator[str]: ... 
async def route_to_workflow( @@ -123,7 +123,7 @@ async def route_to_workflow( classification: ClassificationResult, request: OrchestrationRequest, is_streaming: bool = False, - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Union[OrchestrationResponse, AsyncIterator[str]]: """ Route request to appropriate workflow based on classification. @@ -135,7 +135,7 @@ async def route_to_workflow( classification: Classification result from classify() request: Original orchestration request is_streaming: Whether to use streaming mode (for /orchestrate/stream) - timing_dict: Optional timing dictionary for workflow step tracking + time_metric: Optional timing dictionary for workflow step tracking Returns: OrchestrationResponse for non-streaming mode @@ -166,7 +166,7 @@ async def route_to_workflow( request=request, context=classification.metadata, start_layer=classification.workflow, - timing_dict=timing_dict, + time_metric=time_metric, ) else: # NON-STREAMING MODE: For /orchestrate and /orchestrate/test endpoints @@ -175,7 +175,7 @@ async def route_to_workflow( request=request, context=classification.metadata, start_layer=classification.workflow, - timing_dict=timing_dict, + time_metric=time_metric, ) def _get_workflow_executor(self, workflow_type: WorkflowType) -> Any: @@ -194,7 +194,7 @@ async def _execute_with_fallback_async( request: OrchestrationRequest, context: Dict[str, Any], start_layer: WorkflowType, - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> OrchestrationResponse: """ Execute workflow with fallback to subsequent layers (non-streaming). 
@@ -210,7 +210,7 @@ async def _execute_with_fallback_async( request: Orchestration request context: Workflow context/metadata start_layer: Starting workflow type - timing_dict: Optional timing dictionary for tracking + time_metric: Optional timing dictionary for tracking """ chat_id = request.chatId workflow_name = WORKFLOW_DISPLAY_NAMES.get(start_layer, start_layer.value) @@ -218,7 +218,7 @@ async def _execute_with_fallback_async( logger.info(f"[{chat_id}] Executing {workflow_name} (non-streaming)") try: - result = await workflow.execute_async(request, context, timing_dict) + result = await workflow.execute_async(request, context, time_metric) if result is not None: logger.info(f"[{chat_id}] {workflow_name} handled successfully") @@ -246,7 +246,7 @@ async def _execute_with_fallback_async( f"(Layer {WORKFLOW_LAYER_ORDER.index(next_layer) + 1})" ) - result = await next_workflow.execute_async(request, {}, timing_dict) + result = await next_workflow.execute_async(request, {}, time_metric) if result is not None: logger.info(f"[{chat_id}] {next_name} handled successfully") @@ -262,7 +262,7 @@ async def _execute_with_fallback_async( logger.error(f"[{chat_id}] Error executing {workflow_name}: {e}") # Fallback to RAG on error logger.info(f"[{chat_id}] Falling back to RAG due to error") - rag_result = await self.rag_workflow.execute_async(request, {}, timing_dict) + rag_result = await self.rag_workflow.execute_async(request, {}, time_metric) if rag_result is not None: return rag_result else: @@ -274,7 +274,7 @@ async def _execute_with_fallback_streaming( request: OrchestrationRequest, context: Dict[str, Any], start_layer: WorkflowType, - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> AsyncIterator[str]: """ Execute workflow with fallback to subsequent layers (streaming). 
@@ -290,7 +290,7 @@ async def _execute_with_fallback_streaming( request: Orchestration request context: Workflow context/metadata start_layer: Starting workflow type - timing_dict: Optional timing dictionary for tracking + time_metric: Optional timing dictionary for tracking """ chat_id = request.chatId workflow_name = WORKFLOW_DISPLAY_NAMES.get(start_layer, start_layer.value) @@ -298,7 +298,7 @@ async def _execute_with_fallback_streaming( logger.info(f"[{chat_id}] Executing {workflow_name} (streaming)") try: - result = await workflow.execute_streaming(request, context, timing_dict) + result = await workflow.execute_streaming(request, context, time_metric) if result is not None: logger.info(f"[{chat_id}] {workflow_name} streaming started") @@ -329,7 +329,7 @@ async def _execute_with_fallback_streaming( f"(Layer {layer_number})" ) - result = await next_workflow.execute_streaming(request, {}, timing_dict) + result = await next_workflow.execute_streaming(request, {}, time_metric) if result is not None: logger.info(f"[{chat_id}] {next_name} streaming started") @@ -348,10 +348,10 @@ async def _execute_with_fallback_streaming( # Fallback to RAG on error logger.info(f"[{chat_id}] Falling back to RAG streaming due to error") streaming_result = await self.rag_workflow.execute_streaming( - request, {}, timing_dict + request, {}, time_metric ) if streaming_result is not None: async for chunk in streaming_result: yield chunk else: - raise RuntimeError("RAG workflow returned None unexpectedly") + raise RuntimeError("RAG workflow returned None unexpectedly") \ No newline at end of file diff --git a/src/tool_classifier/workflows/context_workflow.py b/src/tool_classifier/workflows/context_workflow.py index 4039b23..2dc1998 100644 --- a/src/tool_classifier/workflows/context_workflow.py +++ b/src/tool_classifier/workflows/context_workflow.py @@ -35,7 +35,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, 
float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute context workflow in non-streaming mode. @@ -46,7 +46,7 @@ async def execute_async( Args: request: Orchestration request with user query and history context: Metadata with is_greeting, can_answer_from_history flags - timing_dict: Optional timing dictionary for future timing tracking + time_metric: Optional timing dictionary for future timing tracking Returns: OrchestrationResponse with context-based answer or None to fallback @@ -64,7 +64,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute context workflow in streaming mode. @@ -75,7 +75,7 @@ async def execute_streaming( Args: request: Orchestration request with user query and history context: Metadata with is_greeting, can_answer_from_history flags - timing_dict: Optional timing dictionary for future timing tracking + time_metric: Optional timing dictionary for future timing tracking Returns: AsyncIterator yielding SSE strings or None to fallback @@ -87,4 +87,4 @@ async def execute_streaming( # TODO: Implement context streaming logic here # For now, return None to trigger fallback to next layer (RAG) - return None + return None \ No newline at end of file diff --git a/src/tool_classifier/workflows/ood_workflow.py b/src/tool_classifier/workflows/ood_workflow.py index c3f9215..1e58552 100644 --- a/src/tool_classifier/workflows/ood_workflow.py +++ b/src/tool_classifier/workflows/ood_workflow.py @@ -39,7 +39,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute OOD workflow in non-streaming mode. 
@@ -69,7 +69,7 @@ async def execute_async( Args: request: Orchestration request with user query context: Unused (OOD doesn't need metadata) - timing_dict: Optional timing dictionary for future timing tracking + time_metric: Optional timing dictionary for future timing tracking Returns: OrchestrationResponse with OOD message @@ -88,7 +88,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute OOD workflow in streaming mode. @@ -131,4 +131,4 @@ async def stream_ood_message(): # TODO: Implement OOD streaming logic here # For now, return None (will be implemented as simple message streaming) - return None + return None \ No newline at end of file diff --git a/src/tool_classifier/workflows/rag_workflow.py b/src/tool_classifier/workflows/rag_workflow.py index 5c8cd05..426359f 100644 --- a/src/tool_classifier/workflows/rag_workflow.py +++ b/src/tool_classifier/workflows/rag_workflow.py @@ -50,7 +50,7 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ Execute RAG workflow in non-streaming mode. 
@@ -65,7 +65,7 @@ async def execute_async( Args: request: Orchestration request with user query context: Unused (RAG doesn't need classification metadata) - timing_dict: Optional timing dictionary from parent (for unified tracking) + time_metric: Optional timing dictionary from parent (for unified tracking) Returns: OrchestrationResponse with RAG-generated answer @@ -74,10 +74,10 @@ async def execute_async( logger.info(f"[{request.chatId}] Executing RAG workflow (non-streaming)") # Initialize components needed for RAG pipeline - costs_dict: Dict[str, Any] = {} - # Use parent timing_dict or create new one - if timing_dict is None: - timing_dict = {} + costs_metric: Dict[str, Any] = {} + # Use parent time_metric or create new one + if time_metric is None: + time_metric = {} # Initialize service components components = self.orchestration_service._initialize_service_components(request) @@ -86,13 +86,13 @@ async def execute_async( response = await self.orchestration_service._execute_orchestration_pipeline( request=request, components=components, - costs_dict=costs_dict, - timing_dict=timing_dict, + costs_metric=costs_metric, + time_metric=time_metric, prefix="rag", ) # Log costs (timing is logged by parent orchestration service) - self.orchestration_service.log_costs(costs_dict) + self.orchestration_service.log_costs(costs_metric) return response @@ -100,7 +100,7 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ Execute RAG workflow in streaming mode. 
@@ -119,7 +119,7 @@ async def execute_streaming( Args: request: Orchestration request with user query context: Unused (RAG doesn't need classification metadata) - timing_dict: Optional timing dictionary from parent (for unified tracking) + time_metric: Optional timing dictionary from parent (for unified tracking) Returns: AsyncIterator yielding SSE-formatted strings @@ -128,10 +128,10 @@ async def execute_streaming( logger.info(f"[{request.chatId}] Executing RAG workflow (streaming)") # Initialize tracking dictionaries - costs_dict: Dict[str, Any] = {} - # Use parent timing_dict or create new one - if timing_dict is None: - timing_dict = {} + costs_metric: Dict[str, Any] = {} + # Use parent time_metric or create new one + if time_metric is None: + time_metric = {} # Get components from context if provided, otherwise initialize components = context.get("components") @@ -172,7 +172,7 @@ def mark_error(self, error_id: str) -> None: request=request, components=components, stream_ctx=stream_ctx, - costs_dict=costs_dict, - timing_dict=timing_dict, + costs_metric=costs_metric, + time_metric=time_metric, ): - yield sse_chunk + yield sse_chunk \ No newline at end of file diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index bed97dd..6460544 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -27,6 +27,7 @@ SERVICE_DISCOVERY_TIMEOUT, ) from tool_classifier.intent_detector import IntentDetectionModule +import time class LLMServiceProtocol(Protocol): @@ -64,11 +65,11 @@ def format_sse(self, chat_id: str, content: str) -> str: """ ... - def log_costs(self, costs_dict: Dict[str, Dict[str, Any]]) -> None: + def log_costs(self, costs_metric: Dict[str, Dict[str, Any]]) -> None: """Log cost information for tracking. Args: - costs_dict: Dictionary of costs per component + costs_metric: Dictionary of costs per component """ ... 
@@ -296,7 +297,7 @@ async def _process_intent_detection( request: OrchestrationRequest, chat_id: str, context: Dict[str, Any], - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], ) -> None: """Detect intent, validate service, and populate context. @@ -311,7 +312,7 @@ async def _process_intent_detection( request: Orchestration request chat_id: Chat ID for logging context: Context dict to populate with results - costs_dict: Dictionary to track LLM costs + costs_metric: Dictionary to track LLM costs """ intent_result, intent_usage = await self._detect_service_intent( user_query=request.message, @@ -319,7 +320,7 @@ async def _process_intent_detection( conversation_history=request.conversationHistory, chat_id=chat_id, ) - costs_dict["intent_detection"] = intent_usage + costs_metric["intent_detection"] = intent_usage if intent_result and intent_result.get("matched_service_id"): service_id = intent_result["matched_service_id"] @@ -463,7 +464,7 @@ async def _log_request_details( request: OrchestrationRequest, context: Dict[str, Any], mode: str, - costs_dict: Dict[str, Dict[str, Any]], + costs_metric: Dict[str, Dict[str, Any]], ) -> None: """Log request details and perform service discovery. 
@@ -471,7 +472,7 @@ async def _log_request_details( request: The orchestration request context: Workflow context dictionary mode: Execution mode ("streaming" or "non-streaming") - costs_dict: Dictionary to accumulate cost tracking information + costs_metric: Dictionary to accumulate cost tracking information """ chat_id = request.chatId logger.info(f"[{chat_id}] SERVICE WORKFLOW ({mode}): {request.message}") @@ -529,7 +530,7 @@ async def _log_request_details( request=request, chat_id=chat_id, context=context, - costs_dict=costs_dict, + costs_metric=costs_metric, ) else: services = response_data.get("services", []) @@ -540,7 +541,7 @@ async def _log_request_details( request=request, chat_id=chat_id, context=context, - costs_dict=costs_dict, + costs_metric=costs_metric, ) else: logger.warning(f"[{chat_id}] Service discovery failed") @@ -549,31 +550,30 @@ async def execute_async( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """Execute service workflow in non-streaming mode. 
Args: request: Orchestration request context: Workflow context - timing_dict: Optional timing dictionary for unified tracking + time_metric: Optional timing dictionary for unified tracking """ - import time chat_id = request.chatId # Create costs tracking dictionary (follows RAG workflow pattern) - costs_dict: Dict[str, Dict[str, Any]] = {} - # Use parent timing_dict or create new one - if timing_dict is None: - timing_dict = {} + costs_metric: Dict[str, Dict[str, Any]] = {} + # Use parent time_metric or create new one + if time_metric is None: + time_metric = {} # Service discovery with timing start_time = time.time() await self._log_request_details( - request, context, mode="non-streaming", costs_dict=costs_dict + request, context, mode="non-streaming", costs_metric=costs_metric ) - timing_dict["service.discovery"] = time.time() - start_time + time_metric["service.discovery"] = time.time() - start_time # Check if service was detected and validated if not context.get("service_id"): @@ -611,7 +611,7 @@ async def execute_async( service_name=service_metadata["service_name"], chat_id=chat_id, ) - timing_dict["service.entity_validation"] = time.time() - start_time + time_metric["service.entity_validation"] = time.time() - start_time logger.info( f"[{chat_id}] - Validation status: " @@ -673,7 +673,7 @@ async def execute_async( # Log costs after service workflow completes (follows RAG workflow pattern) if self.orchestration_service: - self.orchestration_service.log_costs(costs_dict) + self.orchestration_service.log_costs(costs_metric) return OrchestrationResponse( chatId=request.chatId, @@ -688,31 +688,30 @@ async def execute_streaming( self, request: OrchestrationRequest, context: Dict[str, Any], - timing_dict: Optional[Dict[str, float]] = None, + time_metric: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """Execute service workflow in streaming mode. 
Args: request: Orchestration request context: Workflow context - timing_dict: Optional timing dictionary for unified tracking + time_metric: Optional timing dictionary for unified tracking """ - import time chat_id = request.chatId # Create costs tracking dictionary (follows RAG workflow pattern) - costs_dict: Dict[str, Dict[str, Any]] = {} - # Use parent timing_dict or create new one - if timing_dict is None: - timing_dict = {} + costs_metric: Dict[str, Dict[str, Any]] = {} + # Use parent time_metric or create new one + if time_metric is None: + time_metric = {} # Service discovery with timing start_time = time.time() await self._log_request_details( - request, context, mode="streaming", costs_dict=costs_dict + request, context, mode="streaming", costs_metric=costs_metric ) - timing_dict["service.discovery"] = time.time() - start_time + time_metric["service.discovery"] = time.time() - start_time # Check if service was detected and validated if not context.get("service_id"): @@ -820,7 +819,7 @@ async def debug_stream() -> AsyncIterator[str]: # Log costs after streaming completes (follows RAG workflow pattern) # Must be inside generator because costs are accumulated during streaming - orchestration_service.log_costs(costs_dict) + orchestration_service.log_costs(costs_metric) return debug_stream() - # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (END) + # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (END) \ No newline at end of file diff --git a/src/utils/time_tracker.py b/src/utils/time_tracker.py index 606e530..c9c1141 100644 --- a/src/utils/time_tracker.py +++ b/src/utils/time_tracker.py @@ -5,16 +5,16 @@ def log_step_timings( - timing_dict: Dict[str, float], chat_id: Optional[str] = None + time_metric: Dict[str, float], chat_id: Optional[str] = None ) -> None: """ Log all step timings in a clean format. 
Args: - timing_dict: Dictionary containing step names and their execution times + time_metric: Dictionary containing step names and their execution times chat_id: Optional chat ID for context """ - if not timing_dict: + if not time_metric: return # Parent/composite timings that should be hidden from logs @@ -25,7 +25,7 @@ def log_step_timings( logger.info(f"{prefix}STEP EXECUTION TIMES:") total_time = 0.0 - for step_name, elapsed_time in timing_dict.items(): + for step_name, elapsed_time in time_metric.items(): # Skip parent/composite timings entirely if step_name in PARENT_TIMINGS: continue @@ -37,4 +37,4 @@ def log_step_timings( logger.info(f" {step_name:25s}: {elapsed_time:.3f}s") total_time += elapsed_time - logger.info(f" {'TOTAL':25s}: {total_time:.3f}s") + logger.info(f" {'TOTAL':25s}: {total_time:.3f}s") \ No newline at end of file From bee9fbfba8e0ce9bfaf62788159dfe8deb451b55 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Mon, 2 Mar 2026 21:57:15 +0530 Subject: [PATCH 17/27] fixed issue --- src/tool_classifier/sparse_encoder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tool_classifier/sparse_encoder.py b/src/tool_classifier/sparse_encoder.py index 0d0dc3f..06f38a8 100644 --- a/src/tool_classifier/sparse_encoder.py +++ b/src/tool_classifier/sparse_encoder.py @@ -8,6 +8,7 @@ Uses hash-based indexing compatible with Qdrant's sparse vector format. """ +import hashlib import re from collections import Counter from dataclasses import dataclass, field @@ -68,10 +69,12 @@ def compute_sparse_vector(text: str) -> SparseVector: token_counts = Counter(tokens) # Hash-based indexing: map each token to an index in [0, SPARSE_VOCAB_SIZE) - # Collisions are handled by summing values at the same index + # Uses MD5 (first 4 bytes) for deterministic cross-process indices. + # Collisions are handled by summing values at the same index. 
hash_counts: dict[int, float] = {} for token, count in token_counts.items(): - idx = hash(token) % SPARSE_VOCAB_SIZE + digest = hashlib.md5(token.encode(), usedforsecurity=False).digest() # noqa: S324 + idx = int.from_bytes(digest[:4], "little") % SPARSE_VOCAB_SIZE # Handle hash collisions by accumulating hash_counts[idx] = hash_counts.get(idx, 0) + float(count) From 0a0806ff72e9bb8e36ea3576917f9f397aff067d Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 3 Mar 2026 09:26:39 +0530 Subject: [PATCH 18/27] optimize first user query response generation time --- src/contextual_retrieval/bm25_search.py | 45 ++++++++++-- .../contextual_retriever.py | 30 ++++++-- src/guardrails/nemo_rails_adapter.py | 41 ++++------- src/llm_orchestration_service.py | 56 ++++++++++++++- src/llm_orchestration_service_api.py | 68 ++++--------------- .../vault/vault_client.py | 25 +------ src/utils/prompt_config_loader.py | 10 +-- 7 files changed, 156 insertions(+), 119 deletions(-) diff --git a/src/contextual_retrieval/bm25_search.py b/src/contextual_retrieval/bm25_search.py index 5bde02d..d9545d0 100644 --- a/src/contextual_retrieval/bm25_search.py +++ b/src/contextual_retrieval/bm25_search.py @@ -5,10 +5,11 @@ when collection data changes. 
""" -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any, Optional, Set from loguru import logger from rank_bm25 import BM25Okapi import re +import asyncio from contextual_retrieval.contextual_retrieval_api_client import get_http_client_manager from contextual_retrieval.error_handler import SecureErrorHandler from contextual_retrieval.constants import ( @@ -33,6 +34,11 @@ def __init__( self.chunk_mapping: Dict[int, Dict[str, Any]] = {} self.last_collection_stats: Dict[str, Any] = {} self.tokenizer_pattern = re.compile(r"\w+") # Simple word tokenizer + # Background refresh state - prevents blocking queries during index rebuild + self._refresh_in_progress: bool = False + self._refresh_lock: asyncio.Lock = asyncio.Lock() + # Strong references to background tasks to prevent premature GC + self._background_tasks: Set[asyncio.Task[None]] = set() async def _get_http_client_manager(self): """Get the HTTP client manager instance.""" @@ -103,10 +109,16 @@ async def search_bm25( limit = self._config.search.topk_bm25 try: - # Check if index needs refresh + # Check if index needs refresh (non-blocking: schedule background rebuild, + # current query continues with the existing index to avoid latency). if await self._should_refresh_index(): - logger.info("Collection data changed - refreshing BM25 index") - await self.initialize_index() + logger.info( + "Collection data changed - scheduling background BM25 refresh " + "(current query uses existing index)" + ) + task = asyncio.create_task(self._background_refresh_index()) + self._background_tasks.add(task) + task.add_done_callback(self._background_tasks.discard) if not self.bm25_index: logger.error("BM25 index not initialized") @@ -162,6 +174,31 @@ async def search_bm25( logger.error(f"BM25 search failed: {e}") return [] + async def _background_refresh_index(self) -> None: + """ + Rebuild the BM25 index in the background without blocking in-flight queries. 
+ + Uses a lock to ensure only one rebuild runs at a time. If a rebuild is + already in progress when a second collection-change is detected, the + duplicate request is silently discarded — the in-progress rebuild will + capture the latest data anyway. + """ + if self._refresh_in_progress: + logger.debug("BM25 background refresh already running - skipping duplicate") + return + async with self._refresh_lock: + if self._refresh_in_progress: + return + self._refresh_in_progress = True + try: + logger.info("Starting background BM25 index refresh...") + await self.initialize_index() + logger.info("Background BM25 index refresh complete") + except Exception as e: + logger.error(f"Background BM25 refresh failed: {e}") + finally: + self._refresh_in_progress = False + async def _fetch_all_contextual_chunks(self) -> List[Dict[str, Any]]: """Fetch all chunks from contextual collections.""" all_chunks: List[Dict[str, Any]] = [] diff --git a/src/contextual_retrieval/contextual_retriever.py b/src/contextual_retrieval/contextual_retriever.py index b6d4699..048c131 100644 --- a/src/contextual_retrieval/contextual_retriever.py +++ b/src/contextual_retrieval/contextual_retriever.py @@ -42,6 +42,7 @@ def __init__( connection_id: Optional[str] = None, config_path: Optional[str] = None, llm_service: Optional["LLMOrchestrationService"] = None, + shared_bm25: Optional[SmartBM25Search] = None, ): """ Initialize contextual retriever. @@ -52,6 +53,10 @@ def __init__( connection_id: Optional connection ID config_path: Optional config file path llm_service: Optional LLM service instance (prevents circular dependency) + shared_bm25: Optional pre-warmed SmartBM25Search singleton. When + provided the retriever skips the expensive index-build step during + initialize() and reuses the already-ready index, eliminating the + cold-start latency on the first query. 
""" self.qdrant_url = qdrant_url self.environment = environment @@ -70,7 +75,14 @@ def __init__( # Initialize components with configuration self.provider_detection = DynamicProviderDetection(qdrant_url, self.config) self.qdrant_search = QdrantContextualSearch(qdrant_url, self.config) - self.bm25_search = SmartBM25Search(qdrant_url, self.config) + # Use the injected pre-warmed singleton when available; create a fresh + # instance only as a fallback (avoids duplicate Qdrant scroll on startup). + self.bm25_search: SmartBM25Search = ( + shared_bm25 + if shared_bm25 is not None + else SmartBM25Search(qdrant_url, self.config) + ) + self._bm25_is_shared: bool = shared_bm25 is not None self.rank_fusion = DynamicRankFusion(self.config) # State @@ -87,10 +99,18 @@ async def initialize(self) -> bool: try: logger.info("Initializing Contextual Retriever...") - # Initialize BM25 index - bm25_success = await self.bm25_search.initialize_index() - if not bm25_success: - logger.warning("BM25 initialization failed - will skip BM25 search") + # If received a pre-warmed shared BM25 index, reuse it directly. + # This is the normal startup path and adds zero latency to the first query. + if self._bm25_is_shared and self.bm25_search.bm25_index is not None: + logger.info( + "Using pre-warmed shared BM25 index - skipping BM25 build " + f"({len(self.bm25_search.chunk_mapping)} chunks ready)" + ) + else: + # No shared index available - build it now (fallback path). 
+ bm25_success = await self.bm25_search.initialize_index() + if not bm25_success: + logger.warning("BM25 initialization failed - will skip BM25 search") self.initialized = True logger.info("Contextual Retriever initialized successfully") diff --git a/src/guardrails/nemo_rails_adapter.py b/src/guardrails/nemo_rails_adapter.py index 1ae3898..17f6585 100644 --- a/src/guardrails/nemo_rails_adapter.py +++ b/src/guardrails/nemo_rails_adapter.py @@ -57,14 +57,14 @@ def __init__( self._rails: Optional[LLMRails] = None self._initialized = False - logger.info(f"Initializing NeMoRailsAdapter for environment: {environment}") + logger.debug(f"NeMoRailsAdapter created for environment: {environment}") def _register_custom_provider(self) -> None: """Register DSPy custom LLM provider with NeMo Guardrails.""" try: from src.guardrails.dspy_nemo_adapter import DSPyLLMProviderFactory - logger.info("Registering DSPy custom LLM provider with NeMo Guardrails") + logger.debug("Registering DSPy custom LLM provider with NeMo Guardrails") # NeMo Guardrails' register_llm_provider accepts callable factories at runtime. # We instantiate DSPyLLMProviderFactory first, then register the instance. @@ -74,7 +74,7 @@ def _register_custom_provider(self) -> None: # We use cast to satisfy the type checker while maintaining runtime correctness. 
factory = DSPyLLMProviderFactory() register_llm_provider("dspy-custom", cast(Type[BaseLLM], factory)) - logger.info("DSPy custom LLM provider registered successfully") + logger.debug("DSPy custom LLM provider registered successfully") except Exception as e: logger.error(f"Failed to register DSPy custom provider: {str(e)}") @@ -86,8 +86,8 @@ def _ensure_initialized(self) -> None: return try: - logger.info( - "Initializing NeMo Guardrails with DSPy LLM and streaming support" + logger.debug( + f"Initializing NeMo Guardrails with DSPy LLM (env={self.environment})" ) from llm_orchestrator_config.llm_manager import LLMManager @@ -106,33 +106,24 @@ def _ensure_initialized(self) -> None: guardrails_loader = get_guardrails_loader() config_path, metadata = guardrails_loader.get_optimized_config_path() - logger.info(f"Loading guardrails config from: {config_path}") + logger.debug(f"Loading guardrails config from: {config_path}") rails_config = RailsConfig.from_path(str(config_path.parent)) rails_config.streaming = True - logger.info("Streaming configuration:") - logger.info(f" Global streaming: {rails_config.streaming}") - - if hasattr(rails_config, "rails") and hasattr(rails_config.rails, "output"): + if metadata.get("optimized", False): + version = metadata.get("version", "unknown") + metrics = metadata.get("metrics", {}) + accuracy = metrics.get("weighted_accuracy", "N/A") if metrics else "N/A" logger.info( - f" Output rails config exists: {rails_config.rails.output}" + f"Guardrails ready: OPTIMIZED config v={version}, " + f"weighted_accuracy={accuracy}, env={self.environment}" ) else: - logger.info(" Output rails config will be loaded from YAML") - - if metadata.get("optimized", False): logger.info( - f"Loaded OPTIMIZED guardrails config (version: {metadata.get('version', 'unknown')})" + f"Guardrails ready: BASE config (no optimization), env={self.environment}" ) - metrics = metadata.get("metrics", {}) - if metrics: - logger.info( - f" Optimization metrics: 
weighted_accuracy={metrics.get('weighted_accuracy', 'N/A')}" - ) - else: - logger.info("Loaded BASE guardrails config (no optimization)") from src.guardrails.dspy_nemo_adapter import DSPyNeMoLLM @@ -144,18 +135,16 @@ def _ensure_initialized(self) -> None: verbose=False, ) - if ( + if not ( hasattr(self._rails.config, "streaming") and self._rails.config.streaming ): - logger.info("✓ Streaming enabled in NeMo Guardrails configuration") - else: logger.warning( "Streaming not enabled in configuration - this may cause issues" ) self._initialized = True - logger.info("NeMo Guardrails initialized successfully with DSPy LLM") + logger.debug("NeMo Guardrails initialized successfully with DSPy LLM") except Exception as e: logger.error(f"Failed to initialize NeMo Guardrails: {str(e)}") diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 0d32941..88c85dd 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -55,6 +55,7 @@ from src.utils.query_validator import validate_query_basic from src.guardrails import NeMoRailsAdapter, GuardrailCheckResult from src.contextual_retrieval import ContextualRetriever +from src.contextual_retrieval.bm25_search import SmartBM25Search from src.llm_orchestrator_config.exceptions import ( ContextualRetrieverInitializationError, ContextualRetrievalFailureError, @@ -133,6 +134,13 @@ def __init__(self) -> None: # This allows components to be initialized per-request with proper context self.tool_classifier = None + # Shared BM25 search index pre-warmed at startup. + # Populated by _prewarm_shared_bm25() which is called from the FastAPI + # lifespan so it runs inside the async event loop. Until then it is None + # and each ContextualRetriever will build the index on first query (graceful + # degradation path). 
+ self.shared_bm25_search: Optional[SmartBM25Search] = None + # Initialize shared guardrails adapters at startup (production and testing) self.shared_guardrails_adapters = ( self._initialize_shared_guardrails_at_startup() @@ -168,10 +176,17 @@ def _initialize_shared_guardrails_at_startup(self) -> Dict[str, NeMoRailsAdapter connection_id=None, # Shared configuration, not user-specific ) + # Eagerly trigger the full internal initialization (NeMo config + # loading, LLMRails creation, embedding model download) so that + # the first user query is not penalised by the cold-start cost. + # Without this, _ensure_initialized() runs lazily on the first + guardrails_adapter._ensure_initialized() + elapsed_time = time.time() - start_time adapters[env] = guardrails_adapter logger.info( - f" Guardrails for '{env}' initialized successfully in {elapsed_time:.3f}s" + f" Guardrails for '{env}' fully initialized in {elapsed_time:.3f}s " + f"(NeMo Rails + embedding model loaded)" ) except Exception as e: @@ -197,6 +212,43 @@ def _initialize_shared_guardrails_at_startup(self) -> Dict[str, NeMoRailsAdapter return adapters + async def _prewarm_shared_bm25(self) -> None: + """ + Pre-warm the shared BM25 index at application startup. + + Must be called from an async context (e.g. FastAPI lifespan) so that + asyncio is available for the HTTP calls to Qdrant. Absorbs the + cold-start latency (fetching all chunks + building BM25Okapi corpus) + at deploy time so that the first real user query is not penalised. + + On any failure the method logs a warning and leaves + self.shared_bm25_search as None — the ContextualRetriever will then + fall back to building the index on the first query (graceful degradation). 
+ """ + qdrant_url = os.getenv("QDRANT_URL", "http://qdrant:6333") + logger.info("Pre-warming shared BM25 index at startup...") + prewarm_start = time.time() + try: + bm25 = SmartBM25Search(qdrant_url=qdrant_url) + success = await bm25.initialize_index() + if success: + self.shared_bm25_search = bm25 + elapsed = time.time() - prewarm_start + logger.info( + f"Shared BM25 index pre-warmed in {elapsed:.2f}s " + f"({len(bm25.chunk_mapping)} chunks indexed)" + ) + else: + logger.warning( + "BM25 pre-warming produced an empty index - " + "index will be built on first query instead" + ) + except Exception as e: + logger.warning( + f"BM25 pre-warming failed: {e} - " + f"index will be built on first query (graceful degradation)" + ) + @observe(name="orchestration_request", as_type="agent") async def process_orchestration_request( self, request: OrchestrationRequest @@ -1786,7 +1838,6 @@ def _initialize_guardrails( environment=environment, connection_id=connection_id ) - logger.info("Guardrails adapter initialized successfully") return guardrails_adapter except Exception as e: @@ -2322,6 +2373,7 @@ def _initialize_contextual_retriever( environment=environment, connection_id=connection_id, llm_service=self, # Inject self to eliminate circular dependency + shared_bm25=self.shared_bm25_search, # Inject pre-warmed BM25 index ) logger.info("Contextual retriever initialized successfully") diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py index 2a929db..12c5dc3 100644 --- a/src/llm_orchestration_service_api.py +++ b/src/llm_orchestration_service_api.py @@ -1,5 +1,6 @@ """LLM Orchestration Service API - FastAPI application.""" +import logging from contextlib import asynccontextmanager from typing import Any, AsyncGenerator, Dict @@ -49,10 +50,23 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: """Application lifespan manager.""" # Startup logger.info("Starting LLM Orchestration Service API") + + # 
nemoguardrails.actions.action_dispatcher logs every action it registers + logging.getLogger("nemoguardrails.actions.action_dispatcher").setLevel( + logging.WARNING + ) + logging.getLogger("langfuse").setLevel(logging.ERROR) + try: app.state.orchestration_service = LLMOrchestrationService() logger.info("LLM Orchestration Service initialized successfully") + # Pre-warm shared BM25 index so the first query is never penalised by + # the cold-start cost of scrolling all Qdrant chunks + building the index. + logger.info("Pre-warming shared BM25 index...") + await app.state.orchestration_service._prewarm_shared_bm25() + logger.info("BM25 pre-warming complete") + # Initialize rate limiter if enabled if StreamConfig.RATE_LIMIT_ENABLED: app.state.rate_limiter = RateLimiter( @@ -841,60 +855,6 @@ def refresh_prompt_config(http_request: Request) -> Dict[str, Any]: }, ) from e - try: - success = orchestration_service.prompt_config_loader.force_refresh() - - if success: - # Get prompt metadata without exposing content (security) - custom_instructions = ( - orchestration_service.prompt_config_loader.get_custom_instructions() - ) - prompt_length = len(custom_instructions) - - # Generate hash for verification purposes (without exposing content) - import hashlib - - prompt_hash = hashlib.sha256(custom_instructions.encode()).hexdigest()[:16] - - logger.info( - f"Prompt configuration cache refreshed successfully ({prompt_length} chars)" - ) - - return { - "refreshed": True, - "message": "Prompt configuration refreshed successfully", - "prompt_length": prompt_length, - "content_hash": prompt_hash, # Safe: hash instead of preview - } - else: - # No fresh data loaded - could be fetch failure or truly not found - error_id = generate_error_id() - logger.warning( - f"[{error_id}] Prompt configuration refresh returned empty result" - ) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail={ - "error": "No prompt configuration found in database", - "error_id": error_id, - }, 
- ) - - except HTTPException: - # Re-raise HTTP exceptions as-is - raise - except Exception as e: - # Unexpected errors during refresh - error_id = generate_error_id() - logger.error(f"[{error_id}] Failed to refresh prompt configuration: {e}") - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail={ - "error": "Failed to refresh prompt configuration", - "error_id": error_id, - }, - ) from e - if __name__ == "__main__": logger.info("Starting LLM Orchestration Service API server on port 8100") diff --git a/src/llm_orchestrator_config/vault/vault_client.py b/src/llm_orchestrator_config/vault/vault_client.py index b0c3a3d..241f019 100644 --- a/src/llm_orchestrator_config/vault/vault_client.py +++ b/src/llm_orchestrator_config/vault/vault_client.py @@ -142,10 +142,7 @@ def is_authenticated(self) -> bool: try: # If using proxy mode, skip token checks if not self.use_token_file: - logger.debug( - "Using vault agent proxy - skipping token authentication check" - ) - # Just verify vault is accessible + # Just verify vault is accessible (no token needed with proxy) return self.is_vault_available() # Check token is available @@ -182,27 +179,10 @@ def is_vault_available(self) -> bool: """ try: response = self.client.sys.read_health_status() - logger.debug(f"Vault health response type: {type(response)}") - logger.debug(f"Vault health response: {response}") # For Vault health endpoint, we primarily check the HTTP status code if hasattr(response, "status_code"): - is_available = response.status_code == 200 - logger.debug( - f"Vault health check: status_code={response.status_code}, available={is_available}" - ) - - # Try to get additional details from response body if available - try: - if hasattr(response, "json") and callable(response.json): - health_data = response.json() - logger.debug(f"Vault health details: {health_data}") - except Exception as e: - logger.debug( - f"Could not parse health response body (this is normal): {e}" - ) - - return 
is_available + return response.status_code == 200 else: # Fallback for non-Response objects (direct dict) if isinstance(response, dict): @@ -291,7 +271,6 @@ def list_secrets(self, path: str) -> Optional[list[str]]: path=path, mount_point=self.mount_point, ) - logger.debug(f"List secrets response: {response}") if response and "data" in response: keys = response["data"].get("keys", []) diff --git a/src/utils/prompt_config_loader.py b/src/utils/prompt_config_loader.py index 8df8945..01a40c8 100644 --- a/src/utils/prompt_config_loader.py +++ b/src/utils/prompt_config_loader.py @@ -229,7 +229,7 @@ def _load_from_ruuter_with_retry(self) -> Optional[str]: # Unwrap Ruuter's response wrapper if present if isinstance(data, dict) and "response" in data: - logger.info("Unwrapping 'response' key") + logger.debug("Unwrapping 'response' key") data = data["response"] # Now extract prompt from the unwrapped data @@ -238,25 +238,25 @@ def _load_from_ruuter_with_retry(self) -> Optional[str]: first_elem_keys = ( list(data[0].keys()) if isinstance(data[0], dict) else [] ) - logger.info( + logger.debug( f"Extracting from list, first element keys: {first_elem_keys}" ) prompt = data[0].get("prompt", "").strip() elif isinstance(data, dict): # Dict format: {"id": 1, "prompt": "..."} - logger.info(f"Extracting from dict, keys: {list(data.keys())}") + logger.debug(f"Extracting from dict, keys: {list(data.keys())}") prompt = data.get("prompt", "").strip() else: logger.warning( f"Unexpected data type: {type(data).__name__}, structure not recognized" ) - logger.info( + logger.debug( f"Extracted prompt length: {len(prompt) if prompt else 0}" ) if prompt: - logger.info( + logger.debug( f"Loaded prompt on attempt {attempt} ({len(prompt)} chars)" ) return prompt From 1eb8b4750b9cdeff2c1107c436f902db5aacc721 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 3 Mar 2026 10:05:34 +0530 Subject: [PATCH 19/27] fixed pr reviewed issues --- src/contextual_retrieval/bm25_search.py | 22 +++++++++++++------ 
src/intent_data_enrichment/main_enrichment.py | 22 ++++++++++++------- src/llm_orchestration_service.py | 10 +++++++++ src/llm_orchestration_service_api.py | 7 ++++-- src/tool_classifier/classifier.py | 8 +++++++ .../workflows/service_workflow.py | 12 +++++----- 6 files changed, 58 insertions(+), 23 deletions(-) diff --git a/src/contextual_retrieval/bm25_search.py b/src/contextual_retrieval/bm25_search.py index d9545d0..2be66e4 100644 --- a/src/contextual_retrieval/bm25_search.py +++ b/src/contextual_retrieval/bm25_search.py @@ -112,13 +112,21 @@ async def search_bm25( # Check if index needs refresh (non-blocking: schedule background rebuild, # current query continues with the existing index to avoid latency). if await self._should_refresh_index(): - logger.info( - "Collection data changed - scheduling background BM25 refresh " - "(current query uses existing index)" - ) - task = asyncio.create_task(self._background_refresh_index()) - self._background_tasks.add(task) - task.add_done_callback(self._background_tasks.discard) + # Avoid scheduling multiple concurrent refresh tasks; coalesce while a + # refresh is already in progress. 
+ if not self._refresh_in_progress: + logger.info( + "Collection data changed - scheduling background BM25 refresh " + "(current query uses existing index)" + ) + task = asyncio.create_task(self._background_refresh_index()) + self._background_tasks.add(task) + task.add_done_callback(self._background_tasks.discard) + else: + logger.debug( + "BM25 refresh already in progress; skipping scheduling of a " + "new background refresh task" + ) if not self.bm25_index: logger.error("BM25 index not initialized") diff --git a/src/intent_data_enrichment/main_enrichment.py b/src/intent_data_enrichment/main_enrichment.py index 16db8c6..9724683 100644 --- a/src/intent_data_enrichment/main_enrichment.py +++ b/src/intent_data_enrichment/main_enrichment.py @@ -219,14 +219,20 @@ async def enrich_service(service_data: ServiceData) -> EnrichmentResult: qdrant.ensure_collection() # Delete old points before inserting new ones - qdrant.delete_service_points(service_data.service_id) - - # Step 5: Bulk upsert all points (examples + summary) - logger.info( - f"Step 5: Storing {len(enriched_points)} points in Qdrant " - f"({len(service_data.examples)} examples + 1 summary)" - ) - success = qdrant.upsert_service_points(enriched_points) + deleted = qdrant.delete_service_points(service_data.service_id) + if not deleted: + logger.error( + f"Failed to delete existing points for service_id={service_data.service_id}; " + "aborting upsert to avoid stale data." 
+ ) + success = False + else: + # Step 5: Bulk upsert all points (examples + summary) + logger.info( + f"Step 5: Storing {len(enriched_points)} points in Qdrant " + f"({len(service_data.examples)} examples + 1 summary)" + ) + success = qdrant.upsert_service_points(enriched_points) finally: qdrant.close() diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 88c85dd..7f7432f 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -249,6 +249,16 @@ async def _prewarm_shared_bm25(self) -> None: f"index will be built on first query (graceful degradation)" ) + async def aclose(self) -> None: + """Release all long-lived async resources held by the service. + + Must be awaited during application shutdown (FastAPI lifespan teardown) + to avoid connection leaks from the ToolClassifier's httpx client. + """ + if self.tool_classifier is not None: + await self.tool_classifier.aclose() + logger.debug("LLMOrchestrationService async resources closed") + @observe(name="orchestration_request", as_type="agent") async def process_orchestration_request( self, request: OrchestrationRequest diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py index 12c5dc3..0e9b127 100644 --- a/src/llm_orchestration_service_api.py +++ b/src/llm_orchestration_service_api.py @@ -85,8 +85,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # Shutdown logger.info("Shutting down LLM Orchestration Service API") - # Clean up resources if needed - if hasattr(app.state, "orchestration_service"): + if ( + hasattr(app.state, "orchestration_service") + and app.state.orchestration_service is not None + ): + await app.state.orchestration_service.aclose() app.state.orchestration_service = None diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index 0d4644d..f18ef3e 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -99,6 +99,14 @@ 
def __init__( f"(Qdrant: {self._qdrant_base_url})" ) + async def aclose(self) -> None: + """Close the shared httpx client and release connection pool resources. + + Must be awaited during application shutdown to avoid connection leaks. + """ + await self._qdrant_client.aclose() + logger.debug("ToolClassifier Qdrant httpx client closed") + async def classify( self, query: str, diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index dbb5211..bb72f78 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -581,11 +581,11 @@ async def execute_async( # Skip service discovery — use hybrid search match directly matched_service_id = context.get("matched_service_id") matched_service_name = context.get("matched_service_name") - rrf_score = context.get("rrf_score", 0) + cosine_score = context.get("cosine_score", 0.0) logger.info( f"[{chat_id}] HIGH-CONFIDENCE SERVICE MATCH (non-streaming): " - f"{matched_service_name} (rrf_score={rrf_score:.6f}) - " + f"{matched_service_name} (cosine_score={cosine_score:.4f}) - " f"skipping discovery" ) @@ -634,7 +634,7 @@ async def execute_async( context=context, costs_metric=costs_metric, ) - time_metric["service.discovery"] = time.time() - start_time + time_metric["service.intent_detection"] = time.time() - start_time else: # LEGACY PATH: No hybrid search metadata (classifier disabled or error) @@ -784,11 +784,11 @@ async def execute_streaming( if needs_llm_confirmation is False: # HIGH CONFIDENCE PATH: Skip discovery, use matched service matched_service_name = context.get("matched_service_name") - rrf_score = context.get("rrf_score", 0) + cosine_score = context.get("cosine_score", 0.0) logger.info( f"[{chat_id}] HIGH-CONFIDENCE SERVICE MATCH (streaming): " - f"{matched_service_name} (rrf_score={rrf_score:.6f})" + f"{matched_service_name} (cosine_score={cosine_score:.4f})" ) top_results = 
context.get("top_results", []) @@ -831,7 +831,7 @@ async def execute_streaming( context=context, costs_metric=costs_metric, ) - time_metric["service.discovery"] = time.time() - start_time + time_metric["service.intent_detection"] = time.time() - start_time else: # LEGACY PATH: Full service discovery (original behavior) From bb1601fd09c4db75c656978174f2da37b7dcc96c Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Sun, 8 Mar 2026 14:41:18 +0530 Subject: [PATCH 20/27] service integration --- .../hbs/bot_responses_to_messages.handlebars | 14 + DSL/DMapper/rag-search/lib/helpers.js | 29 ++ DSL/Ruuter.public/services/GET/.guard | 28 ++ .../GET/account/user-profile-settings.yml | 27 ++ .../services/GET/active-services.yml | 20 + DSL/Ruuter.public/services/GET/generic/.guard | 4 + .../accounts/customer-support-activity.yml | 22 + .../accounts/user-profile-settings.yml | 26 ++ .../GET/generic/accounts/user-role.yml | 19 + .../services/GET/generic/csa/active-chats.yml | 76 ++++ .../services/GET/generic/userinfo.yml | 29 ++ DSL/Ruuter.public/services/GET/get-sticky.yml | 40 ++ .../services/GET/internal/domain-file.yml | 40 ++ .../GET/internal/return-file-locations.yml | 27 ++ .../GET/mocks/client-input-variables.yml | 19 + .../services/GET/mocks/service-settings.yml | 19 + .../services/GET/mocks/validation-mock.yml | 26 ++ .../services/GET/rasa/rule-names.yml | 29 ++ .../services/GET/secrets-with-priority.yml | 34 ++ DSL/Ruuter.public/services/GET/secrets.yml | 20 + .../services/GET/service-settings.yml | 19 + .../services/GET/services/active/.guard | 4 + .../services/GET/services/draft/.guard | 4 + .../services/GET/services/inactive/.guard | 4 + .../services/GET/services/log-by-request.yml | 33 ++ .../services/GET/services/log-by-service.yml | 33 ++ .../GET/services/services-detailed/nok.yml | 52 +++ .../services/GET/services/statistics.yml | 21 + .../services/GET/services/status.yml | 24 ++ DSL/Ruuter.public/services/GET/slots.yml | 26 ++ 
.../services/GET/steps/preferences.yml | 72 ++++ .../services/GET/sticky/example.yml | 58 +++ DSL/Ruuter.public/services/POST/.guard | 28 ++ DSL/Ruuter.public/services/POST/auth/.guard | 4 + .../services/POST/auth/login.yml | 101 +++++ DSL/Ruuter.public/services/POST/csv.yml | 50 +++ .../POST/dates/calculate-difference.yml | 82 ++++ .../services/POST/endpoints/common.yml | 48 +++ .../services/POST/file/rename.yml | 42 ++ .../services/POST/mocks/RBAC-mock.yml | 50 +++ .../POST/mocks/dates/calculate-difference.yml | 136 +++++++ .../services/POST/mocks/service-settings.yml | 19 + .../services/POST/mocks/services/add.yml | 44 ++ .../mocks/services/open-api-spec-mock.yml | 17 + .../services/POST/mocks/user-info.yml | 52 +++ .../services/POST/mocks/validation-mock.yml | 26 ++ .../services/POST/rasa/rules/add.yml | 129 ++++++ .../services/POST/saveJsonToYml.yml | 39 ++ .../services/POST/service-by-id.yml | 90 +++++ .../services/POST/service-settings.yml | 29 ++ DSL/Ruuter.public/services/POST/services.yml | 43 ++ .../services/POST/services/active/.guard | 4 + .../services/active/Broneeringu_kinnitus.yml | 65 +++ .../active/Kalastusloa_uuendamise_teade.yml | 60 +++ .../POST/services/active/Koolivaheajad.yml | 63 +++ .../services/active/Lihtne_test_teenus.yml | 61 +++ .../services/active/customer_feedback.yml | 82 ++++ .../services/POST/services/add.yml | 177 ++++++++ .../POST/services/create-endpoint.yml | 46 +++ .../POST/services/delete-endpoint.yml | 34 ++ .../services/POST/services/delete.yml | 155 +++++++ .../services/domain-intent-service-link.yml | 157 ++++++++ .../services/POST/services/draft/.guard | 4 + .../services/POST/services/draft/test.tmp | 48 +++ .../services/POST/services/edit.yml | 381 ++++++++++++++++++ .../POST/services/endpoint-url-validation.yml | 32 ++ .../POST/services/import-services.yml | 71 ++++ .../services/POST/services/inactive/.guard | 4 + .../services/POST/services/open-api-spec.yml | 34 ++ .../POST/services/requests/explain.yml | 95 +++++ 
.../services/POST/services/resql/add.yml | 62 +++ .../services/POST/services/status.yml | 303 ++++++++++++++ .../POST/services/update-endpoint.yml | 61 +++ .../services/POST/steps/preferences.yml | 68 ++++ DSL/Ruuter.public/services/POST/user-info.yml | 16 + DSL/Ruuter.public/services/TEMPLATES/RBAC.yml | 51 +++ .../TEMPLATES/check-user-authority.yml | 50 +++ .../services/TEMPLATES/client-input.yml | 19 + .../services/TEMPLATES/direct-to-cs.yml | 42 ++ .../services/TEMPLATES/end-conversation.yml | 42 ++ .../services/TEMPLATES/file-generate.yml | 45 +++ .../services/TEMPLATES/file-signing.yml | 35 ++ .../services/TEMPLATES/open-webpage.yml | 44 ++ .../TEMPLATES/send-message-to-client.yml | 42 ++ DSL/Ruuter.public/services/TEMPLATES/siga.yml | 132 ++++++ DSL/Ruuter.public/services/TEMPLATES/tara.yml | 51 +++ .../TEMPLATES/validation-template.yml | 56 +++ constants.ini | 5 +- docs/HYBRID_SEARCH_CLASSIFICATION.md | 59 ++- docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md | 262 ++++++------ new.txt | 38 ++ src/tool_classifier/constants.py | 3 + .../workflows/service_workflow.py | 331 ++++++--------- tests/data/classification_test_queries.json | 266 ++++++++++++ 94 files changed, 5246 insertions(+), 337 deletions(-) create mode 100644 DSL/DMapper/rag-search/hbs/bot_responses_to_messages.handlebars create mode 100644 DSL/Ruuter.public/services/GET/.guard create mode 100644 DSL/Ruuter.public/services/GET/account/user-profile-settings.yml create mode 100644 DSL/Ruuter.public/services/GET/active-services.yml create mode 100644 DSL/Ruuter.public/services/GET/generic/.guard create mode 100644 DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml create mode 100644 DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml create mode 100644 DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml create mode 100644 DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml create mode 100644 
DSL/Ruuter.public/services/GET/generic/userinfo.yml create mode 100644 DSL/Ruuter.public/services/GET/get-sticky.yml create mode 100644 DSL/Ruuter.public/services/GET/internal/domain-file.yml create mode 100644 DSL/Ruuter.public/services/GET/internal/return-file-locations.yml create mode 100644 DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml create mode 100644 DSL/Ruuter.public/services/GET/mocks/service-settings.yml create mode 100644 DSL/Ruuter.public/services/GET/mocks/validation-mock.yml create mode 100644 DSL/Ruuter.public/services/GET/rasa/rule-names.yml create mode 100644 DSL/Ruuter.public/services/GET/secrets-with-priority.yml create mode 100644 DSL/Ruuter.public/services/GET/secrets.yml create mode 100644 DSL/Ruuter.public/services/GET/service-settings.yml create mode 100644 DSL/Ruuter.public/services/GET/services/active/.guard create mode 100644 DSL/Ruuter.public/services/GET/services/draft/.guard create mode 100644 DSL/Ruuter.public/services/GET/services/inactive/.guard create mode 100644 DSL/Ruuter.public/services/GET/services/log-by-request.yml create mode 100644 DSL/Ruuter.public/services/GET/services/log-by-service.yml create mode 100644 DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml create mode 100644 DSL/Ruuter.public/services/GET/services/statistics.yml create mode 100644 DSL/Ruuter.public/services/GET/services/status.yml create mode 100644 DSL/Ruuter.public/services/GET/slots.yml create mode 100644 DSL/Ruuter.public/services/GET/steps/preferences.yml create mode 100644 DSL/Ruuter.public/services/GET/sticky/example.yml create mode 100644 DSL/Ruuter.public/services/POST/.guard create mode 100644 DSL/Ruuter.public/services/POST/auth/.guard create mode 100644 DSL/Ruuter.public/services/POST/auth/login.yml create mode 100644 DSL/Ruuter.public/services/POST/csv.yml create mode 100644 DSL/Ruuter.public/services/POST/dates/calculate-difference.yml create mode 100644 DSL/Ruuter.public/services/POST/endpoints/common.yml 
create mode 100644 DSL/Ruuter.public/services/POST/file/rename.yml create mode 100644 DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml create mode 100644 DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml create mode 100644 DSL/Ruuter.public/services/POST/mocks/service-settings.yml create mode 100644 DSL/Ruuter.public/services/POST/mocks/services/add.yml create mode 100644 DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml create mode 100644 DSL/Ruuter.public/services/POST/mocks/user-info.yml create mode 100644 DSL/Ruuter.public/services/POST/mocks/validation-mock.yml create mode 100644 DSL/Ruuter.public/services/POST/rasa/rules/add.yml create mode 100644 DSL/Ruuter.public/services/POST/saveJsonToYml.yml create mode 100644 DSL/Ruuter.public/services/POST/service-by-id.yml create mode 100644 DSL/Ruuter.public/services/POST/service-settings.yml create mode 100644 DSL/Ruuter.public/services/POST/services.yml create mode 100644 DSL/Ruuter.public/services/POST/services/active/.guard create mode 100644 DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml create mode 100644 DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml create mode 100644 DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml create mode 100644 DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml create mode 100644 DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml create mode 100644 DSL/Ruuter.public/services/POST/services/add.yml create mode 100644 DSL/Ruuter.public/services/POST/services/create-endpoint.yml create mode 100644 DSL/Ruuter.public/services/POST/services/delete-endpoint.yml create mode 100644 DSL/Ruuter.public/services/POST/services/delete.yml create mode 100644 DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml create mode 100644 DSL/Ruuter.public/services/POST/services/draft/.guard create mode 100644 
DSL/Ruuter.public/services/POST/services/draft/test.tmp create mode 100644 DSL/Ruuter.public/services/POST/services/edit.yml create mode 100644 DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml create mode 100644 DSL/Ruuter.public/services/POST/services/import-services.yml create mode 100644 DSL/Ruuter.public/services/POST/services/inactive/.guard create mode 100644 DSL/Ruuter.public/services/POST/services/open-api-spec.yml create mode 100644 DSL/Ruuter.public/services/POST/services/requests/explain.yml create mode 100644 DSL/Ruuter.public/services/POST/services/resql/add.yml create mode 100644 DSL/Ruuter.public/services/POST/services/status.yml create mode 100644 DSL/Ruuter.public/services/POST/services/update-endpoint.yml create mode 100644 DSL/Ruuter.public/services/POST/steps/preferences.yml create mode 100644 DSL/Ruuter.public/services/POST/user-info.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/RBAC.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/client-input.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/file-generate.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/file-signing.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/siga.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/tara.yml create mode 100644 DSL/Ruuter.public/services/TEMPLATES/validation-template.yml create mode 100644 new.txt create mode 100644 tests/data/classification_test_queries.json diff --git a/DSL/DMapper/rag-search/hbs/bot_responses_to_messages.handlebars 
b/DSL/DMapper/rag-search/hbs/bot_responses_to_messages.handlebars new file mode 100644 index 0000000..aa02301 --- /dev/null +++ b/DSL/DMapper/rag-search/hbs/bot_responses_to_messages.handlebars @@ -0,0 +1,14 @@ +[ +{{#each data.botMessages}} + { + "chatId": "{{../data.chatId}}", + "content": "{{filterControlCharacters result}}", + "buttons": "[{{#each ../data.buttons}}{\"title\": \"{{#if (eq title true)}}Yes{{else if (eq title false)}}No{{else}}{{{title}}}{{/if}}\",\"payload\": \"{{{payload}}}\"}{{#unless @last}},{{/unless}}{{/each}}]", + "authorTimestamp": "{{../data.authorTimestamp}}", + "authorId": "{{../data.authorId}}", + "authorFirstName": "{{../data.authorFirstName}}", + "authorLastName": "{{../data.authorLastName}}", + "created": "{{../data.created}}" + }{{#unless @last}},{{/unless}} +{{/each}} +] diff --git a/DSL/DMapper/rag-search/lib/helpers.js b/DSL/DMapper/rag-search/lib/helpers.js index 6f5e74f..7ecbb7c 100644 --- a/DSL/DMapper/rag-search/lib/helpers.js +++ b/DSL/DMapper/rag-search/lib/helpers.js @@ -168,6 +168,11 @@ export function getAgencyDataAvailable(agencyId) { return (combinedValue % 2) === 0; } +export function filterControlCharacters(str) { + if (typeof str !== "string") return str; + return str.replace(/[\x00-\x1F\x7F]/g, " "); +} + export function json(context) { return JSON.stringify(context); } @@ -269,3 +274,27 @@ export function filterDataByAgency(aggregatedData, startIndex, agencyId, pageSiz return JSON.stringify(result); } + +export function calculateDateDifference(value) { + const { startDate, endDate, outputType } = value; + const sDate = new Date(startDate); + const eDate = new Date(endDate); + const timeDifferenceInSeconds = (eDate.getTime() - sDate.getTime()) / 1000; + + switch (outputType?.toLowerCase()) { + case 'years': + return eDate.getFullYear() - sDate.getFullYear(); + case 'months': + return eDate.getMonth() - sDate.getMonth() + + (12 * (eDate.getFullYear() - sDate.getFullYear())) + case 'hours': + return 
Math.round(Math.abs(eDate - sDate) / 36e5); + case 'minutes': + return Math.floor(timeDifferenceInSeconds / 60); + case 'seconds': + return timeDifferenceInSeconds; + default: + return Math.round(timeDifferenceInSeconds / (3600 * 24)); + } +} + diff --git a/DSL/Ruuter.public/services/GET/.guard b/DSL/Ruuter.public/services/GET/.guard new file mode 100644 index 0000000..4fd565b --- /dev/null +++ b/DSL/Ruuter.public/services/GET/.guard @@ -0,0 +1,28 @@ +check_for_cookie: + switch: + - condition: ${incoming.headers == null || incoming.headers.cookie == null} + next: guard_fail + next: authenticate + +authenticate: + template: "[#SERVICE_PROJECT_LAYER]/check-user-authority" + requestType: templates + headers: + cookie: ${incoming.headers.cookie} + result: authority_result + +check_authority_result: + switch: + - condition: ${authority_result !== "false"} + next: guard_success + next: guard_fail + +guard_success: + return: "success" + status: 200 + next: end + +guard_fail: + return: "unauthorized" + status: 401 + next: end diff --git a/DSL/Ruuter.public/services/GET/account/user-profile-settings.yml b/DSL/Ruuter.public/services/GET/account/user-profile-settings.yml new file mode 100644 index 0000000..320d4af --- /dev/null +++ b/DSL/Ruuter.public/services/GET/account/user-profile-settings.yml @@ -0,0 +1,27 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'USER-PROFILE-SETTINGS'" + method: get + accepts: json + returns: json + namespace: service + +step_1: + call: reflect.mock + args: + response: + response: + - userId: EE30303039914 + forwardedChatPopupNotifications: false + forwardedChatSoundNotifications: false + forwardedChatEmailNotifications: false + newChatPopupNotifications: false + newChatSoundNotifications: false + newChatEmailNotifications: false + useAutocorrect: true + result: reflected_request + +step_2: + wrapper: false + return: ${reflected_request.response.body} diff --git 
a/DSL/Ruuter.public/services/GET/active-services.yml b/DSL/Ruuter.public/services/GET/active-services.yml new file mode 100644 index 0000000..873bb54 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/active-services.yml @@ -0,0 +1,20 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'ACTIVE-SERVICES'" + method: get + accepts: json + returns: json + namespace: service + +get_services_list: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-active-services-list" + result: results + +return_ok: + status: 200 + wrapper: false + return: ${results.response.body} + next: end diff --git a/DSL/Ruuter.public/services/GET/generic/.guard b/DSL/Ruuter.public/services/GET/generic/.guard new file mode 100644 index 0000000..6443537 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/generic/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git a/DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml b/DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml new file mode 100644 index 0000000..25c2e33 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml @@ -0,0 +1,22 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'CUSTOMER-SUPPORT-ACTIVITY'" + method: get + accepts: json + returns: json + namespace: service + +step_1: + call: reflect.mock + args: + response: + response: + - idCode: 'EE49902216518' + active: 'true' + status: 'idle' + result: reflected_request + +step_2: + wrapper: true + return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml b/DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml new file mode 100644 index 0000000..344b83c --- /dev/null +++ b/DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml @@ -0,0 +1,26 @@ +declaration: + call: 
declare + version: 0.1 + description: "Decription placeholder for 'USER-PROFILE-SETTINGS'" + method: get + accepts: json + returns: json + namespace: service + +step_1: + call: reflect.mock + args: + response: + data: + - userId: EE30303039914 + forwardedChatPopupNotifications: false + forwardedChatSoundNotifications: false + forwardedChatEmailNotifications: false + newChatPopupNotifications: false + newChatSoundNotifications: false + newChatEmailNotifications: false + useAutocorrect: true + result: reflected_request + +step_2: + return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml b/DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml new file mode 100644 index 0000000..7794dbb --- /dev/null +++ b/DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'USER-ROLE'" + method: get + accepts: json + returns: json + namespace: service + +step_1: + call: reflect.mock + args: + response: + response: + - "ROLE_ADMINISTRATOR" + result: reflected_request + +step_2: + return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml b/DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml new file mode 100644 index 0000000..02d95eb --- /dev/null +++ b/DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml @@ -0,0 +1,76 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'ACTIVE-CHATS'" + method: get + accepts: json + returns: json + namespace: service + +step_1: + call: reflect.mock + args: + response: + response: + - id: '22fa5630-6f92-4d50-92ba-685c872383af' + customerSupportId: '' + customerSupportDisplayName: '' + endUserId: '' + endUserFirstName: '' + endUserLastName: '' + status: 'OPEN' + created: '2023-01-17T13:18:38.808+00:00' + updated: '2023-01-17T13:19:26.348+00:00' + ended: null + 
endUserOs: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' + endUserUrl: 'https://test.buerokratt.ee/' + forwardedToName: null + forwardedByUser: '' + forwardedFromCsa: '' + forwardedToCsa: '' + lastMessage: 'Suunan teid klienditeenindajale. Varuge natukene kannatust.' + contactsMessage: null + lastMessageTimestamp: '2023-01-17T13:19:26.316+00:00' + - id: '5206b7bd-0812-40a8-ae1d-3774f07f06f0' + customerSupportId: '' + customerSupportDisplayName: '' + endUserId: '' + endUserFirstName: '' + endUserLastName: '' + status: 'OPEN' + created: '2023-01-19T13:38:32.421+00:00' + updated: '2023-01-19T13:38:32.430+00:00' + ended: null + endUserOs: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' + endUserUrl: 'https://test.buerokratt.ee/' + forwardedToName: null + forwardedByUser: '' + forwardedFromCsa: '' + forwardedToCsa: '' + lastMessage: 'aitäh' + contactsMessage: null + lastMessageTimestamp: '2022-11-23T09:33:56.803+00:00' + - id: 'b7bba1c2-b7ab-4b17-825a-2d66a7d16fc4' + customerSupportId: '' + customerSupportDisplayName: '' + endUserId: '' + endUserFirstName: '' + endUserLastName: '' + status: 'OPEN' + created: '2023-01-19T13:38:32.421+00:00' + updated: '2023-01-19T13:38:32.430+00:00' + ended: null + endUserOs: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36' + endUserUrl: 'https://test.buerokratt.ee/' + forwardedToName: null + forwardedByUser: '' + forwardedFromCsa: '' + forwardedToCsa: '' + lastMessage: 'dasnhpwa' + contactsMessage: null + lastMessageTimestamp: '2023-01-18T12:24:54.557+00:00' + result: reflected_request + +step_2: + wrapper: true + return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/userinfo.yml b/DSL/Ruuter.public/services/GET/generic/userinfo.yml new file mode 100644 index 0000000..1278132 --- /dev/null +++ 
b/DSL/Ruuter.public/services/GET/generic/userinfo.yml @@ -0,0 +1,29 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'USERINFO'" + method: get + accepts: json + returns: json + namespace: service + +step_1: + call: reflect.mock + args: + response: + data: + firstName: OK + lastName: TESTNUMBER + idCode: EE30303039914 + displayName: OK + JWTCreated: 1704724715000 + login: EE30303039914 + csaEmail: mail@mail.ee + authorities: + - ROLE_ADMINISTRATOR + csaTitle: OG + JWTExpirationTimestamp: 1704739715000 + result: reflected_request + +step_2: + return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/get-sticky.yml b/DSL/Ruuter.public/services/GET/get-sticky.yml new file mode 100644 index 0000000..ca906d6 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/get-sticky.yml @@ -0,0 +1,40 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'GET-STICKY'" + method: get + accepts: json + returns: json + namespace: service + allowlist: + params: + - field: name + type: string + description: "Parameter 'name'" + +check_for_parameters: + switch: + - condition: ${incoming.params == null || incoming.params.name == null} + next: get_all_sticky_services + next: get_single_sticky_service + +get_single_sticky_service: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/ruuter/sticky/steps" + query: + name: ${incoming.params.name} + result: results + next: return_ok + +get_all_sticky_services: + call: http.get + args: + url: "[#SERVICE_DMAPPER]/ruuter/sticky" + result: results + next: return_ok + +return_ok: + status: 200 + return: ${results.response.body} + next: end diff --git a/DSL/Ruuter.public/services/GET/internal/domain-file.yml b/DSL/Ruuter.public/services/GET/internal/domain-file.yml new file mode 100644 index 0000000..2e42dc9 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/internal/domain-file.yml @@ -0,0 +1,40 @@ +declaration: + call: declare + version: 0.1 + 
description: "Decription placeholder for 'DOMAIN-FILE'" + method: get + accepts: json + returns: json + namespace: service + allowlist: + headers: + - field: cookie + type: string + description: "Cookie field" + +getFileLocations: + call: http.get + args: + url: "[#SERVICE_RUUTER]/internal/return-file-locations" + headers: + cookie: ${incoming.headers.cookie} + result: fileLocations + +getDomainFile: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/read-file" + body: + file_path: ${fileLocations.response.body.response.domain_location} + result: domainFile + +convertYamlToJson: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/conversion/yaml_to_json" + body: + file: ${domainFile.response.body.file} + result: domainData + +return_value: + return: ${domainData.response.body} diff --git a/DSL/Ruuter.public/services/GET/internal/return-file-locations.yml b/DSL/Ruuter.public/services/GET/internal/return-file-locations.yml new file mode 100644 index 0000000..0ebab6a --- /dev/null +++ b/DSL/Ruuter.public/services/GET/internal/return-file-locations.yml @@ -0,0 +1,27 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'RETURN-FILE-LOCATIONS'" + method: get + accepts: json + returns: json + namespace: service + allowlist: + headers: + - field: cookie + type: string + description: "Cookie field" + +assign_step: + assign: + locations: + rules_location: "[#DMAPPER_LOCATIONS_PATH]/data/rules.yml" + stories_location: "[#DMAPPER_LOCATIONS_PATH]/data/stories.yml" + domain_location: "[#DMAPPER_LOCATIONS_PATH]/data/domain.yml" + test_stories_location: "[#DMAPPER_LOCATIONS_PATH]/test_stories.yml" + intents_location: "[#DMAPPER_LOCATIONS_PATH]/nlu/" + regex_location: "[#DMAPPER_LOCATIONS_PATH]/regex" + training_result_location: "[#DMAPPER_LOCATIONS_PATH]/results" + config_location: "[#DMAPPER_LOCATIONS_PATH]/data/config.yml" +return_value: + return: ${locations} diff --git 
a/DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml b/DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml new file mode 100644 index 0000000..dab285a --- /dev/null +++ b/DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'CLIENT-INPUT-VARIABLES'" + method: get + accepts: json + returns: json + namespace: service + +mock_variables: + call: reflect.mock + args: + response: + data: ['{{user.firstname}}', '{{user.lastname}}', '{{user.birthdate}}', '{{user.email}}', '{{invoice.total}}', '{{invoice.subtotal}}'] + result: mock_res + +return_result: + wrapper: false + return: ${mock_res.response.body.data} diff --git a/DSL/Ruuter.public/services/GET/mocks/service-settings.yml b/DSL/Ruuter.public/services/GET/mocks/service-settings.yml new file mode 100644 index 0000000..35482b2 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/mocks/service-settings.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SERVICE-SETTINGS'" + method: get + accepts: json + returns: json + namespace: service + +call_mock: + call: reflect.mock + args: + response: + maxInputTry: 4 + result: mock_res + +return_result: + wrapper: false + return: ${mock_res.response.body} diff --git a/DSL/Ruuter.public/services/GET/mocks/validation-mock.yml b/DSL/Ruuter.public/services/GET/mocks/validation-mock.yml new file mode 100644 index 0000000..e445255 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/mocks/validation-mock.yml @@ -0,0 +1,26 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'VALIDATION-MOCK'" + method: get + accepts: json + returns: json + namespace: service + +call_mock: + call: reflect.mock + args: + response: + project: "Bürokratt" + website: "www.kratid.ee" + result: mock_res + +call_template: + template: "[#SERVICE_PROJECT_LAYER]/validation-template" 
+ requestType: templates + body: + response: ${mock_res.response} + result: templateResult + +return_result: + return: ${templateResult} diff --git a/DSL/Ruuter.public/services/GET/rasa/rule-names.yml b/DSL/Ruuter.public/services/GET/rasa/rule-names.yml new file mode 100644 index 0000000..5fd45b2 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/rasa/rule-names.yml @@ -0,0 +1,29 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'RULE-NAMES'" + method: get + accepts: json + returns: json + namespace: service + +getRules: + call: http.get + args: + url: "[#SERVICE_OPENSEARCH]/rules/_search?size=1000" + result: getRulesResult + +mapRulesData: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/get_rule_names" + headers: + type: 'json' + body: + hits: ${getRulesResult.response.body.hits.hits} + result: rulesData + next: returnSuccess + +returnSuccess: + return: ${rulesData.response.body} + next: end diff --git a/DSL/Ruuter.public/services/GET/secrets-with-priority.yml b/DSL/Ruuter.public/services/GET/secrets-with-priority.yml new file mode 100644 index 0000000..602eeff --- /dev/null +++ b/DSL/Ruuter.public/services/GET/secrets-with-priority.yml @@ -0,0 +1,34 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SECRETS-WITH-PRIORITY'" + method: get + accepts: json + returns: json + namespace: service + +check_parameters: + switch: + - condition: ${incoming.params !== null && incoming.params.type === 'test'} + next: get_ruuter_secrets_test_priority + next: get_ruuter_secrets_prod_priority + +get_ruuter_secrets_prod_priority: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/secrets/get-with-priority" + result: results + next: return_ok + +get_ruuter_secrets_test_priority: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/secrets/get-with-priority?priority=test" + result: results + next: return_ok + +return_ok: + status: 200 + wrapper: false + return: ${results.response.body} + 
next: end diff --git a/DSL/Ruuter.public/services/GET/secrets.yml b/DSL/Ruuter.public/services/GET/secrets.yml new file mode 100644 index 0000000..60913c8 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/secrets.yml @@ -0,0 +1,20 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SECRETS'" + method: get + accepts: json + returns: json + namespace: service + +get_ruuter_secrets: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/secrets/get-all" + result: results + +return_ok: + status: 200 + wrapper: false + return: ${results.response.body} + next: end diff --git a/DSL/Ruuter.public/services/GET/service-settings.yml b/DSL/Ruuter.public/services/GET/service-settings.yml new file mode 100644 index 0000000..abe4ff0 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/service-settings.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SERVICE-SETTINGS'" + method: get + accepts: json + returns: json + namespace: service + +updateSettings: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-settings" + result: results + +returnSuccess: + wrapper: false + status: 200 + return: ${results.response.body} diff --git a/DSL/Ruuter.public/services/GET/services/active/.guard b/DSL/Ruuter.public/services/GET/services/active/.guard new file mode 100644 index 0000000..6443537 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/active/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git a/DSL/Ruuter.public/services/GET/services/draft/.guard b/DSL/Ruuter.public/services/GET/services/draft/.guard new file mode 100644 index 0000000..6443537 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/draft/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git a/DSL/Ruuter.public/services/GET/services/inactive/.guard b/DSL/Ruuter.public/services/GET/services/inactive/.guard new file mode 
100644 index 0000000..6443537 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/inactive/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git a/DSL/Ruuter.public/services/GET/services/log-by-request.yml b/DSL/Ruuter.public/services/GET/services/log-by-request.yml new file mode 100644 index 0000000..9ee096b --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/log-by-request.yml @@ -0,0 +1,33 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'LOG-BY-REQUEST'" + method: get + accepts: json + returns: json + namespace: service + +get_services_stat: + call: http.post + args: + url: "[#SERVICE_OPENSEARCH]/services/_search/template" + body: + id: 'get-log-by-request' + params: ${incoming.params} + result: results + +check_result: + switch: + - condition: ${results.response.body.found === true} + next: return_ok + next: return_not_found + +return_not_found: + status: 404 + return: 'index not found' + next: end + +return_ok: + status: 200 + return: ${results.response.body._source} + next: end diff --git a/DSL/Ruuter.public/services/GET/services/log-by-service.yml b/DSL/Ruuter.public/services/GET/services/log-by-service.yml new file mode 100644 index 0000000..0613800 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/log-by-service.yml @@ -0,0 +1,33 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'LOG-BY-SERVICE'" + method: get + accepts: json + returns: json + namespace: service + +get_services_stat: + call: http.post + args: + url: "[#SERVICE_OPENSEARCH]/services/_search/template" + body: + id: 'get-log-by-service' + params: ${incoming.params} + result: results + +check_result: + switch: + - condition: ${results.response.body.found === true} + next: return_ok + next: return_not_found + +return_not_found: + status: 404 + return: 'index not found' + next: end + +return_ok: + status: 200 + return: 
${results.response.body._source} + next: end diff --git a/DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml b/DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml new file mode 100644 index 0000000..5fea871 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml @@ -0,0 +1,52 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'NOK'" + method: get + accepts: json + returns: json + namespace: service + allowlist: + params: + - field: page + type: number + description: "Parameter 'page'" + - field: page_size + type: number + description: "Parameter 'page_size'" + - field: sorting + type: string + description: "Parameter 'sorting'" + - field: order + type: string + description: "Parameter 'order'" + +getFaults: + call: http.post + args: + url: "[#SERVICE_OPENSEARCH]/ruuterlog/_search" + query: + from: ${(incoming.params.page - 1) * incoming.params.page_size} + size: ${incoming.params.page_size} + _source_excludes: "stackTrace,statusCode" + body: + sort: [{ "timestamp": { "order": "${incoming.params.order}" } }] + query: + match_phrase_prefix: + dslName: + query: "services/active" + result: getFaultsResult + +mapFaultsData: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/get-faults" + headers: + type: "json" + body: + data: { "hits": "${getFaultsResult.response.body.hits.hits}" } + result: faultsData + +returnSuccess: + wrapper: false + return: ${[faultsData.response.body, getFaultsResult.response.body.hits.total.value]} diff --git a/DSL/Ruuter.public/services/GET/services/statistics.yml b/DSL/Ruuter.public/services/GET/services/statistics.yml new file mode 100644 index 0000000..6b3110f --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/statistics.yml @@ -0,0 +1,21 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'STATISTICS'" + method: get + accepts: json + returns: json + namespace: service + +get_services_stat: 
+ call: http.post + args: + url: "[#SERVICE_OPENSEARCH]/services/_search/template" + body: + id: 'get-services-stat' + result: results + +return_ok: + status: 200 + return: ${results.response.body.hits.hits} + next: end diff --git a/DSL/Ruuter.public/services/GET/services/status.yml b/DSL/Ruuter.public/services/GET/services/status.yml new file mode 100644 index 0000000..779451a --- /dev/null +++ b/DSL/Ruuter.public/services/GET/services/status.yml @@ -0,0 +1,24 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'STATUS'" + method: get + accepts: json + returns: json + namespace: service + allowlist: + params: + - field: id + type: string + description: "Parameter 'id'" + +get_status: + call: http.post + args: + url: "[#SERVICE_RESQL]/status" + body: + id: ${incoming.params.id} + result: res + +return_value: + return: ${res.response.body} diff --git a/DSL/Ruuter.public/services/GET/slots.yml b/DSL/Ruuter.public/services/GET/slots.yml new file mode 100644 index 0000000..d38375c --- /dev/null +++ b/DSL/Ruuter.public/services/GET/slots.yml @@ -0,0 +1,26 @@ +declaration: + call: declare + version: 0.1 + description: "Get slots from OpenSearch" + method: get + accepts: json + returns: json + namespace: service + +getSlots: + call: http.post + args: + url: "[#SERVICE_OPENSEARCH]/domain/_search/template" + body: + id: "domain-objects-with-pagination" + params: + type: "slots" + filter: "" + from: 0 + size: 1000 + result: getSlotsResult + +returnSuccess: + return: ${getSlotsResult.response.body.hits.hits[0].fields.filtered_items[0]} + wrapper: false + next: end diff --git a/DSL/Ruuter.public/services/GET/steps/preferences.yml b/DSL/Ruuter.public/services/GET/steps/preferences.yml new file mode 100644 index 0000000..283ac69 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/steps/preferences.yml @@ -0,0 +1,72 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'PREFERENCES'" + method: get + 
accepts: json + returns: json + namespace: service + +get_user_info: + call: http.post + args: + url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" + contentType: plaintext + headers: + cookie: ${incoming.headers.cookie} + plaintext: "customJwtCookie" + result: res + +check_user_info_response: + switch: + - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} + next: assignIdCode + next: return_unauthorized + +assignIdCode: + assign: + idCode: ${res.response.body.idCode} + +get_user_step_preferences: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-user-step-preferences" + body: + user_id_code: ${idCode} + result: preferences + +check_preferences_response: + switch: + - condition: ${preferences.response.body.length > 0} + next: return_preferences + next: seed_default_user_preferences + +seed_default_user_preferences: + call: http.post + args: + url: "[#SERVICE_RESQL]/seed-user-step-preferences" + body: + user_id_code: ${idCode} + result: seed_preferences_res + next: refetch_user_step_preferences + +refetch_user_step_preferences: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-user-step-preferences" + body: + user_id_code: ${idCode} + result: refetched_preferences + +return_refetched_preferences: + return: ${refetched_preferences.response.body[0]} + next: end + +return_preferences: + return: ${preferences.response.body[0]} + next: end + +return_unauthorized: + status: 401 + return: "unauthorized" + next: end diff --git a/DSL/Ruuter.public/services/GET/sticky/example.yml b/DSL/Ruuter.public/services/GET/sticky/example.yml new file mode 100644 index 0000000..cedef90 --- /dev/null +++ b/DSL/Ruuter.public/services/GET/sticky/example.yml @@ -0,0 +1,58 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'EXAMPLE'" + method: get + accepts: json + returns: json + namespace: service + allowlist: + headers: + - field: cookie + type: string + description: "Cookie field" + +check_for_cookie: + 
switch: + - condition: ${incoming.headers.cookie == null || incoming.headers.cookie == ""} + next: return_unauthorized + next: extract_request_data + +extract_request_data: + assign: + cookie: ${incoming.headers.cookie} + +extract_cookie_data: + call: http.post + args: + url: "[#SERVICE_RUUTER]/mocks/mock-custom-jwt-userinfo" + headers: + cookie: ${cookie} + body: + cookieName: "customJwtCookie" + result: jwtResult + next: allow_only_admins + +allow_only_admins: + switch: + - condition: ${jwtResult.response.body.response.authorities.includes("ROLE_ADMIN")} + next: get_data + next: return_unauthorized + +get_data: + call: reflect.mock + args: + response: + type: "mock-value" + id: 1234567 + result: reflectedRequest + next: return_value + +return_value: + return: ${reflectedRequest.response.body} + next: end + +return_unauthorized: + status: 401 + return: "unauthorized" + next: end diff --git a/DSL/Ruuter.public/services/POST/.guard b/DSL/Ruuter.public/services/POST/.guard new file mode 100644 index 0000000..4fd565b --- /dev/null +++ b/DSL/Ruuter.public/services/POST/.guard @@ -0,0 +1,28 @@ +check_for_cookie: + switch: + - condition: ${incoming.headers == null || incoming.headers.cookie == null} + next: guard_fail + next: authenticate + +authenticate: + template: "[#SERVICE_PROJECT_LAYER]/check-user-authority" + requestType: templates + headers: + cookie: ${incoming.headers.cookie} + result: authority_result + +check_authority_result: + switch: + - condition: ${authority_result !== "false"} + next: guard_success + next: guard_fail + +guard_success: + return: "success" + status: 200 + next: end + +guard_fail: + return: "unauthorized" + status: 401 + next: end diff --git a/DSL/Ruuter.public/services/POST/auth/.guard b/DSL/Ruuter.public/services/POST/auth/.guard new file mode 100644 index 0000000..6443537 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/auth/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git 
a/DSL/Ruuter.public/services/POST/auth/login.yml b/DSL/Ruuter.public/services/POST/auth/login.yml new file mode 100644 index 0000000..3077193 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/auth/login.yml @@ -0,0 +1,101 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'LOGIN'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: login + type: string + description: "Body field 'login'" + - field: password + type: string + description: "Body field 'password'" + +check_for_required_parameters: + switch: + - condition: ${incoming.body.login == null || incoming.body.password == null} + next: return_incorrect_request + next: extract_request_data + +extract_request_data: + assign: + login: ${incoming.body.login} + password: ${incoming.body.password} + next: login_user + +login_user: + call: http.post + args: + url: "[#SERVICE_USERS_RESQL]/get-user-by-login" + body: + login: ${login} + password: ${password} + result: results + next: check_login_result + +check_login_result: + switch: + - condition: ${results.response.body.length != 0} + next: get_session_length + next: return_user_not_found + +get_session_length: + call: http.post + args: + url: "[#SERVICE_USERS_RESQL]/get-configuration" + body: + key: "session_length" + result: session_result + next: check_session_length_result + +check_session_length_result: + switch: + - condition: ${session_result.response.body.length != 0} + next: generate_cookie + next: return_session_length_not_found + +generate_cookie: + call: http.post + args: + url: "[#SERVICE_TIM]/jwt/custom-jwt-generate" + body: + JWTName: "customJwtCookie" + expirationInMinutes: ${session_result.response.body[0].value} + content: ${results.response.body[0]} + result: cookie_result + next: assign_cookie + +assign_cookie: + assign: + setCookie: + customJwtCookie: ${cookie_result.response.body.token} + Domain: "[#DOMAIN]" + Secure: true + HttpOnly: true + SameSite: "Lax" 
+ next: return_value + +return_value: + headers: + Set-Cookie: ${setCookie} + return: ${cookie_result.response.body.token} + next: end + +return_session_length_not_found: + status: 400 + return: "Could not fetch session length" + next: end + +return_user_not_found: + status: 400 + return: "User Not Found" + next: end + +return_incorrect_request: + status: 400 + return: "Required parameter(s) missing" + next: end diff --git a/DSL/Ruuter.public/services/POST/csv.yml b/DSL/Ruuter.public/services/POST/csv.yml new file mode 100644 index 0000000..52e1f17 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/csv.yml @@ -0,0 +1,50 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'CSV'" + method: post + accepts: json + returns: data + namespace: service + allowlist: + body: + - field: data + type: string + description: "Body field 'data'" + - field: del + type: string + description: "Body field 'del'" + - field: qul + type: string + description: "Body field 'qul'" + +check_for_required_parameters: + switch: + - condition: ${incoming.body == null} + next: return_incorrect_request + next: get_csv + +get_csv: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/get-csv" + headers: + type: 'csv' + body: + data: ${incoming.body.data} + del: ${incoming.body.del} + qul: ${incoming.body.qul} + layout: false + result: result + +return_value: + wrapper: false + headers: + Content-disposition: "attachment;filename=result.csv" + return: ${result.response.body.response} + next: end + +return_incorrect_request: + status: 400 + return: 'missing parameters' + next: end diff --git a/DSL/Ruuter.public/services/POST/dates/calculate-difference.yml b/DSL/Ruuter.public/services/POST/dates/calculate-difference.yml new file mode 100644 index 0000000..2c5afd4 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/dates/calculate-difference.yml @@ -0,0 +1,82 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 
'CALCULATE-DIFFERENCE'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: startDate + type: string + description: "Body field 'startDate'" + - field: endDate + type: string + description: "Body field 'endDate'" + params: + - field: outputType + type: string + description: "Parameter 'outputType'" + +check_for_body: + switch: + - condition: ${incoming.body == null} + next: return_incorrect_request + next: check_for_required_parameters + +check_for_required_parameters: + switch: + - condition: ${incoming.body.startDate == null} + next: return_incorrect_request + next: extract_request_data + +extract_request_data: + assign: + outputType: ${incoming.params.outputType ?? 'days'} + startDate: ${incoming.body.startDate} + endDate: ${incoming.body.endDate ?? new Date().toISOString()} + +check_is_end_date_greater_than_start_date: + switch: + - condition: ${new Date(endDate) < new Date(startDate)} + next: return_incorrect_date + +check_is_output_type_valid: + switch: + - condition: ${outputType !== null && !['years','months','hours','days','minutes', 'seconds'].includes(outputType)} + next: return_incorrect_output_type + +calculate_difference: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/calculate-date-difference" + headers: + type: "json" + body: + startDate: ${startDate} + endDate: ${endDate} + outputType: ${outputType} + layout: false + result: result + next: return_value + +return_value: + status: 200 + return: ${result.response.body} + next: end + +return_incorrect_date: + status: 400 + return: "Start date can not be greater than the end date/ today" + next: end + +return_incorrect_output_type: + status: 400 + return: "Output type must be: years, months, hours, days, minutes or seconds" + next: end + +return_incorrect_request: + status: 400 + return: "Start date is required" + next: end diff --git a/DSL/Ruuter.public/services/POST/endpoints/common.yml b/DSL/Ruuter.public/services/POST/endpoints/common.yml new 
file mode 100644 index 0000000..611faca --- /dev/null +++ b/DSL/Ruuter.public/services/POST/endpoints/common.yml @@ -0,0 +1,48 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'Common'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: pagination + type: string + description: "Body field 'pagination'" + - field: page + type: string + description: "Body field 'page'" + - field: pageSize + type: string + description: "Body field 'pageSize'" + - field: sorting + type: string + description: "Body field 'sorting'" + - field: search + type: string + description: "Body field 'search'" + +extract_request_data: + assign: + pagination: ${incoming.body.pagination} + page: ${incoming.body.page} + pageSize: ${incoming.body.pageSize} + sorting: ${incoming.body.sorting} + search: ${incoming.body.search} + +get_common_endpoints: + call: http.post + args: + url: "[#SERVICE_RESQL]/endpoints/get_common_endpoints" + body: + pagination: ${pagination} + page: ${page} + page_size: ${pageSize} + sorting: ${sorting} + search: ${search} + result: res + +return_result: + return: ${res.response.body} diff --git a/DSL/Ruuter.public/services/POST/file/rename.yml b/DSL/Ruuter.public/services/POST/file/rename.yml new file mode 100644 index 0000000..c0e7b1c --- /dev/null +++ b/DSL/Ruuter.public/services/POST/file/rename.yml @@ -0,0 +1,42 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'RENAME'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: current_path + type: string + description: "Body field 'current_path'" + - field: new_path + type: string + description: "Body field 'new_path'" + +check_for_body: + switch: + - condition: ${incoming.body == null || incoming.body.current_path == null || incoming.body.new_path == null} + next: return_incorrect_request + +rename_file: + call: http.post + args: + url: 
"[#SERVICE_DMAPPER_HBS]/rename" + headers: + type: 'json' + body: + current_path: ${incoming.body.current_path} + new_path: ${incoming.body.new_path} + layout: false + result: result + +return_value: + status: 200 + return: ${result.response.body} + next: end + +return_incorrect_request: + status: 400 + return: "Start date is required" diff --git a/DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml b/DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml new file mode 100644 index 0000000..eabaf3f --- /dev/null +++ b/DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml @@ -0,0 +1,50 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'RBAC-MOCK'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: allowedRoles + type: object + description: "Body field 'allowedRoles'" + - field: userId + type: string + description: "Body field 'userId'" + + +check_for_body: + switch: + - condition: ${incoming.body == null} + next: missing_parameter + +extract_request_data: + assign: + userId: ${incoming.body.userId} + allowedRoles: ${incoming.body.allowedRoles.sort()} + +check_for_required_parameters: + switch: + - condition: ${userId === null || allowedRoles === null} + next: missing_parameter + next: fetch_user_roles_from_db + +fetch_user_roles_from_db: + call: reflect.mock + args: + response: + isAllowed: TRUE + result: result + +return_value: + status: 200 + return: "${result.response.body}" + next: end + +missing_parameter: + status: 400 + return: "userId, allowedRoles - missing" + next: end diff --git a/DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml b/DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml new file mode 100644 index 0000000..261e227 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml @@ -0,0 +1,136 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 
'CALCULATE-DIFFERENCE'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: startDate + type: string + description: "Body field 'startDate'" + - field: endDate + type: string + description: "Body field 'endDate'" + params: + - field: outputType + type: string + description: "Parameter 'outputType'" + +check_for_body: + switch: + - condition: ${incoming.body == null} + next: return_incorrect_request + next: check_for_required_parameters + +check_for_required_parameters: + switch: + - condition: ${incoming.body.startDate == null} + next: return_incorrect_request + next: extract_request_data + +extract_request_data: + assign: + outputType: ${incoming.params.outputType ?? 'days'} + startDate: ${incoming.body.startDate} + endDate: ${incoming.body.endDate ?? new Date().toISOString()} + +check_is_end_date_greater_than_start_date: + switch: + - condition: ${new Date(endDate) < new Date(startDate)} + next: return_incorrect_date + +check_is_output_type_valid: + switch: + - condition: ${outputType !== null && !['years','months','hours','days','minutes', 'seconds'].includes(outputType)} + next: return_incorrect_output_type + +calculate_difference: + switch: + - condition: ${outputType === 'years'} + next: calculate_difference_in_years + - condition: ${outputType === 'months'} + next: calculate_difference_in_months + - condition: ${outputType === 'hours'} + next: calculate_difference_in_hours + - condition: ${outputType === 'minutes'} + next: calculate_difference_in_minutes + - condition: ${outputType === 'seconds'} + next: calculate_difference_in_seconds + next: calculate_difference_in_days + +calculate_difference_in_years: + call: reflect.mock + args: + response: { + result: 0 + } + result: result + next: return_value + +calculate_difference_in_months: + call: reflect.mock + args: + response: { + result: 11 + } + result: result + next: return_value + +calculate_difference_in_days: + call: reflect.mock + args: + response: { + 
result: 1 + } + result: result + next: return_value + +calculate_difference_in_hours: + call: reflect.mock + args: + response: { + result: 24 + } + result: result + next: return_value + +calculate_difference_in_minutes: + call: reflect.mock + args: + response: { + result: 59 + } + result: result + next: return_value + +calculate_difference_in_seconds: + call: reflect.mock + args: + response: { + result: 201 + } + result: result + next: return_value + +return_value: + status: 200 + return: ${result.response.body} + next: end + +return_incorrect_date: + status: 400 + return: "Start date can not be greater than the end date/ today" + next: end + +return_incorrect_output_type: + status: 400 + return: "Output type must be: years, months, hours, days, minutes or seconds" + next: end + +return_incorrect_request: + status: 400 + return: "Start date is required" + next: end diff --git a/DSL/Ruuter.public/services/POST/mocks/service-settings.yml b/DSL/Ruuter.public/services/POST/mocks/service-settings.yml new file mode 100644 index 0000000..c22da07 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/mocks/service-settings.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SERVICE-SETTINGS'" + method: post + accepts: json + returns: json + namespace: service + +call_mock: + call: reflect.mock + args: + response: + maxInputTry: 4 + result: mock_res + +return_result: + wrapper: false + return: ${mock_res.response.body} diff --git a/DSL/Ruuter.public/services/POST/mocks/services/add.yml b/DSL/Ruuter.public/services/POST/mocks/services/add.yml new file mode 100644 index 0000000..3b1e46c --- /dev/null +++ b/DSL/Ruuter.public/services/POST/mocks/services/add.yml @@ -0,0 +1,44 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'ADD'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: name + type: string + description: "Body field 'name'" 
+ - field: description + type: string + description: "Body field 'description'" + +check_for_body: + switch: + - condition: ${incoming.body == null} + next: return_incorrect_request + next: check_for_required_parameters + +check_for_required_parameters: + switch: + - condition: ${incoming.body.name == null || incoming.body.description == null} + next: return_incorrect_request + next: service_add + +service_add: + call: reflect.mock + args: + response: {} + result: createdService + +return_value: + status: 201 + wrapper: FALSE + return: "" + next: end + +return_incorrect_request: + status: 400 + return: "Required parameter(s) missing" diff --git a/DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml b/DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml new file mode 100644 index 0000000..e8ea3f8 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml @@ -0,0 +1,17 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'OPEN-API-SPEC-MOCK'" + method: post + accepts: json + returns: json + namespace: service + +get_message: + call: http.get + args: + url: https://petstore3.swagger.io/api/v3/openapi.json + result: res + +return_value: + return: ${res.response.body} diff --git a/DSL/Ruuter.public/services/POST/mocks/user-info.yml b/DSL/Ruuter.public/services/POST/mocks/user-info.yml new file mode 100644 index 0000000..ac0b322 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/mocks/user-info.yml @@ -0,0 +1,52 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'USER-INFO'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: isTokenExpired + type: boolean + description: "Body field 'isTokenExpired'" + +check_for_body: + switch: + - condition: ${incoming.body == null} + next: return_user_info_mock + next: do_decision + +do_decision: + switch: + - condition: 
${incoming.body.isTokenExpired === true} + next: return_unauthorized + next: return_user_info_mock + +return_unauthorized: + status: 401 + return: "Unauthorized" + next: end + +return_user_info_mock: + call: reflect.mock + args: + response: + sub: "" + firstName: "MARY ÄNN" + idCode: "EE60001019906" + displayName: "MARY ÄNN" + iss: "test.buerokratt.ee" + exp: 1670250948 + login: "EE60001019906" + iat: 1670243748 + jti: "e14a5084-3b30-4a55-8720-c2ee22f43c2c" + authorities: [ + "ROLE_ADMINISTRATOR" + ] + result: reflected_request + next: return_value + +return_value: + return: ${reflected_request.response} diff --git a/DSL/Ruuter.public/services/POST/mocks/validation-mock.yml b/DSL/Ruuter.public/services/POST/mocks/validation-mock.yml new file mode 100644 index 0000000..fb8cb4a --- /dev/null +++ b/DSL/Ruuter.public/services/POST/mocks/validation-mock.yml @@ -0,0 +1,26 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'VALIDATION-MOCK'" + method: post + accepts: json + returns: json + namespace: service + +call_mock: + call: reflect.mock + args: + response: + project: "Bürokratt" + website: "www.kratid.ee" + result: mock_res + +call_template: + template: "[#SERVICE_PROJECT_LAYER]/validation-template" + requestType: templates + body: + response: ${mock_res.response} + result: templateResult + +return_result: + return: ${templateResult} diff --git a/DSL/Ruuter.public/services/POST/rasa/rules/add.yml b/DSL/Ruuter.public/services/POST/rasa/rules/add.yml new file mode 100644 index 0000000..c41581a --- /dev/null +++ b/DSL/Ruuter.public/services/POST/rasa/rules/add.yml @@ -0,0 +1,129 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'ADD'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: data + type: object + description: "Body field 'data'" + headers: + - field: cookie + type: string + description: "Cookie field" + +assign_values: + assign: + 
body: ${incoming.body.data}
+
+validateRules:
+  call: http.post
+  args:
+    url: "[#SERVICE_DMAPPER]/validate/validate-stories-rules"
+    body:
+      story: ${body}
+      category: "rules"
+  result: validateRulesResult
+
+validateRulesCheck:
+  switch:
+    - condition: ${validateRulesResult.response.body.result == true}
+      next: getRuleNames
+  next: returnDuplicateIntentOrEntity
+
+getRuleNames:
+  call: http.get
+  args:
+    url: "[#SERVICE_RUUTER]/rasa/rule-names"
+    headers:
+      cookie: ${incoming.headers.cookie}
+  result: ruleResult
+
+validateRuleName:
+  switch:
+    - condition: ${ruleResult.response.body.response.names == null}
+      next: getFileLocations
+    - condition: ${!ruleResult.response.body.response.names.includes(body.id)}
+      next: getFileLocations
+  next: returnRuleExists
+
+getFileLocations:
+  call: http.get
+  args:
+    url: "[#SERVICE_RUUTER]/internal/return-file-locations"
+    headers:
+      cookie: ${incoming.headers.cookie}
+  result: fileLocations
+
+getRulesFile:
+  call: http.post
+  args:
+    url: "[#SERVICE_DMAPPER]/file-manager/read-file"
+    body:
+      file_path: ${fileLocations.response.body.response.rules_location}
+  result: ruleFile
+
+convertYamlToJson:
+  call: http.post
+  args:
+    url: "[#SERVICE_DMAPPER]/conversion/yaml_to_json"
+    body:
+      file: ${ruleFile.response.body.file}
+  result: rulesData
+
+mergeRules:
+  call: http.post
+  args:
+    url: "[#SERVICE_DMAPPER]/file-manager/merge"
+    body:
+      array1: ${rulesData.response.body.rules ?? 
[]} + array2: ${[body]} + iteratee: "rule" + result: mergedRules + +convertJsonToYaml: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/conversion/json-to-yaml-stories" + headers: + content-type: "application/json" + body: + rules: ${mergedRules.response.body.array} + result: rulesYaml + +saveRulesFile: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: ${fileLocations.response.body.response.rules_location} + content: ${rulesYaml.response.body.json} + result: fileResult + next: updateOpenSearch + +updateOpenSearch: + call: http.post + args: + url: "[#SERVICES_PIPELINE]/bulk/rules/rule" + body: + input: ${rulesYaml.response.body.json} + result: updateSearchResult + next: returnSuccess + +returnSuccess: + return: "Rule added" + next: end + +returnRuleExists: + return: "Rule exists" + status: 409 + next: end + +returnDuplicateIntentOrEntity: + return: "Rule may not have duplicate consecutive intents or entities" + status: 406 + next: end diff --git a/DSL/Ruuter.public/services/POST/saveJsonToYml.yml b/DSL/Ruuter.public/services/POST/saveJsonToYml.yml new file mode 100644 index 0000000..4305cb2 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/saveJsonToYml.yml @@ -0,0 +1,39 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SAVEJSONTOYML'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: result + type: string + description: "Body field 'result'" + params: + - field: location + type: string + description: "Parameter 'location'" + +toYml: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data" + body: + data: ${incoming.body.result} + result: r + +saveFile: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: ${incoming.params.location} + content: ${r.response.body.yaml} + result: fileResult + +saved_seccessfully: + reloadDsl: true + return: "" + next: end diff 
--git a/DSL/Ruuter.public/services/POST/service-by-id.yml b/DSL/Ruuter.public/services/POST/service-by-id.yml new file mode 100644 index 0000000..45fd1b0 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/service-by-id.yml @@ -0,0 +1,90 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SERVICE-BY-ID'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + header: + - field: cookie + type: string + description: "Cookie field" + body: + - field: id + type: string + description: "Body field 'id'" + - field: search + type: string + description: "Body field 'search'" + +extract_request_data: + assign: + id: ${incoming.body.id} + search: ${incoming.body.search} + +get_user_info: + call: http.post + args: + url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" + contentType: plaintext + headers: + cookie: ${incoming.headers.cookie} + plaintext: "customJwtCookie" + result: res + +check_user_info_response: + switch: + - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} + next: assignIdCode + next: return_unauthorized + +assignIdCode: + assign: + idCode: ${res.response.body.idCode} + +get_service_by_id: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-service-by-id" + body: + id: ${id} + result: service_results + +get_endpoints_by_service_id: + call: http.post + args: + url: "[#SERVICE_RESQL]/endpoints/get_endpoints_by_service_id" + body: + id: ${id} + user_id_code: ${idCode} + search: ${search} + result: endpoints_results + +prepare_results: + assign: + results: + id: ${service_results.response.body[0].id} + name: ${service_results.response.body[0].name} + description: ${service_results.response.body[0].description} + slot: ${service_results.response.body[0].slot} + examples: ${service_results.response.body[0].examples} + entities: ${service_results.response.body[0].entities} + state: ${service_results.response.body[0].state} + type: 
${service_results.response.body[0].type} + isCommon: ${service_results.response.body[0].isCommon} + structure: ${service_results.response.body[0].structure} + endpoints: ${endpoints_results.response.body} + serviceId: ${service_results.response.body[0].serviceId} + +return_ok: + status: 200 + wrapper: false + return: ${results} + next: end + +return_unauthorized: + status: 401 + return: "unauthorized" + next: end diff --git a/DSL/Ruuter.public/services/POST/service-settings.yml b/DSL/Ruuter.public/services/POST/service-settings.yml new file mode 100644 index 0000000..af9021c --- /dev/null +++ b/DSL/Ruuter.public/services/POST/service-settings.yml @@ -0,0 +1,29 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SERVICE-SETTINGS'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: name + type: string + description: "Body field 'name'" + - field: value + type: string + description: "Body field 'value'" + +updateSettings: + call: http.post + args: + url: "[#SERVICE_RESQL]/update-settings" + body: + name: ${incoming.body.name} + value: ${incoming.body.value} + result: getResult + +returnSuccess: + status: 200 + return: 'ok' diff --git a/DSL/Ruuter.public/services/POST/services.yml b/DSL/Ruuter.public/services/POST/services.yml new file mode 100644 index 0000000..8188fac --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services.yml @@ -0,0 +1,43 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SERVICES'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: page + type: number + description: "Body field 'page'" + - field: page_size + type: number + description: "Body field 'page_size'" + - field: sorting + type: string + description: "Body field 'sorting'" + - field: is_common + type: boolean + description: "Body field 'is_common'" + +get_services_list: + call: http.post + args: + url: 
"[#SERVICE_RESQL]/get-services-list" + body: + page: ${incoming.body.page} + page_size: ${incoming.body.page_size} + sorting: ${incoming.body.sorting} + is_common: ${incoming.body.is_common} + limit: 400 + result: services_res + +assign_services_result: + assign: + services: ${services_res.response.body} + +return_ok: + status: 200 + return: ${[services]} + next: end diff --git a/DSL/Ruuter.public/services/POST/services/active/.guard b/DSL/Ruuter.public/services/POST/services/active/.guard new file mode 100644 index 0000000..6443537 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/active/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git a/DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml b/DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml new file mode 100644 index 0000000..ff07aba --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml @@ -0,0 +1,65 @@ +declaration: + call: declare + version: 0.1 + description: Teenuse test, mis kinnitab kasutaja broneeringu ja tagastab + sisestatud väärtuse. + method: post + accepts: json + returns: json + namespace: service + allowList: + body: + - field: chatId + type: string + description: The chat ID for the message + - field: authorId + type: string + description: The author ID for the message + - field: input + type: object + description: The Input from the user +prepare: + assign: + chatId: ${incoming.body.chatId} + authorId: ${incoming.body.authorId} + input: ${incoming.body.input} + buttons: [] + res: + result: "" + next: assign_1 +assign_1: + assign: + entity: ${incoming.body.input[0]} + next: sõnum_kliendile_1 +sõnum_kliendile_1: + assign: + res: + result: "Teie broneering on registreeritud. Kohtumiseni! 
Entity: ${entity}" + next: teenuse_lõpetamine_1 +teenuse_lõpetamine_1: + template: "[#SERVICE_PROJECT_LAYER]/end-conversation" + requestType: templates + body: + message: "" + result: teenuse_lõpetamine_1_result + next: format_messages +format_messages: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" + headers: + type: json + body: + data: + botMessages: ${[res]} + chatId: ${chatId ?? ''} + authorId: ${authorId ?? ''} + authorFirstName: "" + authorLastName: "" + authorTimestamp: ${new Date().toISOString()} + created: ${new Date().toISOString()} + buttons: ${buttons ?? []} + result: formatMessage + next: service-end +service-end: + return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml b/DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml new file mode 100644 index 0000000..18b213d --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml @@ -0,0 +1,60 @@ +declaration: + call: declare + version: 0.1 + description: Teenuse test, mis teavitab kasutajat, et tema kalastusluba vajab uuendamist. + method: post + accepts: json + returns: json + namespace: service + allowList: + body: + - field: chatId + type: string + description: The chat ID for the message + - field: authorId + type: string + description: The author ID for the message + - field: input + type: object + description: The Input from the user +prepare: + assign: + chatId: ${incoming.body.chatId} + authorId: ${incoming.body.authorId} + input: ${incoming.body.input} + buttons: [] + res: + result: "" + next: send_message_to_client_1 +send_message_to_client_1: + assign: + res: + result: Su kalastusluba vajab uuendamist! 
+ next: end_service_1 +end_service_1: + template: "[#SERVICE_PROJECT_LAYER]/end-conversation" + requestType: templates + body: + message: "" + result: end_service_1_result + next: format_messages +format_messages: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" + headers: + type: json + body: + data: + botMessages: ${[res]} + chatId: ${chatId ?? ''} + authorId: ${authorId ?? ''} + authorFirstName: "" + authorLastName: "" + authorTimestamp: ${new Date().toISOString()} + created: ${new Date().toISOString()} + buttons: ${buttons ?? []} + result: formatMessage + next: service-end +service-end: + return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml b/DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml new file mode 100644 index 0000000..df123c0 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml @@ -0,0 +1,63 @@ +declaration: + call: declare + version: 0.1 + description: Kasutaja soovib infot koolivaheaegade kohta antud õppeaastal. + method: post + accepts: json + returns: json + namespace: service + allowList: + body: + - field: chatId + type: string + description: The chat ID for the message + - field: authorId + type: string + description: The author ID for the message + - field: input + type: object + description: The Input from the user +prepare: + assign: + chatId: ${incoming.body.chatId} + authorId: ${incoming.body.authorId} + input: ${incoming.body.input} + buttons: [] + res: + result: "" + next: sõnum_kliendile_1 +sõnum_kliendile_1: + assign: + res: + result: E 20. oktoober 2025 - P 26. oktoober 2025 Sügisvaheaeg E 22. detsember + 2025 - P 11. jaanuar 2026 Jõuluvaheaeg E 23. veebruar 2026 - P 01. märts + 2026 Talvevaheaeg E 13. aprill 2026 - P 19. aprill 2026 Kevadvaheaeg K + 17. juuni 2026 - E 31. 
august 2026 Suvevaheaeg + next: teenuse_lõpetamine_1 +teenuse_lõpetamine_1: + template: "[#SERVICE_PROJECT_LAYER]/end-conversation" + requestType: templates + body: + message: "" + result: teenuse_lõpetamine_1_result + next: format_messages +format_messages: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" + headers: + type: json + body: + data: + botMessages: ${[res]} + chatId: ${chatId ?? ''} + authorId: ${authorId ?? ''} + authorFirstName: "" + authorLastName: "" + authorTimestamp: ${new Date().toISOString()} + created: ${new Date().toISOString()} + buttons: ${buttons ?? []} + result: formatMessage + next: service-end +service-end: + return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml b/DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml new file mode 100644 index 0000000..67373c3 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml @@ -0,0 +1,61 @@ +declaration: + call: declare + version: 0.1 + description: Lihtne testteenus, mis küsib kasutajalt valikvastustega küsimusi ja + suunab vastavalt vastusele järgmisse sammu. 
+ method: post + accepts: json + returns: json + namespace: service + allowList: + body: + - field: chatId + type: string + description: The chat ID for the message + - field: authorId + type: string + description: The author ID for the message + - field: input + type: object + description: The Input from the user +prepare: + assign: + chatId: ${incoming.body.chatId} + authorId: ${incoming.body.authorId} + input: ${incoming.body.input} + buttons: [] + res: + result: "" + next: multi_choice_question_1 +multi_choice_question_1: + assign: + buttons: + - id: "1" + title: Jah + payload: "#service, /POST/services/active/lihtne_teenus_test_mcq_1_0" + - id: "2" + title: Ei + payload: "#service, /POST/services/active/lihtne_teenus_test_mcq_1_1" + res: + result: lithsa teenuse küsims + next: format_messages +format_messages: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" + headers: + type: json + body: + data: + botMessages: ${[res]} + chatId: ${chatId ?? ''} + authorId: ${authorId ?? ''} + authorFirstName: "" + authorLastName: "" + authorTimestamp: ${new Date().toISOString()} + created: ${new Date().toISOString()} + buttons: ${buttons ?? []} + result: formatMessage + next: service-end +service-end: + return: ${formatMessage.response.body ?? 
''} diff --git a/DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml b/DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml new file mode 100644 index 0000000..0f85846 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml @@ -0,0 +1,82 @@ +declaration: + call: declare + version: 0.1 + description: Description placeholder for 'customer_feedback' + method: post + accepts: json + returns: json + namespace: service + allowList: + body: + - field: chatId + type: string + description: The chat ID for the message + - field: authorId + type: string + description: The author ID for the message + - field: input + type: object + description: The Input from the user +prepare: + assign: + chatId: ${incoming.body.chatId} + authorId: ${incoming.body.authorId} + input: ${incoming.body.input} + buttons: [] + res: + result: "" + next: assign_1 +assign_1: + assign: + customer_rating: ${incoming.body.input[0]} + next: condition_1 +condition_1: + switch: + - condition: ${customer_rating > 3} + next: send_message_to_client_1 + next: send_message_to_client_2 +send_message_to_client_1: + assign: + res: + result: Thank you for your positive feedback with ${customer_rating} + next: end_service_1 +send_message_to_client_2: + assign: + res: + result: Thank you for your feedback with rating ${customer_rating} . and we are + trying to improve the system + next: end_service_2 +end_service_1: + template: "[#SERVICE_PROJECT_LAYER]/end-conversation" + requestType: templates + body: + message: "" + result: end_service_1_result + next: format_messages +end_service_2: + template: "[#SERVICE_PROJECT_LAYER]/end-conversation" + requestType: templates + body: + message: "" + result: end_service_2_result + next: format_messages +format_messages: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" + headers: + type: json + body: + data: + botMessages: ${[res]} + chatId: ${chatId ?? 
''} + authorId: ${authorId ?? ''} + authorFirstName: "" + authorLastName: "" + authorTimestamp: ${new Date().toISOString()} + created: ${new Date().toISOString()} + buttons: ${buttons ?? []} + result: formatMessage + next: service-end +service-end: + return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/add.yml b/DSL/Ruuter.public/services/POST/services/add.yml new file mode 100644 index 0000000..a9a81ec --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/add.yml @@ -0,0 +1,177 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'ADD'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + header: + - field: cookie + type: string + description: "Cookie field" + body: + - field: content + type: string + description: "Body field 'content'" + - field: description + type: string + description: "Body field 'description'" + - field: entities + type: object + description: "Body field 'Entities'" + - field: examples + type: object + description: "Body field 'Examples'" + - field: isCommon + type: boolean + description: "Body field 'isCommon'" + - field: name + type: string + description: "Body field 'name'" + - field: serviceId + type: string + description: "Body field 'serviceId'" + - field: structure + type: object + description: "Body field 'structure'" + - field: type + type: string + description: "Body field 'type'" + - field: updateServiceDb + type: boolean + description: "Body field 'updateServiceDb'" + - field: state + type: string + description: "Body field 'state'" + +check_for_required_parameters: + switch: + - condition: ${incoming.body.name == null || incoming.body.description == null || incoming.body.type == null || incoming.body.content == null || incoming.body.serviceId == null || incoming.body.isCommon == null || incoming.body.structure == null} + next: return_incorrect_request + next: extract_request_data + +extract_request_data: + 
assign: + name: ${incoming.body.name} + description: ${incoming.body.description} + slot: ${""} + entities: ${incoming.body.entities} + examples: ${incoming.body.examples} + type: ${incoming.body.type.toUpperCase()} + content: ${incoming.body.content} + serviceId: ${incoming.body.serviceId} + isCommon: ${incoming.body.isCommon} + structure: ${incoming.body.structure} + state: ${incoming.body.state} + next: check_if_update_service_db + +check_if_update_service_db: + switch: + - condition: ${incoming.body.updateServiceDb === true} + next: check_if_name_exists + next: delete_all_mcq_files + +check_if_name_exists: + call: http.post + args: + url: "[#SERVICE_RESQL]/services/check_name_exist" + body: + name: ${name} + result: name_exists_res + next: check_name_exists_result + +check_name_exists_result: + switch: + - condition: ${name_exists_res.response.body[0].nameExists} + next: return_name_already_exists + next: service_add + +service_add: + call: http.post + args: + url: "[#SERVICE_RESQL]/add" + body: + name: ${name} + description: ${description} + slot: ${slot} + entities: ${entities} + examples: ${examples} + ruuter_type: ${type} + service_id: ${serviceId} + is_common: ${isCommon} + state: ${state} + structure: ${structure} + result: createdService + next: convert_json_content_to_yml + +delete_all_mcq_files: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/delete-all-that-starts-with" + body: + path: "[#RUUTER_SERVICES_PATH]/${type}/services/draft" + keyword: "${name}_" + result: deleteRes + +convert_json_content_to_yml: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data" + body: + data: ${content} + result: ymlResult + +check_for_type: + switch: + - condition: ${type === 'GET'} + next: add_get_dsl + next: add_post_dsl + +add_get_dsl: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: "[#RUUTER_SERVICES_GET_PATH]/draft/${name}.tmp" + content: ${ymlResult.response.body.yaml} 
+ result: results + next: check_result + +add_post_dsl: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: "[#RUUTER_SERVICES_POST_PATH]/draft/${name}.tmp" + content: ${ymlResult.response.body.yaml} + result: results + next: check_result + +check_result: + switch: + - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} + next: return_ok + next: return_bad_request + +return_ok: + reloadDsl: true + status: 200 + return: ${results.response.body.message} + next: end + +return_bad_request: + status: 400 + return: ${results.response.body.message} + next: end + +return_incorrect_request: + status: 400 + return: "Required parameter(s) missing" + next: end + +return_name_already_exists: + status: 409 + return: "Service name already exists" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/create-endpoint.yml b/DSL/Ruuter.public/services/POST/services/create-endpoint.yml new file mode 100644 index 0000000..2c73fd4 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/create-endpoint.yml @@ -0,0 +1,46 @@ +declaration: + call: declare + version: 0.1 + description: "Creates a new endpoint" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: endpointId + type: string + description: "Endpoint UUID" + - field: name + type: string + description: "Endpoint name" + - field: type + type: string + description: "Endpoint type" + - field: isCommon + type: boolean + description: "Endpoint common status" + - field: serviceId + type: string + description: "Service UUID" + - field: definitions + type: object + description: "Endpoint definitions" + +create_endpoint: + call: http.post + args: + url: "[#SERVICE_RESQL]/endpoints/create_endpoint" + body: + endpointId: ${incoming.body.endpointId} + name: ${incoming.body.name} + type: ${incoming.body.type} + isCommon: ${incoming.body.isCommon} + serviceId: ${incoming.body.serviceId ?? 
''} + definitions: ${incoming.body.definitions} + result: res + +return_ok: + status: 200 + return: "Endpoint created" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/delete-endpoint.yml b/DSL/Ruuter.public/services/POST/services/delete-endpoint.yml new file mode 100644 index 0000000..f585495 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/delete-endpoint.yml @@ -0,0 +1,34 @@ +declaration: + call: declare + version: 0.1 + description: "Deletes an endpoint" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: id + type: string + description: "Endpoint UUID" + +delete_endpoint: + call: http.post + args: + url: "[#SERVICE_RESQL]/endpoints/delete_endpoint" + body: + id: ${incoming.body.id} + result: res + +remove_from_preferences: + call: http.post + args: + url: "[#SERVICE_RESQL]/endpoints/remove_endpoint_from_preferences" + body: + endpoint_id: ${incoming.body.id} + result: preferences_res + +return_ok: + status: 200 + return: "Endpoint deleted" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/delete.yml b/DSL/Ruuter.public/services/POST/services/delete.yml new file mode 100644 index 0000000..c9a7bef --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/delete.yml @@ -0,0 +1,155 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'DELETE'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + headers: + - field: cookie + type: string + description: "Cookie field" + body: + - field: id + type: string + description: "Body field 'id'" + - field: type + type: string + description: "Body field 'type'" + +check_for_required_parameters: + switch: + - condition: ${incoming.body.id == null || incoming.body.type == null} + next: return_incorrect_request + next: extract_request_data + +extract_request_data: + assign: + id: ${incoming.body.id} + ruuter_type: ${incoming.body.type.toUpperCase()} + next: 
get_service_name + +get_service_name: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-service-name-by-id" + body: + id: ${id} + result: name_res + next: get_current_status + +get_current_status: + call: http.post + args: + url: "[#SERVICE_RESQL]/status" + body: + id: ${id} + result: status_res + next: check_status + +check_status: + switch: + - condition: ${status_res.response.body[0].currentState === 'active'} + next: return_cannot_delete_active_service + - condition: ${status_res.response.body[0].currentState === 'ready'} + next: assign_draft_path + next: assign_old_path + +assign_old_path: + assign: + old_file_status_path: ${status_res.response.body[0].currentState} + next: delete_service + +assign_draft_path: + assign: + old_file_status_path: "draft" + next: delete_service + +delete_service: + call: http.post + args: + url: "[#SERVICE_RESQL]/delete-service" + body: + id: ${id} + result: res + next: check_service_file_exists + +check_service_file_exists: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/exists" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name_res.response.body[0].name}.tmp" + result: service_file_exists + next: validate_service_file_exists + +validate_service_file_exists: + switch: + - condition: ${!!service_file_exists.response.body} + next: delete_deactivated_service + next: delete_endpoints_by_service_id + +delete_deactivated_service: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/delete" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name_res.response.body[0].name}.tmp" + result: results + next: check_result + +check_result: + switch: + - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} + next: delete_endpoints_by_service_id + next: return_bad_request + +delete_endpoints_by_service_id: + call: http.post + args: + url: 
"[#SERVICE_RESQL]/endpoints/delete_endpoints_by_service_id" + body: + serviceId: ${id} + result: delete_endpoint_results + next: remove_service_endpoints_from_preferences + +remove_service_endpoints_from_preferences: + call: http.post + args: + url: "[#SERVICE_RESQL]/endpoints/remove_service_endpoints_from_preferences" + body: + serviceId: ${id} + result: remove_preferences_results + next: delete_mcq_files + +delete_mcq_files: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/delete-all-that-starts-with" + body: + path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" + keyword: "${name_res.response.body[0].name}_" + result: deleted_mcq + +return_ok: + reloadDsl: true + status: 200 + return: "Service Deleted Successfully" + next: end + +return_bad_request: + status: 400 + return: ${results.response.body.message} + next: end + +return_incorrect_request: + status: 400 + return: "Required parameter(s) missing" + next: end + +return_cannot_delete_active_service: + status: 400 + return: "Cannot delete active service" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml b/DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml new file mode 100644 index 0000000..b38fcd4 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml @@ -0,0 +1,157 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'domain-intent-service-link'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: intent + type: string + description: "Body field 'intent'" + - field: serviceName + type: string + description: "Body field 'serviceName'" + - field: serviceMethod + type: string + description: "Body field 'serviceMethod'" + - field: serviceSlot + type: string + description: "Slot input" + - field: type + type: string + description: "Body field 'type'" + header: + - 
field: cookie + type: string + description: "Cookie field" + +extract_request_data: + assign: + serviceName: "${incoming.body.serviceName || ''}" + serviceMethod: "${incoming.body.serviceMethod || 'POST'}" + serviceSlot: ", ({${incoming.body.serviceSlot ? incoming.body.serviceSlot : ''}})" + intent: "${incoming.body.intent}" + type: "${incoming.body.type}" + service_path: "#service, /${serviceMethod}/services/active/${serviceName + (incoming.body.serviceSlot ? serviceSlot : '')}" + +get_file_locations: + call: http.get + args: + url: "[#SERVICE_RUUTER]/internal/return-file-locations" + headers: + cookie: ${incoming.headers.cookie} + result: fileLocations + next: get_domain_file + +get_domain_file: + call: http.get + args: + url: "[#SERVICE_RUUTER]/internal/domain-file" + headers: + cookie: ${incoming.headers.cookie} + result: domainData + next: assign_domain_file_data + +assign_domain_file_data: + assign: + domain_data_json: ${domainData.response.body.response} + next: check_if_intent_exists + +check_if_intent_exists: + switch: + - condition: ${domain_data_json.intents.includes(intent)} + next: update_existing_domain_response + next: return_intent_does_not_exist + +update_existing_domain_response: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/domain/update-existing-response" + body: + json: ${domain_data_json.responses} + searchKey: ${intent} + newKey: ${"utter_" + intent} + newKeyValue: '${type === "Add" ? 
service_path : "UNKNOWN"}' + deleteOldValue: false + createIfAbsent: true + result: updatedResponses + next: check_for_type + +check_for_type: + switch: + - condition: ${type === "Add"} + next: assignRuleData + next: convert_domain_json_to_yaml + +assignRuleData: + assign: + data: + rule: "rule${intent}" + steps: [ + { + intent: "${intent}", + }, + { + action: "utter_${intent}", + }, + ] + next: add_rule + +add_rule: + call: http.post + args: + url: "[#SERVICE_RUUTER]/rasa/rules/add" + headers: + cookie: ${incoming.headers.cookie} + body: + data: ${data} + result: add_rule_res + next: convert_domain_json_to_yaml + +convert_domain_json_to_yaml: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_domain" + body: + version: ${domain_data_json.version} + session_config: ${domain_data_json.session_config} + intents: ${domain_data_json.intents} + entities: ${domain_data_json.entities} + slots: ${domain_data_json.slots} + forms: ${domain_data_json.forms} + actions: ${domain_data_json.actions} + responses: ${updatedResponses.response.body} + result: domainYaml + next: resave_domain_file + +resave_domain_file: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: ${fileLocations.response.body.response.domain_location} + content: ${domainYaml.response.body.json} + result: fileResult + next: updateOpenSearch + +updateOpenSearch: + call: http.post + args: + url: "[#SERVICES_PIPELINE]/bulk/domain" + body: + input: ${domainYaml.response.body.json} + result: updateSearchResult + next: return_result + +return_result: + status: 200 + return: "Connection request sent successfully" + next: end + +return_intent_does_not_exist: + status: 400 + return: "Intent does not exists" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/draft/.guard b/DSL/Ruuter.public/services/POST/services/draft/.guard new file mode 100644 index 0000000..6443537 --- /dev/null +++ 
b/DSL/Ruuter.public/services/POST/services/draft/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git a/DSL/Ruuter.public/services/POST/services/draft/test.tmp b/DSL/Ruuter.public/services/POST/services/draft/test.tmp new file mode 100644 index 0000000..2625647 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/draft/test.tmp @@ -0,0 +1,48 @@ +declaration: + call: declare + version: 0.1 + description: Description placeholder for 'test' + method: post + accepts: json + returns: json + namespace: service + allowList: + body: + - field: chatId + type: string + description: The chat ID for the message + - field: authorId + type: string + description: The author ID for the message + - field: input + type: object + description: The Input from the user +prepare: + assign: + chatId: ${incoming.body.chatId} + authorId: ${incoming.body.authorId} + input: ${incoming.body.input} + buttons: [] + res: + result: "" + next: format_messages +format_messages: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" + headers: + type: json + body: + data: + botMessages: ${[res]} + chatId: ${chatId ?? ''} + authorId: ${authorId ?? ''} + authorFirstName: "" + authorLastName: "" + authorTimestamp: ${new Date().toISOString()} + created: ${new Date().toISOString()} + buttons: ${buttons ?? []} + result: formatMessage + next: service-end +service-end: + return: ${formatMessage.response.body ?? 
''} diff --git a/DSL/Ruuter.public/services/POST/services/edit.yml b/DSL/Ruuter.public/services/POST/services/edit.yml new file mode 100644 index 0000000..5756314 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/edit.yml @@ -0,0 +1,381 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'EDIT'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: content + type: string + description: "Body field 'content'" + - field: description + type: string + description: "Body field 'description'" + - field: isCommon + type: boolean + description: "Body field 'isCommon'" + - field: entities + type: object + description: "Body field 'Entities'" + - field: examples + type: object + description: "Body field 'Examples'" + - field: name + type: string + description: "Body field 'name'" + - field: structure + type: object + description: "Body field 'structure'" + - field: type + type: string + description: "Body field 'type'" + - field: updateServiceDb + type: boolean + description: "Body field 'updateServiceDb'" + - field: state + type: string + description: "Body field 'state'" + params: + - field: id + type: string + description: "Parameter 'id'" + headers: + - field: cookie + type: string + description: "Cookie field" + +extract_request_data: + assign: + id: ${incoming.params.id} + name: ${incoming.body.name} + description: ${incoming.body.description} + isCommon: ${incoming.body.isCommon} + slot: ${""} + entities: ${incoming.body.entities} + examples: ${incoming.body.examples} + type: ${incoming.body.type} + content: ${incoming.body.content} + structure: ${incoming.body.structure} + updateServiceDb: ${incoming.body.updateServiceDb} + state: ${incoming.body.state} + +check_for_update_service_db: + switch: + - condition: ${incoming.body.updateServiceDb === true} + next: get_service + next: check_for_content + +get_service: + call: http.post + args: + url: 
"[#SERVICE_RESQL]/services/get_services_by_ids" + body: + serviceIds: "${id}" + result: get_service_result + +check_if_name_is_the_same: + switch: + - condition: ${get_service_result.response.body[0].name === name} + next: delete_all_mcq_files + next: check_if_name_exists + +check_if_name_exists: + call: http.post + args: + url: "[#SERVICE_RESQL]/services/check_name_exist" + body: + name: ${name} + result: name_exists_res + next: check_name_exists_result + +check_name_exists_result: + switch: + - condition: ${name_exists_res.response.body[0].nameExists} + next: return_name_already_exists + next: delete_all_mcq_files + +delete_all_mcq_files: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/delete-all-that-starts-with" + body: + path: "[#RUUTER_SERVICES_PATH]/${type}/services/draft" + keyword: "${name}_" + result: deleteRes + next: check_for_content + +check_for_content: + switch: + - condition: ${content === null} + next: check_for_required_parameters + next: convert_json_content_to_yml + +convert_json_content_to_yml: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data" + body: + data: ${content} + result: ymlResult + +check_for_type: + switch: + - condition: ${type === 'GET'} + next: add_get_dsl + next: add_post_dsl + +add_get_dsl: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: "[#RUUTER_SERVICES_GET_PATH]/draft/${name}.tmp" + content: ${ymlResult.response.body.yaml} + result: results + next: check_for_required_parameters + +add_post_dsl: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: "[#RUUTER_SERVICES_POST_PATH]/draft/${name}.tmp" + content: ${ymlResult.response.body.yaml} + result: results + next: check_for_required_parameters + +check_for_required_parameters: + switch: + - condition: ${id === null || name === null || description === null} + next: return_incorrect_request + - condition: ${type === null} + next: 
return_incorrect_request + +upper_case_type: + assign: + type: ${type.toUpperCase()} + +check_type: + switch: + - condition: ${type !== 'GET' && type !== 'POST'} + next: return_incorrect_request + +check_if_update_service_db: + switch: + - condition: ${incoming.body.updateServiceDb === true} + next: get_service_by_id + next: return_ok + +get_service_by_id: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-service-by-id" + body: + id: ${id} + result: old_service_result + +assign_values: + assign: + old_name: ${old_service_result.response.body[0].name} + old_structure: ${old_service_result.response.body[0].structure} + old_state: ${old_service_result.response.body[0].state} + service_type: ${old_service_result.response.body[0].type} + +check_new_structure: + switch: + - condition: ${structure === null} + next: use_old_structure + - condition: ${structure !== null} + next: use_new_structure + +use_new_structure: + assign: + new_structure: ${structure} + next: rename_dsl + +use_old_structure: + assign: + new_structure: ${old_structure.value} + next: rename_dsl + +rename_dsl: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/move" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${type}/[#RUUTER_SERVICES_DIR_PATH]/${old_state}/${old_name}.tmp" + new_path: "[#RUUTER_SERVICES_PATH]/${type}/[#RUUTER_SERVICES_DIR_PATH]/${old_state}/${name}.tmp" + result: results + +service_edit: + call: http.post + args: + url: "[#SERVICE_RESQL]/edit" + body: + id: ${id} + name: ${name} + description: ${description} + is_common: ${isCommon ?? false} + slot: ${slot} + examples: ${examples} + entities: ${entities} + structure: ${new_structure} + state: ${state ?? 
'draft'} + result: editedService + +check_for_state: + switch: + - condition: ${state === 'draft'} + next: check_remove_blob_then_draft + - condition: ${state === 'active'} + next: check_if_service_data_exists + next: check_remove_blob_then_ok + +check_remove_blob_then_draft: + switch: + - condition: ${old_state === 'active'} + next: delete_from_storage_edit + next: change_state_to_draft + +check_remove_blob_then_ok: + switch: + - condition: ${old_state === 'active'} + next: delete_from_storage_edit + next: return_ok + +delete_from_storage_edit: + call: http.delete + args: + url: "[#STORAGE_FERRY]/v1/files/delete" + body: + files: + - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" + container: "[#STORAGE_FERRY_CONTAINER]" + fileName: "${old_name}.json" + result: ferry_delete_result + next: check_if_azure_configured_after_delete_edit + +check_if_azure_configured_after_delete_edit: + switch: + - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} + next: trigger_azure_indexer_after_delete_edit + next: log_azure_not_configured_after_delete_edit + +log_azure_not_configured_after_delete_edit: + log: "Warning! Azure Search configuration not found. Skipping Azure indexer trigger. 
Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" + next: after_delete_from_storage_edit + +trigger_azure_indexer_after_delete_edit: + call: http.post + args: + url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" + headers: + api-key: "[#AZURE_SEARCH_API_KEY]" + Content-Type: "application/json" + result: azure_indexer_result + next: after_delete_from_storage_edit + +after_delete_from_storage_edit: + switch: + - condition: ${state === 'draft'} + next: change_state_to_draft + next: return_ok + +change_state_to_draft: + call: http.post + args: + url: "[#SERVICE_RUUTER]/services/status" + headers: + cookie: ${incoming.headers.cookie} + body: + id: ${id} + state: "draft" + type: ${service_type ?? 'POST'} + result: changeStateResult + next: return_ok + +check_if_service_data_exists: + switch: + - condition: ${old_service_result !== undefined && old_service_result !== null} + next: generate_service_json_from_existing + next: get_service_data_for_json + +get_service_data_for_json: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-service-by-id" + body: + id: ${id} + result: service_data_result + next: generate_service_json + +generate_service_json_from_existing: + assign: + service_data_result: ${old_service_result} + next: generate_service_json + +generate_service_json: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/service_declaration" + headers: + type: 'json' + body: + name: ${name} + description: ${service_data_result.response.body[0].description} + examples: ${service_data_result.response.body[0].examples} + entities: ${service_data_result.response.body[0].entities} + result: service_json_result + next: replace_in_storage + +replace_in_storage: + call: http.post + args: + url: "[#STORAGE_FERRY]/v1/files/create" + body: + files: + - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" + container: 
"[#STORAGE_FERRY_CONTAINER]" + fileName: "${name}.json" + content: ${JSON.stringify(service_json_result.response.body)} + result: ferry_upload_result + next: check_if_azure_configured + +check_if_azure_configured: + switch: + - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} + next: trigger_azure_indexer + next: log_azure_not_configured + +log_azure_not_configured: + log: "Warning! Azure Search configuration not found. Skipping Azure indexer trigger. Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" + next: return_ok + +trigger_azure_indexer: + call: http.post + args: + url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" + headers: + api-key: "[#AZURE_SEARCH_API_KEY]" + Content-Type: "application/json" + result: azure_indexer_result + next: return_ok + +return_ok: + reloadDsl: true + status: 200 + return: "Edited Successfully" + next: end + +return_incorrect_request: + status: 400 + return: "Required parameter(s) missing" + next: end + +return_name_already_exists: + status: 409 + return: "Service name already exists" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml b/DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml new file mode 100644 index 0000000..a628079 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml @@ -0,0 +1,32 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'ENDPOINT-URL-VALIDATION'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: type + type: string + description: "Body field 'type'" + - field: url + type: string + description: "Body field 'url'" + +extract_request_data: + assign: + url: ${incoming.body.url} + type: ${incoming.body.type} + 
+call_template: + template: "[#SERVICE_PROJECT_LAYER]/validation-template" + requestType: templates + body: + response: ${url} + type: ${type} + result: templateResult + +return_result: + return: ${templateResult} diff --git a/DSL/Ruuter.public/services/POST/services/import-services.yml b/DSL/Ruuter.public/services/POST/services/import-services.yml new file mode 100644 index 0000000..89a5164 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/import-services.yml @@ -0,0 +1,71 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'IMPORT-SERVICES'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: services + type: object + description: "Body field 'services'" + - field: timezone + type: string + description: "Body field 'timezone'" + +extract_request_data: + assign: + services: ${incoming.body.services ?? []} + names: ${services.map(s => s.fileName).join(",") ?? []} + timezone: ${incoming.body.timezone} + +get_import_names: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-import-names" + body: + names: ${names} + timezone: ${timezone} + result: import_names_res + +assign_imported_names: + assign: + imported_names: ${import_names_res.response.body[0].names.split(",")} + services: "$=services.map((s, i) => ({ ...s, fileName: imported_names[i] }))=" + file_names: ${services.map(s => s.fileName)} + +insert_services: + call: http.post + args: + url: "[#SERVICE_RESQL]/add-services" + body: + names: ${file_names} + structures: ${services.map(s => s.flowData)} + result: insert_services_res + +convert_json_content_to_yml: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data_multiple" + body: + data: ${services.map(s => s.content)} + result: ymls_res + +prepare_files: + assign: + file_paths: "$=file_names.map(name => `[#RUUTER_SERVICES_POST_PATH]/draft/${name}.tmp`)=" + yaml_contents: ${ymls_res.response.body.yamls} + +add_dsls: + call: http.post 
+ args: + url: "[#SERVICE_DMAPPER]/file-manager/create_multiple" + body: + file_paths: ${file_paths} + contents: ${yaml_contents} + result: add_dsls_res + +return_result: + return: "Services imported successfully" diff --git a/DSL/Ruuter.public/services/POST/services/inactive/.guard b/DSL/Ruuter.public/services/POST/services/inactive/.guard new file mode 100644 index 0000000..6443537 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/inactive/.guard @@ -0,0 +1,4 @@ +guard_allow_all: + return: "success" + status: 200 + next: end diff --git a/DSL/Ruuter.public/services/POST/services/open-api-spec.yml b/DSL/Ruuter.public/services/POST/services/open-api-spec.yml new file mode 100644 index 0000000..3dcc2c8 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/open-api-spec.yml @@ -0,0 +1,34 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'OPEN-API-SPEC'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: url + type: string + description: "Body field 'url'" + +check_for_required_parameters: + switch: + - condition: ${incoming.body == null || incoming.body.url == null} + next: return_incorrect_request + next: get_spec + +get_spec: + call: http.get + args: + url: ${incoming.body.url} + result: result + +return_value: + return: ${result.response.body} + next: end + +return_incorrect_request: + status: 400 + return: "missing parameters" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/requests/explain.yml b/DSL/Ruuter.public/services/POST/services/requests/explain.yml new file mode 100644 index 0000000..e5fce84 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/requests/explain.yml @@ -0,0 +1,95 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'EXPLAIN'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: requests + type: object + description: "request 
object containing multiple requests" + +check_for_requests: + switch: + - condition: ${incoming.body.requests == null} + next: missing_requests + next: initialize_responses + +initialize_responses: + assign: + requests: ${incoming.body.requests} + responses: [] + index: 0 + next: process_next_request + +process_next_request: + switch: + - condition: ${index < requests.length} + next: assign_current_request + next: return_responses + +assign_current_request: + assign: + current_request: ${requests[index]} + next: check_method + +check_method: + switch: + - condition: ${current_request.method.toLowerCase() == 'post'} + next: request_explain_post + next: request_explain_get + +request_explain_get: + call: http.get + args: + url: ${current_request.url} + headers: + Content-Type: "application/json" + result: res + next: assign_result + +request_explain_post: + call: http.post + args: + url: ${current_request.url} + headers: + Content-Type: "application/json" + body: + data: ${current_request.body} + result: res + next: assign_result + +assign_result: + assign: + result_res: ${[res.response.body]} + next: check_responses_list + +check_responses_list: + switch: + - condition: ${responses.length === 0} + next: append_first_response + next: append_response + +append_first_response: + assign: + responses: ${[responses, ...result_res]} + index: ${index + 1} + next: process_next_request + +append_response: + assign: + responses: ${[...responses, ...result_res]} + index: ${index + 1} + next: process_next_request + +return_responses: + return: ${responses.splice(1 , responses.length - 1)} + next: end + +missing_requests: + status: 400 + return: "required requests were not provided" + next: end \ No newline at end of file diff --git a/DSL/Ruuter.public/services/POST/services/resql/add.yml b/DSL/Ruuter.public/services/POST/services/resql/add.yml new file mode 100644 index 0000000..b936d8f --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/resql/add.yml @@ -0,0 +1,62 @@ 
+declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'ADD'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: name + type: string + description: "Body field 'name'" + - field: sql + type: string + description: "Body field 'sql'" + +check_for_body: + switch: + - condition: ${incoming.body == null} + next: missing_parameter + +extract_request_data: + assign: + name: ${incoming.body.name} + sql: ${incoming.body.sql} + +check_for_required_parameters: + switch: + - condition: ${name == null || sql == null} + next: missing_parameter + next: add_resql + +missing_parameter: + status: 400 + return: "required parameters were not provided" + next: end + +add_resql: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/create" + body: + file_path: "/Resql/services/${name}.sql" + content: ${sql} + result: results + +check_result: + switch: + - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} + next: return_ok + next: return_bad_request + +return_ok: + status: 200 + return: ${results.response.body.message} + next: end + +return_bad_request: + status: 400 + return: ${results.response.body.message} + next: end diff --git a/DSL/Ruuter.public/services/POST/services/status.yml b/DSL/Ruuter.public/services/POST/services/status.yml new file mode 100644 index 0000000..788aad7 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/status.yml @@ -0,0 +1,303 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'STATUS'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: id + type: string + description: "Body field 'id'" + - field: state + type: string + description: "Body field 'state'" + - field: type + type: string + description: "Body field 'type'" + headers: + - field: cookie + type: string + description: "Cookie field" + +extract_request_data: + 
assign: + id: ${incoming.body.id} + new_state: ${incoming.body.state} + ruuter_type: ${incoming.body.type} + +check_for_required_parameters: + switch: + - condition: ${id === null || new_state === null || ruuter_type === null} + next: return_incorrect_request + - condition: ${new_state === "ready"} + next: set_plain_status + next: get_current_status + +get_current_status: + call: http.post + args: + url: "[#SERVICE_RESQL]/status" + body: + id: ${id} + result: status_res + next: assign_old_status_and_path + +assign_old_status_and_path: + assign: + old_file_status_path: "${status_res.response.body[0].currentState === 'ready' ? 'draft' : status_res.response.body[0].currentState}" + old_file_end: "${status_res.response.body[0].currentState !== 'active' ? '.tmp' : '.yml'}" + next: check_status + +check_status: + switch: + - condition: ${new_state === "draft"} + next: set_status + - condition: ${status_res.response.body[0].currentState === new_state} + next: return_same_state_update + next: set_status + +set_status: + call: http.post + args: + url: "[#SERVICE_RESQL]/set-status" + body: + id: ${id} + new_state: ${new_state} + result: res + next: get_status_name + +set_plain_status: + call: http.post + args: + url: "[#SERVICE_RESQL]/set-status" + body: + id: ${id} + new_state: ${new_state} + result: draft_res + next: return_ok + +get_status_name: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-service-name-by-id" + body: + id: ${id} + result: name_res + next: assign_values + +assign_values: + assign: + name: ${name_res.response.body[0].name} + service_name: "service_${name_res.response.body[0].name}" + next: check_file_exists + +check_file_exists: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/exists" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" + result: service_file_exists_result + next: validate_file_exists + +validate_file_exists: + switch: + - 
condition: ${!!service_file_exists_result.response.body} + next: check_for_status + next: return_service_file_missing + +check_for_status: + switch: + - condition: ${new_state === "active"} + next: activate_service + - condition: ${new_state === "draft"} + next: draft_service + next: deactivate_service + +activate_service: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/move" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" + new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/active/${name}.yml" + result: activate_service_result + next: get_service_data_for_json + +get_service_data_for_json: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-service-by-id" + body: + id: ${id} + result: service_data_result + next: generate_service_json + +generate_service_json: + call: http.post + args: + url: "[#SERVICE_DMAPPER_HBS]/service_declaration" + headers: + type: 'json' + body: + name: ${name} + description: ${service_data_result.response.body[0].description} + examples: ${service_data_result.response.body[0].examples} + entities: ${service_data_result.response.body[0].entities} + result: service_json_result + next: create_in_storage + +create_in_storage: + call: http.post + args: + url: "[#STORAGE_FERRY]/v1/files/create" + body: + files: + - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" + container: "[#STORAGE_FERRY_CONTAINER]" + fileName: "${name}.json" + content: ${JSON.stringify(service_json_result.response.body)} + result: ferry_upload_result + next: check_if_azure_configured_after_create + +check_if_azure_configured_after_create: + switch: + - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} + next: trigger_azure_indexer_after_create + next: log_azure_not_configured_after_create + +log_azure_not_configured_after_create: + log: "Warning! 
Azure Search configuration not found. Skipping Azure indexer trigger. Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" + next: activate_all_mcq_services + +trigger_azure_indexer_after_create: + call: http.post + args: + url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" + headers: + api-key: "[#AZURE_SEARCH_API_KEY]" + Content-Type: "application/json" + result: azure_indexer_result + next: activate_all_mcq_services + +activate_all_mcq_services: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/move-all-that-starts-with" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" + new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/active" + keyword: "${name}_" + format: "yml" + result: active_move_results + next: return_ok + +deactivate_service: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/move" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" + new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/inactive/${name}.tmp" + result: deactivate_service_result + next: delete_from_storage + +delete_from_storage: + call: http.delete + args: + url: "[#STORAGE_FERRY]/v1/files/delete" + body: + files: + - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" + container: "[#STORAGE_FERRY_CONTAINER]" + fileName: "${name}.json" + result: ferry_delete_result + next: check_if_azure_configured_after_delete + +check_if_azure_configured_after_delete: + switch: + - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} + next: trigger_azure_indexer_after_delete + next: log_azure_not_configured_after_delete + +log_azure_not_configured_after_delete: + 
log: "Warning! Azure Search configuration not found. Skipping Azure indexer trigger. Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" + next: deactivate_all_mcq_services + +trigger_azure_indexer_after_delete: + call: http.post + args: + url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" + headers: + api-key: "[#AZURE_SEARCH_API_KEY]" + Content-Type: "application/json" + result: azure_indexer_result + next: deactivate_all_mcq_services + +deactivate_all_mcq_services: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/move-all-that-starts-with" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" + new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/inactive" + keyword: "${name}_" + format: "tmp" + result: inactive_move_results + next: return_ok + +draft_service: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/move" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" + new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/draft/${name}.tmp" + result: draft_service_result + next: draft_all_mcq_services + +draft_all_mcq_services: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/file-manager/move-all-that-starts-with" + body: + file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" + new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/draft" + keyword: "${name}_" + format: "tmp" + result: inactive_move_results + next: return_ok + +return_ok: + reloadDsl: true + status: 200 + return: "Status Changed Successfully" + next: end + +return_bad_request: + status: 400 + return: ${err_result.response.body.message} + next: end + 
+return_incorrect_request: + status: 400 + return: "Required parameter(s) missing" + next: end + +return_same_state_update: + status: 200 + return: "Service is already in this state" + next: end + +return_service_file_missing: + status: 500 + return: "Service file to update is missing" + next: end + +return_intent_does_not_exist: + status: 400 + return: "does not exists" + next: end diff --git a/DSL/Ruuter.public/services/POST/services/update-endpoint.yml b/DSL/Ruuter.public/services/POST/services/update-endpoint.yml new file mode 100644 index 0000000..9228079 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/services/update-endpoint.yml @@ -0,0 +1,61 @@ +declaration: + call: declare + version: 0.1 + description: "Updates an existing endpoint" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: name + type: string + description: "Endpoint name" + - field: type + type: string + description: "Endpoint type" + - field: isCommon + type: boolean + description: "Endpoint common status" + - field: serviceId + type: string + description: "Service UUID" + - field: definitions + type: object + description: "Endpoint definitions" + params: + - field: id + type: string + description: "Endpoint UUID" + +extract_request_data: + assign: + id: ${incoming.params.id} + +check_for_type: + switch: + - condition: ${id == null} + next: return_no_type_error + +update_endpoint: + call: http.post + args: + url: "[#SERVICE_RESQL]/endpoints/update_endpoint" + body: + endpointId: ${id} + name: ${incoming.body.name} + type: ${incoming.body.type} + isCommon: ${incoming.body.isCommon} + serviceId: ${incoming.body.serviceId ?? 
''} + definitions: ${incoming.body.definitions} + result: res + +return_ok: + status: 200 + return: "Endpoint updated" + next: end + +return_no_type_error: + status: 400 + return: "Please provide an endpoint ID" + next: end diff --git a/DSL/Ruuter.public/services/POST/steps/preferences.yml b/DSL/Ruuter.public/services/POST/steps/preferences.yml new file mode 100644 index 0000000..61f7272 --- /dev/null +++ b/DSL/Ruuter.public/services/POST/steps/preferences.yml @@ -0,0 +1,68 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'PREFERENCES'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: steps + type: string + description: "Body field 'steps'" + - field: endpoints + type: string + description: "Body field 'endpoints'" + +extractRequestData: + assign: + steps: ${incoming.body.steps.join(",")} + endpoints: ${incoming.body.endpoints.join(",")} + +get_user_info: + call: http.post + args: + url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" + contentType: plaintext + headers: + cookie: ${incoming.headers.cookie} + plaintext: "customJwtCookie" + result: res + +check_user_info_response: + switch: + - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} + next: assignIdCode + next: return_unauthorized + +assignIdCode: + assign: + idCode: ${res.response.body.idCode} + +update_user_step_preferences: + call: http.post + args: + url: "[#SERVICE_RESQL]/update-user-step-preferences" + body: + steps: "{${steps}}" + endpoints: "{${endpoints}}" + user_id_code: ${idCode} + result: update_preferences_res + +get_user_step_preferences: + call: http.post + args: + url: "[#SERVICE_RESQL]/get-user-step-preferences" + body: + user_id_code: ${idCode} + result: preferences + +return_preferences: + return: ${preferences.response.body[0]} + next: end + +return_unauthorized: + status: 401 + return: "unauthorized" + next: end diff --git 
a/DSL/Ruuter.public/services/POST/user-info.yml b/DSL/Ruuter.public/services/POST/user-info.yml new file mode 100644 index 0000000..e6309aa --- /dev/null +++ b/DSL/Ruuter.public/services/POST/user-info.yml @@ -0,0 +1,16 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'USER-INFO'" + method: post + accepts: json + returns: json + namespace: service + +get_tara_info: + template: "[#SERVICE_PROJECT_LAYER]/tara" + requestType: templates + result: TARA + +return_authorized: + return: ${TARA.response.body} diff --git a/DSL/Ruuter.public/services/TEMPLATES/RBAC.yml b/DSL/Ruuter.public/services/TEMPLATES/RBAC.yml new file mode 100644 index 0000000..fbe0f03 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/RBAC.yml @@ -0,0 +1,51 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'RBAC'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: allowedRoles + type: object + description: "Body field 'allowedRoles'" + - field: userId + type: string + description: "Body field 'userId'" + +check_for_body: + switch: + - condition: ${incoming.body == null} + next: missing_parameter + +extract_request_data: + assign: + userId: ${incoming.body.userId} + allowedRoles: ${incoming.body.allowedRoles.sort()} + +check_for_required_parameters: + switch: + - condition: ${userId === null || allowedRoles === null} + next: missing_parameter + next: fetch_user_roles_from_db + +fetch_user_roles_from_db: + call: http.post + args: + url: "[#SERVICE_RESQL_USERS]:[#SERVICE_RESQL_USERS_PORT]/is-user-roles-allowed" + body: + userId: ${userId} + allowedRoles: ${allowedRoles} + result: result + +return_value: + status: 200 + return: "${result.response.body[0]}" + next: end + +missing_parameter: + status: 400 + return: "userId, allowedRoles - missing" + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml 
b/DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml new file mode 100644 index 0000000..2564113 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml @@ -0,0 +1,50 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'CHECK-USER-AUTHORITY'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + headers: + - field: cookie + type: string + description: "Cookie field" + +get_cookie_info: + call: http.post + args: + url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" + contentType: plaintext + headers: + cookie: ${incoming.headers.cookie} + plaintext: "customJwtCookie" + result: res + next: check_cookie_info_response + +check_cookie_info_response: + switch: + - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} + next: check_user_authority + next: return_bad_request + +check_user_authority: + switch: + - condition: ${res.response.body.authorities.includes("ROLE_ADMINISTRATOR") || res.response.body.authorities.includes("ROLE_SERVICE_MANAGER")} + next: return_authorized + next: return_unauthorized + +return_authorized: + return: ${res.response.body} + next: end + +return_unauthorized: + status: 200 + return: false + next: end + +return_bad_request: + status: 400 + return: false + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/client-input.yml b/DSL/Ruuter.public/services/TEMPLATES/client-input.yml new file mode 100644 index 0000000..1d635ea --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/client-input.yml @@ -0,0 +1,19 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'CLIENT-INPUT'" + method: post + accepts: json + returns: json + namespace: service + +# TODO: replace with correct request to get user input +request_client_input: + call: reflect.mock + args: + response: + input: "Yes" + result: clientInput + +return_value: + return: ${clientInput.response.body} diff --git 
a/DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml b/DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml new file mode 100644 index 0000000..175f028 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml @@ -0,0 +1,42 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'DIRECT-TO-CS'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: message + type: string + description: "Body field 'message'" + +# Direct to customer support +check_for_body: + switch: + - condition: ${incoming.body == null || incoming.body.message == null || incoming.body.message == ""} + next: missing_body_parameter + next: extract_request_data + +extract_request_data: + assign: + message: ${incoming.body.message} + +# TODO: do the actual request +send_message_to_client: + call: reflect.mock + args: + response: + status: 'OK' + message: "Teid suunatakse klienditeenindusse" + result: result + +return_value: + return: ${result.response.body} + next: end + +missing_body_parameter: + status: 400 + return: 'message - missing' + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml b/DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml new file mode 100644 index 0000000..43dbf6b --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml @@ -0,0 +1,42 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'END-CONVERSATION'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: message + type: string + description: "Body field 'message'" + +# End conversation +check_for_body: + switch: + - condition: ${incoming.body == null || incoming.body.message == null || incoming.body.message == ""} + next: missing_body_parameter + next: extract_request_data + +extract_request_data: + assign: + message: ${incoming.body.message} + +# TODO: do the actual request 
+send_message_to_client: + call: reflect.mock + args: + response: + status: 'OK' + message: "Teenus on lõpetatud" + result: result + +return_value: + return: ${result.response.body} + next: end + +missing_body_parameter: + status: 400 + return: 'message - missing' + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/file-generate.yml b/DSL/Ruuter.public/services/TEMPLATES/file-generate.yml new file mode 100644 index 0000000..4eb9f42 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/file-generate.yml @@ -0,0 +1,45 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'FILE-GENERATE'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: fileContent + type: string + description: "Body field 'fileContent'" + - field: fileName + type: string + description: "Body field 'fileName'" + +check_for_body: + switch: + - condition: ${incoming.body == null || incoming.body.fileName == null || incoming.body.fileContent == null} + next: missing_body_parameters + next: extract_request_data + +extract_request_data: + assign: + fileName: ${incoming.body.fileName} + fileContent: ${incoming.body.fileContent} + +generate_pdf_file: + call: http.post + args: + url: "[#SERVICE_DMAPPER]/js/generate/pdf" + body: + filename: ${fileName} + template: ${fileContent} + result: result + +return: + return: ${result.response.body} + next: end + +missing_body_parameters: + status: 400 + return: "fileName, fileContent - missing" + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/file-signing.yml b/DSL/Ruuter.public/services/TEMPLATES/file-signing.yml new file mode 100644 index 0000000..4d2571c --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/file-signing.yml @@ -0,0 +1,35 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'FILE-SIGNING'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: country + 
type: string + description: "Body field 'country'" + - field: personIdentifier + type: string + description: "Body field 'personIdentifier'" + - field: phoneNr + type: string + description: "Body field 'phoneNr'" + - field: type + type: string + description: "Body field 'type'" + +siga_template_request: + template: siga + requestType: templates + body: + type: ${incoming.body.type} + personIdentifier: ${incoming.body.personIdentifier} + country: ${incoming.body.country} + phoneNr: ${incoming.body.phoneNr} + result: result + +return_result: + return: ${result.response.body} diff --git a/DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml b/DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml new file mode 100644 index 0000000..9435e42 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml @@ -0,0 +1,44 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'OPEN-WEBPAGE'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: link + type: string + description: "Body field 'link'" + - field: linkText + type: string + description: "Body field 'linkText'" + +check_for_body: + switch: + - condition: ${incoming.body == null || incoming.body.link == null || incoming.body.link == "" || incoming.body.linkText == null || incoming.body.linkText == ""} + next: missing_body_parameter + next: extract_request_data + +extract_request_data: + assign: + link: ${incoming.body.link} + linkText: ${incoming.body.linkText} + +send_link_to_client: + call: reflect.mock + args: + response: + status: "OK" + link: Link Text + result: result + +return_value: + return: ${result.response.body} + next: end + +missing_body_parameter: + status: 400 + return: "link, linkText - both or one of these fields are missing" + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml b/DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml new file mode 100644 index 
0000000..0db2430 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml @@ -0,0 +1,42 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SEND-MESSAGE-TO-CLIENT'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: message + type: string + description: "Body field 'message'" + +# Message to client element +check_for_body: + switch: + - condition: ${incoming.body == null || incoming.body.message == null || incoming.body.message == ""} + next: missing_body_parameter + next: extract_request_data + +extract_request_data: + assign: + message: ${incoming.body.message} + +# TODO: do the actual request +send_message_to_client: + call: reflect.mock + args: + response: + status: 'OK' + message: "Hello, Muki" + result: result + +return_value: + return: ${result.response.body} + next: end + +missing_body_parameter: + status: 400 + return: 'message - missing' + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/siga.yml b/DSL/Ruuter.public/services/TEMPLATES/siga.yml new file mode 100644 index 0000000..adb8984 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/siga.yml @@ -0,0 +1,132 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'SIGA'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + body: + - field: containerType + type: string + description: "Body field 'containerType'" + - field: country + type: string + description: "Body field 'country'" + - field: file + type: string + description: "Body field 'file'" + - field: phoneNumber + type: string + description: "Body field 'phoneNumber'" + - field: type + type: string + description: "Body field 'type'" + +extract_request_data: + assign: + file: ${incoming.body.file} + signType: ${incoming.body.type} + country: ${incoming.body.country} + phoneNumber: ${incoming.body.phoneNumber} + containerType: 
${incoming.body.containerType} + next: get_tara_info + +get_tara_info: + template: tara + requestType: templates + result: tara_res + next: extract_tara_data + +extract_tara_data: + assign: + identifier: ${tara_res.response.body.idCode} + next: check_for_container_type + +check_for_container_type: + switch: + - condition: ${containerType === "ASIC".toLowerCase()} + next: create_asic_container + - condition: ${containerType === "HASHCODE".toLowerCase()} + next: create_hashcode_container + next: missing_container_type + +create_asic_container: + call: http.post + args: + url: "[#SERVICE_SIGA]/create-container" + contentType: formdata + body: + file:file[0]:uploadedFile.pdf: ${file} + return: container_res + next: check_if_sign_type_missing + +create_hashcode_container: + call: http.post + args: + url: "[#SERVICE_SIGA]/create-hashcode-container" + contentType: formdata + body: + file:file[0]:uploadedFile.pdf: ${file} + return: container_res + next: check_if_sign_type_missing + +check_if_sign_type_missing: + switch: + - condition: ${signType === null} + next: missing_sign_type + next: check_for_sign_type + +check_for_sign_type: + switch: + - condition: ${signType === "smart_id"} + next: sign_via_smart_id + - condition: ${signType === "mobile_sign"} + next: sign_via_mobile + next: missing_sign_type + +sign_via_smart_id: + call: http.post + args: + url: "[#SERVICE_SIGA]/smartid-signing" + body: + containerId: ${container_res.response.body.id} + containerType: ${containerType.toUpperCase()} + personIdentifier: ${identifier} + country: ${country} + return: res + next: end + +sign_via_mobile: + call: http.post + args: + url: "[#SERVICE_SIGA]/mobile-signing" + body: + containerId: ${container_res.response.body.id} + containerType: ${containerType.toUpperCase()} + phoneNr: ${phoneNumber} + personIdentifier: ${identifier} + country: ${country} + return: res + next: end + +missing_smart_id_params: + status: 400 + return: "Id, country - missing" + next: end + 
+missing_mobile_sign_params: + status: 400 + return: "Phone number, country - missing" + next: end + +missing_sign_type: + status: 400 + return: "Sign type is missing" + next: end + +missing_container_type: + status: 400 + return: "Container type is missing" + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/tara.yml b/DSL/Ruuter.public/services/TEMPLATES/tara.yml new file mode 100644 index 0000000..28ac86d --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/tara.yml @@ -0,0 +1,51 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'TARA'" + method: post + accepts: json + returns: json + namespace: service + allowlist: + headers: + - field: cookie + type: string + description: "Cookie field" + + +check_for_body: + switch: + - condition: ${incoming.headers == null || incoming.headers.cookie == null} + next: missing_cookie + next: get_cookie_info + +get_cookie_info: + call: http.post + args: + url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" + contentType: plaintext + headers: + cookie: ${incoming.headers.cookie} + plaintext: "customJwtCookie" + result: res + next: check_cookie_info_response + +check_cookie_info_response: + switch: + - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} + next: return_auth_result + next: return_bad_request + +return_auth_result: + return: ${res.response.body} + next: end + +return_bad_request: + status: 400 + return: false + next: end + +missing_cookie: + status: 401 + return: "no authentication cookie" + next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/validation-template.yml b/DSL/Ruuter.public/services/TEMPLATES/validation-template.yml new file mode 100644 index 0000000..63f21c8 --- /dev/null +++ b/DSL/Ruuter.public/services/TEMPLATES/validation-template.yml @@ -0,0 +1,56 @@ +declaration: + call: declare + version: 0.1 + description: "Decription placeholder for 'VALIDATION-TEMPLATE'" + method: post + accepts: json + returns: json + 
namespace: service + allowlist: + body: + - field: response + type: string + description: "Body field 'response'" + - field: type + type: string + description: "Body field 'type'" + +assign_step: + assign: + res: ${incoming.body.response} + type: ${incoming.body.type} + next: check_for_type + +check_for_type: + switch: + - condition: ${type.toLowerCase() === 'get'} + next: validate_get_request + next: validate_post_request + +validate_get_request: + call: http.get + args: + url: ${res} + result: results + next: validate_status_code + +validate_post_request: + call: http.post + args: + url: ${res} + result: results + next: validate_status_code + +validate_status_code: + switch: + - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} + next: return_true + next: return_false + +return_true: + return: true + next: end + +return_false: + return: false + next: end diff --git a/constants.ini b/constants.ini index 63172d1..af50730 100644 --- a/constants.ini +++ b/constants.ini @@ -9,4 +9,7 @@ RAG_SEARCH_CRON_MANAGER=http://cron-manager:9010 RAG_SEARCH_LLM_ORCHESTRATOR=http://llm-orchestration-service:8100/orchestrate RAG_SEARCH_PROMPT_REFRESH=http://llm-orchestration-service:8100/prompt-config/refresh DOMAIN=localhost -DB_PASSWORD=dbadmin \ No newline at end of file +DB_PASSWORD=dbadmin +RAG_SEARCH_RUUTER_PUBLIC_INTERNAL_SERVICE=http://ruuter-public:8086/services +SERVICE_DMAPPER_HBS=http://data-mapper:3000/hbs/rag-search +SERVICE_PROJECT_LAYER=services \ No newline at end of file diff --git a/docs/HYBRID_SEARCH_CLASSIFICATION.md b/docs/HYBRID_SEARCH_CLASSIFICATION.md index 18c512a..1de3f7f 100644 --- a/docs/HYBRID_SEARCH_CLASSIFICATION.md +++ b/docs/HYBRID_SEARCH_CLASSIFICATION.md @@ -53,7 +53,8 @@ The system has two phases: | `src/intent_data_enrichment/main_enrichment.py` | Orchestrates per-example and summary point creation | | `src/intent_data_enrichment/qdrant_manager.py` | Qdrant collection management, upsert, and deletion 
| | `src/intent_data_enrichment/api_client.py` | LLM API calls (context generation, embeddings) | -| `src/intent_data_enrichment/models.py` | `EnrichedService` data model | +| `src/intent_data_enrichment/models.py` | `ServiceData`, `EnrichedService`, `EnrichmentResult` data models | +| `src/intent_data_enrichment/constants.py` | `EnrichmentConstants` — API URLs, Qdrant config, vector sizes, LLM prompt template | | `src/tool_classifier/sparse_encoder.py` | BM25-style sparse vector computation | ### What Changed: Single Embedding → Per-Example Indexing @@ -78,8 +79,8 @@ Service "Valuutakursid" → 4 Qdrant points dense: 3072-dim embedding of this exact text sparse: BM25 vector → {euro: 1.0, gbp: 1.0, kurss: 1.0, ...} - Point 3 (summary): "Valuutakursid - Kasutaja soovib infot..." - dense: 3072-dim embedding of name + description + LLM context + Point 3 (summary): "Service Name: Valuutakursid\nDescription: ...\nExample Queries: ...\nRequired Entities: ...\nEnriched Context: ..." + dense: 3072-dim embedding of combined text sparse: BM25 vector of combined text ``` @@ -101,9 +102,12 @@ Service "Valuutakursid" → 4 Qdrant points ```python # sparse_encoder.py +SPARSE_VOCAB_SIZE = 50_000 + text = "Mis suhe on euro ja usd vahel" tokens = re.findall(r"\w+", text.lower()) # ["mis", "suhe", "on", "euro", ...] 
-# Each token → hashed to index in [0, VOCAB_SIZE), value = term frequency +# Each token → MD5 hash (first 4 bytes) to index in [0, SPARSE_VOCAB_SIZE), value = term frequency +# Collisions are handled by summing values at the same index # Output: SparseVector(indices=[hash("mis"), hash("euro"), ...], values=[1.0, 1.0, ...]) ``` @@ -146,7 +150,7 @@ service_enrichment.sh │ ├─ Generate dense embedding (text-embedding-3-large) │ └─ Generate sparse vector (BM25 term hashing) │ - ├─ Step 3: Summary point (name + description + LLM context): + ├─ Step 3: Summary point (name + description + examples + entities + LLM context): │ ├─ Generate dense embedding │ └─ Generate sparse vector │ @@ -155,6 +159,17 @@ service_enrichment.sh └─ Step 5: Bulk upsert N+1 points to Qdrant ``` +### Summary Point Combined Text Format + +The summary point embeds a structured concatenation: +``` +Service Name: {name} +Description: {description} +Example Queries: {example1} | {example2} | ... +Required Entities: {entity1}, {entity2}, ... +Enriched Context: {LLM-generated context} +``` + ### Service Deletion When a service is deactivated, all its points are removed: @@ -186,12 +201,12 @@ POST /collections/intent_collections/points/query { "query": [0.023, -0.041, ...], # 3072-dim dense vector "using": "dense", - "limit": 6, + "limit": 6, # DENSE_SEARCH_TOP_K * 2 (3 * 2 = 6, allows dedup) "with_payload": true } ``` -Results are deduplicated by `service_id` (best score per service). +Results are deduplicated by `service_id` (best score per service), returning up to `DENSE_SEARCH_TOP_K` (3) unique services. **Why not use RRF scores?** Qdrant's RRF uses `1/(1+rank)`, producing fixed scores (0.50, 0.33, 0.25) regardless of actual relevance. A perfect match and a random query both get 0.50 for rank 1. Cosine similarity reflects true semantic closeness. 
@@ -203,6 +218,7 @@ Sparse prefetch is only included if the query produces a non-empty sparse vector ```python # classifier.py → _hybrid_search() +# First checks collection exists and has data (points_count > 0) POST /collections/intent_collections/points/query { "prefetch": [ @@ -215,6 +231,10 @@ POST /collections/intent_collections/points/query } ``` +> **Note:** Prefetch limit is `HYBRID_SEARCH_TOP_K * 2` (5 * 2 = 10). The sparse prefetch is conditionally added only when `sparse_vector.is_empty()` is False. + +Hybrid results are also deduplicated by `service_id` (best RRF score per service). + ### Routing Decision ``` @@ -251,6 +271,7 @@ Dense: Valuutakursid (cosine=0.5511), gap=0.2371 → Runs intent detection + entity extraction on matched service only → Entities: {currency_from: EUR, currency_to: THB} → Validation: PASSED ✓ +→ Calls service endpoint → Returns response ``` ### Path 3: AMBIGUOUS Service Match → LLM Confirmation @@ -285,17 +306,17 @@ SERVICE (Layer 1) → CONTEXT (Layer 2) → RAG (Layer 3) → OOD (Layer 4 | Path | Intent Detection | Entity Extraction | |------|-----------------|-------------------| | HIGH-CONFIDENCE | On 1 service (matched) | Yes — from LLM output | -| AMBIGUOUS | On 2-3 candidates | Yes — if LLM matches | +| AMBIGUOUS | On top candidates (from `top_results`) | Yes — if LLM matches | | Non-service | Not run | Not run | ### Intent Detection Module (DSPy) **File:** `src/tool_classifier/intent_detector.py` -The DSPy `IntentDetectionModule` receives: +The DSPy `IntentDetectionModule` uses `dspy.Predict` (direct prediction) and receives: - User query -- Candidate services (formatted as JSON) -- Conversation history (last 3 turns) +- Candidate services (formatted as JSON with service_id, name, description, required_entities, top 3 examples) +- Conversation history (last 3 turns, formatted as `{authorRole}: {message}`) It returns: ```json @@ -336,6 +357,18 @@ Entities dict → ordered array matching service schema: # Array: ["EUR", "THB"] ``` 
+### Service Endpoint Call + +After entity validation and transformation, the workflow calls the Ruuter active service endpoint: + +```python +# Endpoint: {RUUTER_SERVICE_BASE_URL}/services/active/{clean_service_name} +# Payload: {"chatId": "...", "authorId": "...", "input": ["EUR", "THB"]} +# Response: {"response": [{"content": "..."}]} → extracts content string +``` + +In streaming mode, the service content is wrapped as SSE events and streamed to the client. + --- ## Thresholds & Configuration @@ -387,7 +420,3 @@ Based on empirical testing with 42 Estonian queries (20 SERVICE, 22 RAG): - **Adding more services:** Score distributions improve naturally — service queries score higher, non-service score lower. - **Adding more examples per service:** Diverse phrasings expand the embedding coverage. Aim for 5-8 examples per service covering formal + informal + different word orders. - **Adjusting thresholds:** Monitor the logs (`Dense search: top=... cosine=...`) and adjust if real-world scores differ from test data. - -### Current Limitations - -- **Step 7 (Ruuter service call) is not yet implemented.** The service workflow currently returns a debug response with service metadata (endpoint URL, HTTP method, extracted entities) instead of calling the actual Ruuter service endpoint. See the `TODO: STEP 7` comments in `src/tool_classifier/workflows/service_workflow.py`. 
diff --git a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md index 398299a..ac92abb 100644 --- a/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md +++ b/docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md @@ -59,32 +59,45 @@ Handle queries that require calling external services/APIs: ### High-Level Flow +The service workflow has **3 routing paths** based on classification metadata from hybrid search: + +``` +Classification Result (from classifier.py) +│ +├─ needs_llm_confirmation = False (HIGH-CONFIDENCE) +│ → Skip discovery, run intent detection on matched service only +│ +├─ needs_llm_confirmation = True (AMBIGUOUS) +│ → Run LLM intent detection on top candidate services +│ +└─ No metadata (LEGACY / fallback) + → Full service discovery + optional semantic search + intent detection +``` + +Each path then continues through: ``` -1. Service Discovery - ↓ -2. Service Selection (Semantic Search or LLM-based) - ↓ -3. Intent Detection (DSPy LLM Call) - ↓ -4. Entity Extraction (From LLM Output) - ↓ -5. Entity Validation (Against Service Schema) - ↓ -6. Entity Transformation (Dict → Ordered Array) - ↓ -7. Service Call (TODO: Ruuter endpoint invocation) +1. Entity Extraction (from LLM output) +↓ +2. Entity Validation (against service schema) +↓ +3. Entity Transformation (Dict → Ordered Array) +↓ +4. Service Endpoint Construction +↓ +5. Service Call (Ruuter endpoint invocation) ``` --- -## 1. 
Service Discovery +## Service Discovery (Legacy Path) ### Method: `_call_service_discovery()` Calls Ruuter public endpoint to fetch available services: ```python -GET /rag-search/get-services-from-llm +GET {RAG_SEARCH_RUUTER_PUBLIC}/services/get-services +# Default: http://ruuter-public:8086/rag-search/services/get-services ``` **Response Structure:** @@ -122,16 +135,14 @@ if service_count <= 10: elif service_count > 10: # Many services → Use semantic search to narrow down - services = await _semantic_search_services(query, top_k=5) + services = await _semantic_search_services(query, top_k=10) ``` --- -## 2. Service Selection +## Semantic Search (When Many Services) -### Semantic Search (When Many Services) - -**Method:** `_semantic_search_services()` +### Method: `_semantic_search_services()` Uses Qdrant vector database to find relevant services: @@ -142,8 +153,8 @@ embedding = orchestration_service.create_embeddings_for_indexer([query]) # 2. Search Qdrant collection search_payload = { "vector": query_embedding, - "limit": 5, # Top 5 services - "score_threshold": 0.4, # Minimum similarity + "limit": 10, # Top 10 services (SEMANTIC_SEARCH_TOP_K) + "score_threshold": 0.2, # Minimum similarity (SEMANTIC_SEARCH_THRESHOLD) "with_payload": True } @@ -157,7 +168,7 @@ response = qdrant_client.post( --- -## 3. Intent Detection (LLM-Based) +## Intent Detection (LLM-Based) ### Method: `_detect_service_intent()` @@ -189,23 +200,24 @@ services_formatted = [ "name": "Currency Conversion", "description": "Convert EUR to other currencies", "required_entities": ["target_currency"], - "examples": ["How much is EUR in USD?", "Convert EUR to JPY"] + "examples": ["How much is EUR in USD?", "Convert EUR to JPY"] # Top 3 examples } ] # 2. Prepare conversation context (last 3 turns) conversation_context = """ -user: Hello -assistant: Hi! How can I help? -user: How much is 100 EUR in USD? +end_user: Hello +backoffice_user: Hi! How can I help? +end_user: How much is 100 EUR in USD? 
""" -# 3. Call DSPy module -intent_result = intent_detector.forward( - user_query="How much is 100 EUR in USD?", - services=services_formatted, - conversation_history=conversation_history -) +# 3. Call DSPy module (uses dspy.Predict, not ChainOfThought) +with self.llm_manager.use_task_local(): + intent_result = intent_module.forward( + user_query="How much is 100 EUR in USD?", + services=services_formatted, + conversation_history=conversation_history + ) ``` ### LLM Output Format @@ -226,8 +238,8 @@ The LLM returns structured JSON: ### Confidence Threshold ```python -if confidence < 0.7: - # Low confidence → Service workflow returns None → Fallback to RAG +if matched_service_id is None or confidence < 0.7: + # Low confidence → Service workflow returns None → Fallback to Context/RAG return None ``` @@ -251,7 +263,7 @@ costs_metric["intent_detection"] = usage_info --- -## 4. Entity Extraction +## Entity Extraction ### From LLM Output @@ -299,7 +311,7 @@ Entities are extracted as **key-value pairs** where: --- -## 5. Entity Validation +## Entity Validation ### Method: `_validate_entities()` @@ -367,7 +379,7 @@ validation_errors = ["Entity 'target_currency' has empty value"] --- -## 6. Entity Transformation +## Entity Transformation ### Method: `_transform_entities_to_array()` @@ -397,18 +409,14 @@ entities_array = ["USD", "EUR", "100"] ```python def _transform_entities_to_array( + self, entities_dict: Dict[str, str], entity_order: List[str] ) -> List[str]: """Transform entity dict to ordered array.""" - ordered_array = [] - - for entity_key in entity_order: - # Get value from dict, or empty string if missing - value = entities_dict.get(entity_key, "") - ordered_array.append(value) - - return ordered_array + if not entity_order: + return [] + return [entities_dict.get(key, "") for key in entity_order] ``` ### Example @@ -435,40 +443,62 @@ def _transform_entities_to_array( --- -## 7. 
Service Call (TODO: Step 7) +## Service Call (Step 7 — Implemented) ### Endpoint Construction ```python -endpoint_url = f"{RUUTER_BASE_URL}/services/active{service_name}" -# Example: "http://ruuter:8080/services/active/currency-conversion" -# (Note: service_name from service metadata, e.g., "/currency-conversion") +def _construct_service_endpoint(self, service_name: str, chat_id: str) -> str: + # Clean service name: strip whitespace, remove invisible Unicode chars, replace spaces with _ + clean_name = service_name.strip().translate(INVISIBLE_CHAR_TABLE).replace(" ", "_") + return f"{RUUTER_SERVICE_BASE_URL}/services/active/{clean_name}" + # Example: "http://ruuter-public:8086/services/services/active/Currency_Conversion" ``` -### Payload Construction (Planned) +### Payload Construction ```python payload = { + "chatId": chat_id, + "authorId": author_id, "input": entities_array, # ["USD", "EUR", "100"] - "authorId": request.authorId, - "chatId": request.chatId } ``` -### HTTP Call (Planned) +### HTTP Call ```python -# Non-streaming -response = await httpx.post( - endpoint_url, - json=payload, - timeout=5.0 -) +async def _call_service_endpoint( + self, endpoint_url, http_method, entities_array, chat_id, author_id +) -> Optional[str]: + async with httpx.AsyncClient(timeout=SERVICE_CALL_TIMEOUT) as client: + if http_method.upper() == "POST": + response = await client.post(endpoint_url, json=payload) + else: + response = await client.get(endpoint_url, params=payload) + + response.raise_for_status() + data = response.json() + + # Ruuter wraps the DSL return value in {"response": ...} + if isinstance(data, dict) and "response" in data: + data = data["response"] + + # DMapper returns a JSON array; each item has a "content" field + if isinstance(data, list) and len(data) > 0: + content = data[0].get("content", "") + return content if content else None +``` -# Streaming -async with httpx.stream("POST", endpoint_url, json=payload) as stream: - async for line in 
stream.aiter_lines(): - yield orchestration_service.format_sse(chat_id, line) +### Streaming Mode + +In streaming mode, the service content is wrapped as SSE events: + +```python +async def service_stream() -> AsyncIterator[str]: + yield orchestration_service.format_sse(chat_id, service_content) + yield orchestration_service.format_sse(chat_id, "END") + orchestration_service.log_costs(costs_metric) ``` --- @@ -483,28 +513,14 @@ async with httpx.stream("POST", endpoint_url, json=payload) as stream: ### Step-by-Step Execution -#### 1. Service Discovery -```json -{ - "service_count": 5, - "services": [ - { - "serviceId": "currency_conversion_eur", - "name": "Currency Conversion (EUR)", - "entities": ["target_currency"], - "examples": ["How much is EUR in USD?"] - } - ] -} -``` - -#### 2. Service Selection +#### 1. Classification (Hybrid Search) ```python -# Few services (5 <= 10) → Use all for intent detection -services = discovery_result["services"] +# Dense search finds best service match +# cosine=0.5511, gap=0.2371 +# → HIGH-CONFIDENCE path (needs_llm_confirmation=False) ``` -#### 3. Intent Detection (LLM Call) +#### 2. Intent Detection (LLM Call on matched service only) ```json { "matched_service_id": "currency_conversion_eur", @@ -516,12 +532,12 @@ services = discovery_result["services"] } ``` -#### 4. Entity Extraction +#### 3. Entity Extraction ```python entities_dict = {"target_currency": "THB"} ``` -#### 5. Entity Validation +#### 4. Entity Validation ```python validation_result = { "is_valid": True, @@ -531,7 +547,7 @@ validation_result = { } ``` -#### 6. Entity Transformation +#### 5. Entity Transformation ```python # Schema: ["target_currency"] # Dict: {"target_currency": "THB"} @@ -539,14 +555,17 @@ validation_result = { entities_array = ["THB"] ``` -#### 7. Service Call (TODO) +#### 6. 
Service Call ```python -# Planned implementation -response = await call_service( - url="http://ruuter:8080/currency/convert", - method="POST", - payload={"input": ["THB"], "chatId": "..."} +endpoint_url = "http://ruuter-public:8086/services/services/active/Currency_Conversion" +response = await _call_service_endpoint( + endpoint_url=endpoint_url, + http_method="POST", + entities_array=["THB"], + chat_id="...", + author_id="..." ) +# Returns content string from Ruuter response ``` --- @@ -580,16 +599,16 @@ LLM USAGE COSTS BREAKDOWN: ### When Service Workflow Returns None ```python -# Scenario 1: No service match (confidence < 0.7) -if not intent_result or intent_result.get("confidence", 0) < 0.7: +# Scenario 1: No service_id in context after intent detection +if not context.get("service_id"): return None # Fallback to CONTEXT layer -# Scenario 2: Service validation failed -if not validated_service: +# Scenario 2: Service metadata extraction failed +if not service_metadata: return None # Fallback to CONTEXT layer -# Scenario 3: No services discovered -if not services: +# Scenario 3: Service endpoint call failed +if service_content is None: return None # Fallback to CONTEXT layer ``` @@ -607,22 +626,31 @@ Query: "What is VAT?" 
## Configuration Constants ```python -# Service discovery -RUUTER_BASE_URL = "http://ruuter.public:8080" -SERVICE_DISCOVERY_TIMEOUT = 5.0 # seconds +# Ruuter service configuration +RUUTER_BASE_URL = "http://ruuter-private:8086" +RUUTER_SERVICE_BASE_URL = "http://ruuter-public:8086/services" +RAG_SEARCH_RUUTER_PUBLIC = "http://ruuter-public:8086/rag-search" + +# Service call timeouts +SERVICE_CALL_TIMEOUT = 10 # seconds for external service calls +SERVICE_DISCOVERY_TIMEOUT = 10.0 # seconds for service discovery # Service selection thresholds -SERVICE_COUNT_THRESHOLD = 10 # Switch to semantic search if exceeded -MAX_SERVICES_FOR_LLM_CONTEXT = 20 # Max services to pass to LLM +SERVICE_COUNT_THRESHOLD = 10 # Switch to semantic search if exceeded +MAX_SERVICES_FOR_LLM_CONTEXT = 50 # Max services to pass to LLM # Semantic search -QDRANT_COLLECTION = "services_collection" -SEMANTIC_SEARCH_TOP_K = 5 # Top 5 relevant services -SEMANTIC_SEARCH_THRESHOLD = 0.4 # Minimum similarity score -QDRANT_TIMEOUT = 2.0 # seconds +QDRANT_COLLECTION = "intent_collections" +SEMANTIC_SEARCH_TOP_K = 10 # Top 10 relevant services +SEMANTIC_SEARCH_THRESHOLD = 0.2 # Minimum similarity score +QDRANT_TIMEOUT = 10.0 # seconds -# Intent detection -INTENT_CONFIDENCE_THRESHOLD = 0.7 # Minimum confidence to proceed +# Hybrid search classification (see HYBRID_SEARCH_CLASSIFICATION.md) +DENSE_MIN_THRESHOLD = 0.38 # Minimum cosine to consider service match +DENSE_HIGH_CONFIDENCE_THRESHOLD = 0.40 # Cosine for high-confidence path +DENSE_SCORE_GAP_THRESHOLD = 0.05 # Required gap between top two services +DENSE_SEARCH_TOP_K = 3 # Unique services from dense search +HYBRID_SEARCH_TOP_K = 5 # Results from hybrid RRF search ``` --- @@ -639,11 +667,13 @@ INTENT_CONFIDENCE_THRESHOLD = 0.7 # Minimum confidence to proceed - Schema defines canonical order - Missing entities → empty strings -### 3. 
**Two-Stage Service Selection** -- Few services (≤10): Pass all to LLM -- Many services (>10): Semantic search first +### 3. **Three Routing Paths** +- **High-confidence**: Hybrid search matched → skip discovery, intent on 1 service +- **Ambiguous**: Moderate match → intent detection on top candidates +- **Legacy**: No classification metadata → full discovery flow ### 4. **LLM-Based Intent Detection** +- Uses DSPy `dspy.Predict` (not ChainOfThought) for direct prediction - Intelligent service matching - Natural language understanding - Multilingual support (Estonian, English, Russian) @@ -653,8 +683,14 @@ INTENT_CONFIDENCE_THRESHOLD = 0.7 # Minimum confidence to proceed - Tracks intent detection LLM costs - Integrated with budget system +### 6. **Implemented Service Call** +- Calls Ruuter active service endpoint via httpx +- Handles POST and GET methods +- Parses DMapper response format (`{"response": [{"content": "..."}]}`) +- Cleans service name (invisible chars, whitespace → underscore) + --- ## Summary -The Tool Classifier's layer architecture enables intelligent query routing with graceful fallbacks. The Service Workflow (Layer 1) uses **LLM-based intent detection** to match user queries to external services, extract entities, validate them against service schemas, and prepare them for service invocation—all while maintaining comprehensive cost tracking and seamless integration with the broader RAG pipeline. +The Tool Classifier's layer architecture enables intelligent query routing with graceful fallbacks. The Service Workflow (Layer 1) uses **hybrid search classification** (dense + sparse + RRF) to route queries into 3 paths: high-confidence (skip discovery), ambiguous (LLM confirmation on candidates), or legacy (full discovery). 
It then uses **LLM-based intent detection** (DSPy Predict) to match user queries to external services, extract entities, validate them against service schemas, transform to ordered arrays, and **call the Ruuter active service endpoint** — all while maintaining comprehensive cost tracking and seamless integration with the broader RAG pipeline. diff --git a/new.txt b/new.txt new file mode 100644 index 0000000..9e7525f --- /dev/null +++ b/new.txt @@ -0,0 +1,38 @@ +1️⃣ Broneeringu kinnitus (Booking Confirmation) + +Estonian → English + +Kas minu broneering on kinnitatud? +→ Is my booking confirmed? + +Palun kinnita minu broneering. +→ Please confirm my booking. + +Kas broneering sai edukalt tehtud? +→ Was the booking successfully made? + +2️⃣ Kalastusloa uuendamise teade (Fishing License Renewal) + +Estonian → English + +Kas minu kalastusluba tuleb uuendada? +→ Do I need to renew my fishing license? + +Millal mu kalastusluba aegub? +→ When does my fishing license expire? + +Kas mu kalastusluba on veel kehtiv? +→ Is my fishing license still valid? + +3️⃣ Koolivaheajad (School Holidays) + +Estonian → English + +Millal on järgmine koolivaheaeg? +→ When is the next school holiday? + +Kas sa saad öelda selle aasta koolivaheajad? +→ Can you tell me the school holidays for this year? + +Millal algab suvevaheaeg? +→ When does the summer holiday start? 
\ No newline at end of file diff --git a/src/tool_classifier/constants.py b/src/tool_classifier/constants.py index 65f3033..64fdbe5 100644 --- a/src/tool_classifier/constants.py +++ b/src/tool_classifier/constants.py @@ -37,6 +37,9 @@ RUUTER_BASE_URL = "http://ruuter-private:8086" """Base URL for Ruuter private service endpoints.""" +RUUTER_SERVICE_BASE_URL = "http://ruuter-public:8086/services" +"""Base URL for Ruuter public service endpoints (active services).""" + RAG_SEARCH_RUUTER_PUBLIC = "http://ruuter-public:8086/rag-search" """Public Ruuter endpoint for RAG search service discovery.""" diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index bb72f78..b7fe561 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -20,9 +20,10 @@ QDRANT_PORT, QDRANT_TIMEOUT, RAG_SEARCH_RUUTER_PUBLIC, - RUUTER_BASE_URL, + RUUTER_SERVICE_BASE_URL, SEMANTIC_SEARCH_THRESHOLD, SEMANTIC_SEARCH_TOP_K, + SERVICE_CALL_TIMEOUT, SERVICE_COUNT_THRESHOLD, SERVICE_DISCOVERY_TIMEOUT, ) @@ -105,7 +106,6 @@ async def _semantic_search_services( return None try: - # Generate embedding using orchestration service embedding_result = self.orchestration_service.create_embeddings_for_indexer( texts=[query], environment=request.environment, @@ -120,12 +120,10 @@ async def _semantic_search_services( query_embedding = embeddings[0] - # Create Qdrant client with proper resource cleanup via context manager qdrant_url = f"http://{QDRANT_HOST}:{QDRANT_PORT}" async with httpx.AsyncClient( base_url=qdrant_url, timeout=QDRANT_TIMEOUT ) as client: - # Verify collection exists and has data try: collection_info = await client.get( f"/collections/{QDRANT_COLLECTION}" @@ -139,7 +137,6 @@ async def _semantic_search_services( except Exception as e: logger.warning(f"[{chat_id}] Could not verify collection: {e}") - # Search Qdrant collection search_payload = { "vector": query_embedding, 
"limit": top_k, @@ -167,7 +164,6 @@ async def _semantic_search_services( ) return None - # Transform Qdrant results to service format services: List[Dict[str, Any]] = [] for point in points: payload = point.get("payload", {}) @@ -180,8 +176,6 @@ async def _semantic_search_services( "description": payload.get("description"), "examples": payload.get("examples", []), "entities": payload.get("entities", []), - # Note: endpoint not stored in intent_collections, - # will be resolved via database lookup if needed "similarity_score": score, } services.append(service) @@ -234,30 +228,24 @@ async def _detect_service_intent( - usage_info: Cost and token usage information """ try: - # Ensure DSPy is configured with LLMManager if self.llm_manager: self.llm_manager.ensure_global_config() else: logger.error(f"[{chat_id}] LLM Manager not available") return None, {} - # Capture history length before LLM call for cost tracking lm = dspy.settings.lm history_length_before = ( len(lm.history) if lm and hasattr(lm, "history") else 0 ) - # Create DSPy module intent_module = IntentDetectionModule() - - # Convert conversation history to dict format history_dicts = [ {"authorRole": msg.authorRole, "message": msg.message} for msg in conversation_history if hasattr(msg, "authorRole") and hasattr(msg, "message") ] - # Call DSPy forward with task-local config with self.llm_manager.use_task_local(): intent_result = intent_module.forward( user_query=user_query, @@ -265,7 +253,6 @@ async def _detect_service_intent( conversation_history=history_dicts, ) - # Extract usage information after LLM call usage_info = get_lm_usage_since(history_length_before) return intent_result, usage_info @@ -342,31 +329,20 @@ def _extract_service_metadata( self, context: Dict[str, Any], chat_id: str ) -> Optional[Dict[str, Any]]: """Extract service and entity metadata from context.""" - # Check if service_id exists service_id = context.get("service_id") if not service_id: logger.error(f"[{chat_id}] Missing service_id 
in context") return None - # Check if service_data exists service_data = context.get("service_data") if not service_data: logger.error(f"[{chat_id}] Missing service_data in context") return None - # Extract entities dict from context (LLM extracted) entities_dict = context.get("entities", {}) - - # Extract entity schema from service_data (expected order) - entity_schema = service_data.get("entities", []) - if entity_schema is None: - entity_schema = [] - - # Extract service name + entity_schema = service_data.get("entities", []) or [] service_name = service_data.get("name", service_id) - - # Extract HTTP method (ruuter_type) - defaults to GET if not specified - ruuter_type = service_data.get("ruuter_type", "GET") + ruuter_type = service_data.get("ruuter_type", "POST") return { "service_id": service_id, @@ -417,10 +393,7 @@ def _validate_entities( if entity_key not in service_schema: extra_entities.append(entity_key) - # Determine overall validity - # We consider it valid even with missing entities (will send empty strings) - # Let the external service validate required parameters - is_valid = True # Always true - we proceed with partial entities + is_valid = True return { "is_valid": is_valid, @@ -435,29 +408,98 @@ def _transform_entities_to_array( """Transform entity dictionary to ordered array based on service schema.""" if not entity_order: return [] - - # Transform to ordered array, filling missing with empty strings return [entities_dict.get(key, "") for key in entity_order] + _INVISIBLE_CHAR_TABLE = str.maketrans( + "", "", "\u2060\u200b\u200c\u200d\ufeff\u00ad\u200e\u200f" + ) + def _construct_service_endpoint(self, service_name: str, chat_id: str) -> str: """Construct the full service endpoint URL for Ruuter.""" - return f"{RUUTER_BASE_URL}/services/active{service_name}" + clean_name = ( + service_name.strip().translate(self._INVISIBLE_CHAR_TABLE).replace(" ", "_") + ) + return f"{RUUTER_SERVICE_BASE_URL}/services/active/{clean_name}" - def 
_format_debug_response( + async def _call_service_endpoint( self, - service_name: str, endpoint_url: str, http_method: str, entities_array: List[str], - ) -> str: - """Format debug information for testing (temporary before Step 7 implementation).""" - entities_str = ", ".join(f'"{e}"' for e in entities_array) - return ( - f" Service Validated: {service_name}\n" - f" Endpoint URL: {endpoint_url}\n" - f" HTTP Method: {http_method}\n" - f" Extracted Entities: [{entities_str}]\n\n" - ) + chat_id: str, + author_id: str, + ) -> Optional[str]: + """Call the Ruuter active service endpoint and extract response content. + + Args: + endpoint_url: Full URL of the active service endpoint + http_method: HTTP method (POST/GET) + entities_array: Ordered entity values for the service + chat_id: Chat session ID + author_id: Author/user ID + + Returns: + Service response content string, or None on failure. + """ + payload = { + "chatId": chat_id, + "authorId": author_id, + "input": entities_array, + } + + try: + async with httpx.AsyncClient(timeout=SERVICE_CALL_TIMEOUT) as client: + if http_method.upper() == "POST": + response = await client.post(endpoint_url, json=payload) + else: + response = await client.get(endpoint_url, params=payload) + + response.raise_for_status() + data = response.json() + + # Ruuter wraps the DSL return value in {"response": ...} + # The inner value is the DMapper array from bot_responses_to_messages + if isinstance(data, dict) and "response" in data: + data = data["response"] + + # DMapper returns a JSON array; each item has a "content" field + if isinstance(data, list) and len(data) > 0: + content = data[0].get("content", "") + if content: + logger.info( + f"[{chat_id}] Service endpoint returned content " + f"({len(content)} chars)" + ) + return content + + logger.warning( + f"[{chat_id}] Service response missing 'content' field" + ) + return None + + logger.warning( + f"[{chat_id}] Unexpected service response format: {type(data)}" + ) + return None + + 
except httpx.TimeoutException: + logger.error( + f"[{chat_id}] Service endpoint timeout after {SERVICE_CALL_TIMEOUT}s: " + f"{endpoint_url}" + ) + return None + except httpx.HTTPStatusError as e: + logger.error( + f"[{chat_id}] Service endpoint HTTP error: " + f"{e.response.status_code} for {endpoint_url}" + ) + return None + except Exception as e: + logger.error( + f"[{chat_id}] Service endpoint call failed: {e}", + exc_info=True, + ) + return None async def _log_request_details( self, @@ -477,16 +519,13 @@ async def _log_request_details( chat_id = request.chatId logger.info(f"[{chat_id}] SERVICE WORKFLOW ({mode}): {request.message}") - # Service Discovery discovery_result = await self._call_service_discovery(chat_id) if discovery_result: - # Extract data from nested response structure response_data = discovery_result.get("response", {}) use_semantic = response_data.get("use_semantic_search", False) service_count = response_data.get("service_count", 0) - # Handle service_count if it's a string or NaN if isinstance(service_count, str): try: service_count = int(service_count) @@ -495,12 +534,10 @@ async def _log_request_details( services_from_ruuter = response_data.get("services", []) - # Use semantic search if count > threshold if service_count > SERVICE_COUNT_THRESHOLD: use_semantic = True if use_semantic: - # Use semantic search to find relevant services services = await self._semantic_search_services( query=request.message, request=request, @@ -567,34 +604,25 @@ async def execute_async( chat_id = request.chatId - # Create costs tracking dictionary (follows RAG workflow pattern) costs_metric: Dict[str, Dict[str, Any]] = {} - # Use parent time_metric or create new one if time_metric is None: time_metric = {} - # Check if classifier provided hybrid search metadata needs_llm_confirmation = context.get("needs_llm_confirmation") if needs_llm_confirmation is False: - # HIGH CONFIDENCE PATH: Classifier matched a service with high confidence - # Skip service discovery — 
use hybrid search match directly - matched_service_id = context.get("matched_service_id") matched_service_name = context.get("matched_service_name") cosine_score = context.get("cosine_score", 0.0) logger.info( - f"[{chat_id}] HIGH-CONFIDENCE SERVICE MATCH (non-streaming): " - f"{matched_service_name} (cosine_score={cosine_score:.4f}) - " - f"skipping discovery" + f"[{chat_id}] High-confidence service match: " + f"{matched_service_name} (score={cosine_score:.4f})" ) - # Get service details from top_results (already retrieved by classifier) top_results = context.get("top_results", []) if top_results: matched = top_results[0] - # Run entity extraction via LLM (DSPy) for this single service start_time = time.time() await self._process_intent_detection( services=[matched], @@ -605,24 +633,15 @@ async def execute_async( ) time_metric["service.intent_detection"] = time.time() - start_time - # Ensure service_data is populated from hybrid match - # _process_intent_detection may not set it if DSPy returns - # a different service_id format, so we populate it explicitly if not context.get("service_data"): context["service_id"] = matched.get("service_id") context["service_data"] = matched - logger.info( - f"[{chat_id}] Populated service_data from hybrid match: " - f"{matched.get('name')}" - ) elif needs_llm_confirmation is True: - # AMBIGUOUS PATH: Multiple services scored similarly - # Run LLM intent detection only on candidate services (not all services) top_results = context.get("top_results", []) logger.info( - f"[{chat_id}] AMBIGUOUS SERVICE MATCH (non-streaming): " - f"running LLM intent detection on {len(top_results)} candidates" + f"[{chat_id}] Ambiguous match: " + f"running intent detection on {len(top_results)} candidates" ) start_time = time.time() @@ -637,44 +656,26 @@ async def execute_async( time_metric["service.intent_detection"] = time.time() - start_time else: - # LEGACY PATH: No hybrid search metadata (classifier disabled or error) - # Full service discovery + 
intent detection (original behavior) start_time = time.time() await self._log_request_details( request, context, mode="non-streaming", costs_metric=costs_metric ) time_metric["service.discovery"] = time.time() - start_time - # Check if service was detected and validated if not context.get("service_id"): - logger.info( - f"[{chat_id}] No service detected or validated - " - f"returning None to fallback to next layer" - ) + logger.info(f"[{chat_id}] No service matched, falling back") return None - # Entity Transformation & Validation - logger.info(f"[{chat_id}] Entity Transformation:") - - # Step 1: Extract service metadata from context start_time = time.time() service_metadata = self._extract_service_metadata(context, chat_id) if not service_metadata: - logger.error( - f"[{chat_id}] - Metadata extraction failed - " - f"returning None to fallback" - ) return None - logger.info(f"[{chat_id}] - Service: {service_metadata['service_name']}") - logger.info( - f"[{chat_id}] - Schema entities: {service_metadata['entity_schema']}" - ) logger.info( - f"[{chat_id}] - Extracted entities: {service_metadata['entities_dict']}" + f"[{chat_id}] Service: {service_metadata['service_name']}, " + f"entities: {service_metadata['entities_dict']}" ) - # Step 2: Validate entities against schema validation_result = self._validate_entities( extracted_entities=service_metadata["entities_dict"], service_schema=service_metadata["entity_schema"], @@ -683,28 +684,11 @@ async def execute_async( ) time_metric["service.entity_validation"] = time.time() - start_time - logger.info( - f"[{chat_id}] - Validation status: " - f"{'PASSED ✓' if validation_result['is_valid'] else 'FAILED ✗'}" - ) - if validation_result["missing_entities"]: logger.warning( - f"[{chat_id}] - Missing entities (will send empty strings): " - f"{validation_result['missing_entities']}" - ) - - if validation_result["extra_entities"]: - logger.info( - f"[{chat_id}] - Extra entities (ignored): " - 
f"{validation_result['extra_entities']}" + f"[{chat_id}] Missing entities: {validation_result['missing_entities']}" ) - if validation_result["validation_errors"]: - for error in validation_result["validation_errors"]: - logger.warning(f"[{chat_id}] - Validation warning: {error}") - - # Step 3: Transform entities dict to ordered array entities_array = self._transform_entities_to_array( entities_dict=service_metadata["entities_dict"], entity_order=service_metadata["entity_schema"], @@ -713,46 +697,36 @@ async def execute_async( context["entities_array"] = entities_array context["validation_result"] = validation_result - # Construct service endpoint URL endpoint_url = self._construct_service_endpoint( service_name=service_metadata["service_name"], chat_id=chat_id ) - context["endpoint_url"] = endpoint_url context["http_method"] = service_metadata["ruuter_type"] - logger.info(f"[{chat_id}] Service prepared: {endpoint_url}") - - # TODO: STEP 7 - Call Ruuter service endpoint and return response - # 1. Build payload: {"input": entities_array, "authorId": request.authorId, "chatId": request.chatId} - # 2. Call endpoint using http_method (POST/GET) with SERVICE_CALL_TIMEOUT - # 3. Parse Ruuter response and extract result - # 4. Return OrchestrationResponse with actual service result - # 5. 
Handle errors (timeout, HTTP errors, malformed JSON) - - # STEP 6: Return debug response (temporary until Step 7 - Ruuter call implemented) - # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (START) - debug_content = self._format_debug_response( - service_name=service_metadata["service_name"], + start_time = time.time() + service_content = await self._call_service_endpoint( endpoint_url=endpoint_url, http_method=service_metadata["ruuter_type"], entities_array=entities_array, + chat_id=chat_id, + author_id=request.authorId, ) + time_metric["service.endpoint_call"] = time.time() - start_time - logger.info(f"[{chat_id}] Returning debug response (Step 7 pending)") - - # Log costs after service workflow completes (follows RAG workflow pattern) if self.orchestration_service: self.orchestration_service.log_costs(costs_metric) + if service_content is None: + logger.warning(f"[{chat_id}] Service endpoint call failed, falling back") + return None + return OrchestrationResponse( chatId=request.chatId, llmServiceActive=True, questionOutOfLLMScope=False, inputGuardFailed=False, - content=debug_content, + content=service_content, ) - # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (END) async def execute_streaming( self, @@ -772,23 +746,19 @@ async def execute_streaming( chat_id = request.chatId - # Create costs tracking dictionary (follows RAG workflow pattern) costs_metric: Dict[str, Dict[str, Any]] = {} - # Use parent time_metric or create new one if time_metric is None: time_metric = {} - # Check if classifier provided hybrid search metadata needs_llm_confirmation = context.get("needs_llm_confirmation") if needs_llm_confirmation is False: - # HIGH CONFIDENCE PATH: Skip discovery, use matched service matched_service_name = context.get("matched_service_name") cosine_score = context.get("cosine_score", 0.0) logger.info( - f"[{chat_id}] HIGH-CONFIDENCE SERVICE MATCH (streaming): " - f"{matched_service_name} (cosine_score={cosine_score:.4f})" + f"[{chat_id}] High-confidence 
service match: " + f"{matched_service_name} (score={cosine_score:.4f})" ) top_results = context.get("top_results", []) @@ -805,21 +775,15 @@ async def execute_streaming( ) time_metric["service.intent_detection"] = time.time() - start_time - # Ensure service_data is populated from hybrid match if not context.get("service_data"): context["service_id"] = matched.get("service_id") context["service_data"] = matched - logger.info( - f"[{chat_id}] Populated service_data from hybrid match: " - f"{matched.get('name')}" - ) elif needs_llm_confirmation is True: - # AMBIGUOUS PATH: Run LLM intent detection on candidates top_results = context.get("top_results", []) logger.info( - f"[{chat_id}] AMBIGUOUS SERVICE MATCH (streaming): " - f"{len(top_results)} candidates" + f"[{chat_id}] Ambiguous match: " + f"running intent detection on {len(top_results)} candidates" ) start_time = time.time() @@ -834,42 +798,25 @@ async def execute_streaming( time_metric["service.intent_detection"] = time.time() - start_time else: - # LEGACY PATH: Full service discovery (original behavior) start_time = time.time() await self._log_request_details( request, context, mode="streaming", costs_metric=costs_metric ) time_metric["service.discovery"] = time.time() - start_time - # Check if service was detected and validated if not context.get("service_id"): - logger.info( - f"[{chat_id}] No service detected or validated - " - f"returning None to fallback to next layer" - ) + logger.info(f"[{chat_id}] No service matched, falling back") return None - # Entity Transformation & Validation - logger.info(f"[{chat_id}] Entity Transformation:") - - # Step 1: Extract service metadata from context service_metadata = self._extract_service_metadata(context, chat_id) if not service_metadata: - logger.error( - f"[{chat_id}] - Metadata extraction failed - " - f"returning None to fallback" - ) return None - logger.info(f"[{chat_id}] - Service: {service_metadata['service_name']}") logger.info( - f"[{chat_id}] - Schema 
entities: {service_metadata['entity_schema']}" - ) - logger.info( - f"[{chat_id}] - Extracted entities: {service_metadata['entities_dict']}" + f"[{chat_id}] Service: {service_metadata['service_name']}, " + f"entities: {service_metadata['entities_dict']}" ) - # Step 2: Validate entities against schema validation_result = self._validate_entities( extracted_entities=service_metadata["entities_dict"], service_schema=service_metadata["entity_schema"], @@ -877,28 +824,11 @@ async def execute_streaming( chat_id=chat_id, ) - logger.info( - f"[{chat_id}] - Validation status: " - f"{'PASSED ✓' if validation_result['is_valid'] else 'FAILED ✗'}" - ) - if validation_result["missing_entities"]: logger.warning( - f"[{chat_id}] - Missing entities (will send empty strings): " - f"{validation_result['missing_entities']}" + f"[{chat_id}] Missing entities: {validation_result['missing_entities']}" ) - if validation_result["extra_entities"]: - logger.info( - f"[{chat_id}] - Extra entities (ignored): " - f"{validation_result['extra_entities']}" - ) - - if validation_result["validation_errors"]: - for error in validation_result["validation_errors"]: - logger.warning(f"[{chat_id}] - Validation warning: {error}") - - # Step 3: Transform entities dict to ordered array entities_array = self._transform_entities_to_array( entities_dict=service_metadata["entities_dict"], entity_order=service_metadata["entity_schema"], @@ -907,47 +837,32 @@ async def execute_streaming( context["entities_array"] = entities_array context["validation_result"] = validation_result - # Construct service endpoint URL endpoint_url = self._construct_service_endpoint( service_name=service_metadata["service_name"], chat_id=chat_id ) - context["endpoint_url"] = endpoint_url context["http_method"] = service_metadata["ruuter_type"] - logger.info(f"[{chat_id}] Service prepared: {endpoint_url}") - - # TODO: STEP 7 - Call Ruuter service endpoint and stream response - # 1. 
Build payload: {"input": entities_array, "authorId": request.authorId, "chatId": request.chatId} - # 2. Call endpoint using http_method (POST/GET) with SERVICE_CALL_TIMEOUT - # 3. Parse Ruuter response and extract result - # 4. Format result as SSE and yield chunks - # 5. Handle errors (timeout, HTTP errors, malformed JSON) - - # STEP 6: Return debug response as async iterator (temporary until Step 7) - # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (START) - debug_content = self._format_debug_response( - service_name=service_metadata["service_name"], + service_content = await self._call_service_endpoint( endpoint_url=endpoint_url, http_method=service_metadata["ruuter_type"], entities_array=entities_array, + chat_id=chat_id, + author_id=request.authorId, ) - logger.info(f"[{chat_id}] Streaming debug response (Step 7 pending)") + if service_content is None: + logger.warning(f"[{chat_id}] Service endpoint call failed, falling back") + return None if self.orchestration_service is None: raise RuntimeError("Orchestration service not initialized for streaming") - # Store reference for closure (helps type checker) orchestration_service = self.orchestration_service - async def debug_stream() -> AsyncIterator[str]: - yield orchestration_service.format_sse(chat_id, debug_content) + async def service_stream() -> AsyncIterator[str]: + yield orchestration_service.format_sse(chat_id, service_content) yield orchestration_service.format_sse(chat_id, "END") - - # Log costs after streaming completes (follows RAG workflow pattern) - # Must be inside generator because costs are accumulated during streaming orchestration_service.log_costs(costs_metric) - return debug_stream() - # REMOVE THIS BLOCK AFTER STEP 7 IMPLEMENTATION (END) + return service_stream() diff --git a/tests/data/classification_test_queries.json b/tests/data/classification_test_queries.json new file mode 100644 index 0000000..28bb481 --- /dev/null +++ b/tests/data/classification_test_queries.json @@ -0,0 +1,266 @@ +[ 
+ { + "query": "Mitu töötajat on ettevõttes Bolt?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_employees", + "language": "et" + }, + { + "query": "Kui palju inimesi töötab firmas Tallink?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_employees", + "language": "et" + }, + { + "query": "Mis on Swedbanki töötajate arv?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_employees", + "language": "et" + }, + { + "query": "Kui palju töötajaid on ettevõttel Eesti Energia?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_employees", + "language": "et" + }, + { + "query": "Mis on ettevõtte aasta käive?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_revenue", + "language": "et" + }, + { + "query": "Kui suur on firma käive?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_revenue", + "language": "et" + }, + { + "query": "Kui palju maksis ettevõte tööjõumakse?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_workforce_taxes", + "language": "et" + }, + { + "query": "Kui palju maksis ettevõte riiklikke makse?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_national_taxes", + "language": "et" + }, + { + "query": "Kes on firma tegelikud kasusaajad?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_beneficiaries", + "language": "et" + }, + { + "query": "Mis on ettevõtte kontaktandmed?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_companies_contactdetails", + "language": "et" + }, + { + "query": "Millal on selle aasta koolivaheajad?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_school_holiday", + "language": "et" + }, + { + "query": "Mis olid viimaste NBA mängude tulemused?", + 
"expected_category": "SERVICE", + "expected_service_id": "common_service_nba_results", + "language": "et" + }, + { + "query": "Mis on euro ja dollari vahetuskurss?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_exchange_rate", + "language": "et" + }, + { + "query": "Mis on viis viimast avalikku algatust?", + "expected_category": "SERVICE", + "expected_service_id": "common_teenus_citizien_initiative", + "language": "et" + }, + { + "query": "Mis on hetkel populaarsemad rahvaalgatused?", + "expected_category": "SERVICE", + "expected_service_id": "common_teenus_citizien_initiative_popular", + "language": "et" + }, + { + "query": "Kui palju kasvasid tarbija hinnad eelmisel aastal?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_CPI", + "language": "et" + }, + { + "query": "Mis ilm on Tallinnas?", + "expected_category": "SERVICE", + "expected_service_id": "common_teenus_ilm", + "language": "et" + }, + { + "query": "Kas Narvas on ilus ilm?", + "expected_category": "SERVICE", + "expected_service_id": "common_teenus_ilm", + "language": "et" + }, + { + "query": "Mis on ööpäeva odavaim elektri hind?", + "expected_category": "SERVICE", + "expected_service_id": "common_teenus_nordpool2", + "language": "et" + }, + { + "query": "Kus leida diiselkütuse hinnaindeks?", + "expected_category": "SERVICE", + "expected_service_id": "common_service_CPI", + "language": "et" + }, + { + "query": "Miks ID-kaart ei tööta e-teenustes, kuigi DigiDoc4 loeb kaardi andmed sisse?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas Safari brauseris vahemälu tühjendada, kui ID-kaardiga sisselogimine ei tööta?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas kontrollida ja lubada ID-kaardi jaoks vajalikke laiendusi Firefoxi brauseris?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + 
"query": "Mida teha, kui Firefoxis puudub või ei tööta Web eID või PKCS11 loader laiendus?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas anda dokumendile digiallkiri DigiDoc4 abil Windows 10 või Windows 11 arvutis?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas allkirjastada mitu faili korraga DigiDoc4-s mobiil-ID abil?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Mida teha, kui mu telefon koos Mobiil-IDga on kadunud või varastatud?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas peatada ja hiljem taastada Mobiil-ID sertifikaadid?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas allkirjastada dokument DigiDoc rakenduses mobiil-ID abil samm-sammult?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Mida pean kontrollima enne, kui annan DigiDocis dokumendile mobiil-IDga digiallkirja ja kuidas see pärast salvestada?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas siseneda e-teenustesse mobiil-ID abil samm-sammult?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Mida teha, kui mobiil-IDga sisselogimisel kontrollkoodid ei kattu või küsitakse ootamatult PIN-koodi?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Mida pean tegema, kui mu ID-kaart või mobiiltelefon (Mobiil-ID) on kadunud või varastatud?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas peatada ja hiljem taastada ID-kaardi ja Mobiil-ID sertifikaadid?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + 
"query": "Kuidas siseneda e-teenustesse mobiil-ID abil samm-sammult?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Mida teha, kui mobiil-IDga sisselogimisel kontrollkood ei kattu või küsitakse ootamatult PIN-koodi?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kust saab alla laadida ja paigaldada ametliku ID-kaardi tarkvara (DigiDoc4 ja Web eID) Windowsi, macOS-i ja mobiili jaoks?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Milliseid rakendusi on vaja ID-kaardi ja digiallkirja kasutamiseks Androidi ja iPhone’i telefonis?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Milleks on ID-kaardi sertifikaadid ja mis vahe on PIN1- ja PIN2-sertifikaadil?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Mida teha, kui mu ID-kaart või mobiil-ID on kadunud ja kuidas sertifikaate peatada?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas allkirjastada dokumente mobiil-ID abil RIA DigiDoc rakenduses samm-sammult?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas salvestada ja jagada DigiDocis allkirjastatud dokumendiümbrik ning lisada korraga mitu faili?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas anda digiallkiri dokumentidele DigiDoc4 rakenduses mobiil-ID abil Windows 10 või 11 arvutis?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + }, + { + "query": "Kuidas allkirjastada mitu faili korraga DigiDoc4-s ja kontrollida mobiil-ID kontrollkoodi?", + "expected_category": "RAG", + "expected_service_id": "", + "language": "et" + } +] From 9ce1da2897f1b3a80e70d350a7b77a4cc2f075b3 Mon Sep 17 00:00:00 2001 
From: nuwangeek Date: Mon, 9 Mar 2026 09:18:57 +0530 Subject: [PATCH 21/27] context based response generation flow --- docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md | 323 +++++++ src/llm_orchestration_service.py | 5 + src/llm_orchestration_service_api.py | 2 +- src/llm_orchestrator_config/stream_config.py | 7 +- src/tool_classifier/classifier.py | 15 +- src/tool_classifier/constants.py | 6 +- src/tool_classifier/context_analyzer.py | 893 ++++++++++++++++++ src/tool_classifier/greeting_constants.py | 40 + .../workflows/context_workflow.py | 350 ++++++- src/tool_classifier/workflows/rag_workflow.py | 47 +- .../workflows/service_workflow.py | 17 + src/utils/rate_limiter.py | 131 ++- 12 files changed, 1702 insertions(+), 134 deletions(-) create mode 100644 docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md create mode 100644 src/tool_classifier/context_analyzer.py create mode 100644 src/tool_classifier/greeting_constants.py diff --git a/docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md b/docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md new file mode 100644 index 0000000..4df8d1e --- /dev/null +++ b/docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md @@ -0,0 +1,323 @@ +# Context Workflow: Greeting Detection and Conversation History Analysis + +## Overview + +The **Context Workflow (Layer 2)** intercepts user queries that can be answered without searching the knowledge base. It handles two categories: + +1. **Greetings** — Detects and responds to social exchanges (hello, goodbye, thanks) in multiple languages +2. **Conversation history references** — Answers follow-up questions that refer to information already discussed in the session + +When the context workflow can answer, a response is returned immediately, bypassing the RAG pipeline entirely. When it cannot answer, the query falls through to the RAG workflow (Layer 3). 
+ +--- + +## Architecture + +### Position in the Classifier Chain + +``` +User Query + ↓ +Layer 1: SERVICE → External API calls + ↓ (cannot handle) +Layer 2: CONTEXT → Greetings + conversation history ←── This document + ↓ (cannot handle) +Layer 3: RAG → Knowledge base retrieval + ↓ (cannot handle) +Layer 4: OOD → Out-of-domain fallback +``` + +### Key Components + +| Component | File | Responsibility | +|-----------|------|----------------| +| `ContextAnalyzer` | `src/tool_classifier/context_analyzer.py` | LLM-based greeting detection and context analysis | +| `ContextWorkflowExecutor` | `src/tool_classifier/workflows/context_workflow.py` | Orchestrates the workflow, handles streaming/non-streaming | +| `ToolClassifier` | `src/tool_classifier/classifier.py` | Invokes `ContextAnalyzer` during classification and routes to `ContextWorkflowExecutor` | +| `greeting_constants.py` | `src/tool_classifier/greeting_constants.py` | Fallback greeting responses for Estonian and English | + +--- + +## Full Request Flow + +``` +User Query + Conversation History + ↓ +ToolClassifier.classify() + ├─ Layer 1 (SERVICE): Embedding-based intent routing + │ └─ If no service tool matches → route to CONTEXT workflow + │ + └─ ClassificationResult(workflow=CONTEXT) + +ToolClassifier.route_to_workflow() + ├─ Non-streaming → ContextWorkflowExecutor.execute_async() + │ ├─ Phase 1: _detect() → context_analyzer.detect_context() [classification only] + │ ├─ If greeting → return greeting OrchestrationResponse + │ ├─ If can_answer → _generate_response_async() → context_analyzer.generate_context_response() + │ └─ Otherwise → return None (RAG fallback) + │ + └─ Streaming → ContextWorkflowExecutor.execute_streaming() + ├─ Phase 1: _detect() → context_analyzer.detect_context() [classification only] + ├─ If greeting → _stream_greeting() async generator + ├─ If can_answer → _create_history_stream() → context_analyzer.stream_context_response() + └─ Otherwise → return None (RAG fallback) +``` + +--- + +## 
Phase 1: Detection (Classify Only) + +### LLM Task + +Every query is checked against the **most recent 10 conversation turns** using a single LLM call (`detect_context()`). This phase **does not generate an answer** — it only classifies the query and extracts a relevant context snippet for Phase 2. + +The `ContextDetectionSignature` DSPy signature instructs the LLM to: + +1. Detect if the query is a greeting in any supported language +2. Check if the query references something discussed in the last 10 turns +3. If the query can be answered from history, extract the relevant snippet +4. Do **not** generate the final answer here — detection only + +### LLM Output Format + +The LLM returns a JSON object parsed into `ContextDetectionResult`: + +```json +{ + "is_greeting": false, + "can_answer_from_context": true, + "reasoning": "User is asking about tax rate discussed earlier", + "context_snippet": "Bot confirmed the flat rate is 20%, applying equally to all income brackets." +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `is_greeting` | `bool` | Whether the query is a greeting | +| `can_answer_from_context` | `bool` | Whether the query can be answered from conversation history | +| `reasoning` | `str` | Brief explanation of the detection decision | +| `context_snippet` | `str \| null` | Relevant excerpt from history for use in Phase 2, or `null` | + +> **Internal field**: `answered_from_summary` (bool, default `False`) is reserved for future summary-based detection paths. 
+ +### Decision After Phase 1 + +``` +is_greeting=True → Phase 2: return greeting response (no LLM call) +can_answer_from_context=True AND snippet set → Phase 2: generate answer from snippet +Otherwise → Fall back to RAG +``` + +--- + +## Phase 2: Response Generation + +### Non-Streaming (`_generate_response_async`) + +Calls `generate_context_response(query, context_snippet)` which uses `ContextResponseGenerationSignature` to produce a complete answer in a single LLM call. Output guardrails are applied before returning the `OrchestrationResponse`. + +### Streaming (`_create_history_stream` → `stream_context_response`) + +Calls `stream_context_response(query, context_snippet)` which uses DSPy native streaming (`dspy.streamify`) with `ContextResponseGenerationSignature`. Tokens are yielded in real time and passed through NeMo Guardrails before being SSE-formatted. + +--- + +--- + +## Greeting Detection + +### Supported Languages + +| Language | Code | +|----------|------| +| Estonian | `et` | +| English | `en` | + +### Supported Greeting Types + +| Type | Estonian Examples | English Examples | +|------|-------------------|-----------------| +| `hello` | Tere, Hei, Tervist, Moi | Hello, Hi, Hey, Good morning | +| `goodbye` | Nägemist, Tšau | Bye, Goodbye, See you, Good night | +| `thanks` | Tänan, Aitäh, Tänud | Thank you, Thanks | +| `casual` | Tere, Tervist | Hey | + +### Greeting Response Generation + +The LLM generates contextually appropriate responses in the **same language** as the query. If the LLM detects a greeting but fails to produce an answer (e.g., JSON parse error), the system falls back to predefined static responses from `greeting_constants.py`. + +**Fallback responses (`greeting_constants.py`):** + +```python +GREETINGS_ET = { + "hello": "Tere! Kuidas ma saan sind aidata?", + "goodbye": "Nägemist! Head päeva!", + "thanks": "Palun! Kui on veel küsimusi, küsi julgelt.", + "casual": "Tere! 
Mida ma saan sinu jaoks teha?", +} + +GREETINGS_EN = { + "hello": "Hello! How can I help you?", + "goodbye": "Goodbye! Have a great day!", + "thanks": "You're welcome! Feel free to ask if you have more questions.", + "casual": "Hey! What can I do for you?", +} +``` + +The fallback greeting type is determined by keyword matching in `_detect_greeting_type()` — checking for `thank/tänan/aitäh`, `bye/goodbye/nägemist/tšau`, before defaulting to `hello`. + +--- + +## Streaming Support + +The context workflow supports both response modes: + +### Non-Streaming (`execute_async`) + +Returns a complete `OrchestrationResponse` object with the answer as a single string. Output guardrails are applied before the response is returned. + +### Streaming (`execute_streaming`) + +Returns an `AsyncIterator[str]` that yields SSE (Server-Sent Events) chunks. + +**Greeting responses** are yielded as a single SSE chunk followed by `END`. + +**History responses** use DSPy native streaming (`dspy.streamify`) with `ContextResponseGenerationSignature`. Tokens are emitted in real time as they arrive from the LLM, then passed through NeMo Guardrails (`stream_with_guardrails`) before being SSE-formatted. If a guardrail violation is detected in a chunk, streaming stops and the violation message is sent instead. + +**SSE Format:** +``` +data: {"chatId": "abc123", "payload": {"content": "Tere! Kuidas ma"}, "timestamp": "...", "sentTo": []} + +data: {"chatId": "abc123", "payload": {"content": " saan sind aidata?"}, "timestamp": "...", "sentTo": []} + +data: {"chatId": "abc123", "payload": {"content": "END"}, "timestamp": "...", "sentTo": []} +``` + +--- + +## Cost Tracking + +LLM token usage and cost is tracked via `get_lm_usage_since()` and stored in `costs_metric` within the workflow executor. Costs are logged via `orchestration_service.log_costs()` at the end of each execution path. 
+ +Two cost keys are tracked separately: + +```python +costs_metric = { + "context_detection": { + # Phase 1: detect_context() — single LLM call + "total_cost": 0.0012, + "total_tokens": 180, + "total_prompt_tokens": 150, + "total_completion_tokens": 30, + "num_calls": 1, + }, + "context_response": { + # Phase 2: generate_context_response() or stream_context_response() + "total_cost": 0.003, + "total_tokens": 140, + "total_prompt_tokens": 100, + "total_completion_tokens": 40, + "num_calls": 1, + }, +} +``` + +Greeting responses skip Phase 2, so only `"context_detection"` cost is populated. + +--- + +--- + +## Error Handling and Fallback + +| Failure Point | Behaviour | +|---------------|-----------| +| Phase 1 LLM call raises exception | `can_answer_from_context=False` → falls back to RAG | +| Phase 1 returns invalid JSON | Logged as warning, all flags default to `False` → falls back to RAG | +| Phase 2 LLM call raises exception | Logged as error, `_generate_response_async` returns `None` → falls back to RAG | +| Phase 2 returns empty answer | Logged as warning → falls back to RAG | +| Output guardrails fail | Logged as warning, response returned without guardrail check | +| Guardrail violation in streaming | `OUTPUT_GUARDRAIL_VIOLATION_MESSAGE` sent, stream terminated | +| `orchestration_service` unavailable | History streaming skipped → `None` returned → RAG fallback | +| `guardrails_adapter` not a `NeMoRailsAdapter` | Logged as warning → cannot stream → RAG fallback | +| Any unhandled exception in executor | Error logged, `execute_async/execute_streaming` returns `None` → RAG fallback via classifier | + +--- + +## Logging + +Key log entries emitted during a request: + +| Level | Message | When | +|-------|---------|------| +| `INFO` | `CONTEXT WORKFLOW (NON-STREAMING) \| Query: '...'` | `execute_async()` entry | +| `INFO` | `CONTEXT WORKFLOW (STREAMING) \| Query: '...'` | `execute_streaming()` entry | +| `INFO` | `CONTEXT DETECTOR: Phase 1 \| Query: '...' 
\| History: N turns` | `detect_context()` entry | +| `INFO` | `DETECTION RESULT \| Greeting: ... \| Can Answer: ... \| Has snippet: ...` | Phase 1 LLM response parsed | +| `INFO` | `Detection cost \| Total: $... \| Tokens: N` | After Phase 1 cost tracked | +| `INFO` | `Detection: greeting=... can_answer=...` | After `_detect()` returns in executor | +| `INFO` | `CONTEXT GENERATOR: Phase 2 non-streaming \| Query: '...'` | `generate_context_response()` entry | +| `INFO` | `CONTEXT GENERATOR: Phase 2 streaming \| Query: '...'` | `stream_context_response()` entry | +| `INFO` | `Context response streaming complete (final Prediction received)` | DSPy streaming finished | +| `WARNING` | `[chatId] Phase 2 empty answer — fallback to RAG` | Phase 2 returned no content | +| `WARNING` | `[chatId] Guardrails violation in context streaming` | Violation detected mid-stream | +| `WARNING` | `[chatId] Cannot answer from context — falling back to RAG` | Neither phase could answer | + +--- + +## Data Models + +### `ContextDetectionResult` (Phase 1 output) + +```python +class ContextDetectionResult(BaseModel): + is_greeting: bool # True if query is a greeting + can_answer_from_context: bool # True if query can be answered from last 10 turns + reasoning: str # LLM's brief explanation + answered_from_summary: bool # Reserved; always False in current workflow + context_snippet: Optional[str] # Relevant excerpt for Phase 2 generation, or None +``` + +### `ContextDetectionSignature` (DSPy — Phase 1) + +| Field | Type | Description | +|-------|------|-------------| +| `conversation_history` | Input | Last 10 turns formatted as JSON | +| `user_query` | Input | Current user query | +| `detection_result` | Output | JSON with `is_greeting`, `can_answer_from_context`, `reasoning`, `context_snippet` | + +> Detection only — **no answer generated here**. 
+ +### `ContextResponseGenerationSignature` (DSPy — Phase 2) + +| Field | Type | Description | +|-------|------|-------------| +| `context_snippet` | Input | Relevant excerpt from Phase 1 | +| `user_query` | Input | Current user query | +| `answer` | Output | Natural language response in the same language as the query | + +--- + +## Decision Summary Table + +| Scenario | Phase 1 LLM Calls | Phase 2 LLM Calls | Outcome | +|----------|--------------------|--------------------|---------| +| Greeting detected | 1 (`detect_context`) | 0 (static response) | Context responds (greeting) | +| Follow-up answerable from last 10 turns | 1 (`detect_context`) | 1 (`generate_context_response` or `stream_context_response`) | Context responds | +| Cannot answer from last 10 turns | 1 (`detect_context`) | 0 | Falls back to RAG | +| Phase 1 LLM error / JSON parse failure | — | 0 | Falls back to RAG | +| Phase 2 LLM error or empty answer | 1 | — | Falls back to RAG | + +--- + +## File Reference + +| File | Purpose | +|------|---------| +| `src/tool_classifier/context_analyzer.py` | Core LLM analysis logic (all three steps) | +| `src/tool_classifier/workflows/context_workflow.py` | Workflow executor (streaming + non-streaming) | +| `src/tool_classifier/classifier.py` | Classification layer that invokes context analysis | +| `src/tool_classifier/greeting_constants.py` | Static fallback greeting responses (ET/EN) | +| `tests/test_context_analyzer.py` | Unit tests for `ContextAnalyzer` | +| `tests/test_context_workflow.py` | Unit tests for `ContextWorkflowExecutor` | +| `tests/test_context_workflow_integration.py` | Integration tests for the full classify → route → execute chain | \ No newline at end of file diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index 7f7432f..7889987 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -639,11 +639,13 @@ async def stream_orchestration_response( ) # Classify query to determine 
workflow + start_time = time.time() classification = await self.tool_classifier.classify( query=request.message, conversation_history=request.conversationHistory, language=detected_language, ) + time_metric["classifier.classify"] = time.time() - start_time logger.info( f"[{request.chatId}] [{stream_ctx.stream_id}] Classification: {classification.workflow.value} " @@ -652,11 +654,14 @@ async def stream_orchestration_response( # Route to appropriate workflow (streaming) # route_to_workflow returns AsyncIterator[str] when is_streaming=True + start_time = time.time() stream_result = await self.tool_classifier.route_to_workflow( classification=classification, request=request, is_streaming=True, + time_metric=time_metric, ) + time_metric["classifier.route"] = time.time() - start_time async for sse_chunk in stream_result: yield sse_chunk diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py index 0e9b127..110c299 100644 --- a/src/llm_orchestration_service_api.py +++ b/src/llm_orchestration_service_api.py @@ -71,7 +71,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: if StreamConfig.RATE_LIMIT_ENABLED: app.state.rate_limiter = RateLimiter( requests_per_minute=StreamConfig.RATE_LIMIT_REQUESTS_PER_MINUTE, - tokens_per_second=StreamConfig.RATE_LIMIT_TOKENS_PER_SECOND, + tokens_per_minute=StreamConfig.RATE_LIMIT_TOKENS_PER_MINUTE, ) logger.info("Rate limiter initialized successfully") else: diff --git a/src/llm_orchestrator_config/stream_config.py b/src/llm_orchestrator_config/stream_config.py index ad19338..84e5edd 100644 --- a/src/llm_orchestrator_config/stream_config.py +++ b/src/llm_orchestrator_config/stream_config.py @@ -21,8 +21,7 @@ class StreamConfig: # Rate Limiting Configuration RATE_LIMIT_ENABLED: bool = True # Enable/disable rate limiting - RATE_LIMIT_REQUESTS_PER_MINUTE: int = 10 # Max requests per user per minute - RATE_LIMIT_TOKENS_PER_SECOND: int = ( - 100 # Max tokens per user per second (burst control) - 
) + RATE_LIMIT_REQUESTS_PER_MINUTE: int = 20 # Max requests per user per minute + RATE_LIMIT_TOKENS_PER_MINUTE: int = 40_000 # Max tokens per user per minute RATE_LIMIT_CLEANUP_INTERVAL: int = 300 # Cleanup old entries every 5 minutes + RATE_LIMIT_TOKEN_WINDOW_SECONDS: int = 60 # Sliding window size for token tracking diff --git a/src/tool_classifier/classifier.py b/src/tool_classifier/classifier.py index f18ef3e..1ada894 100644 --- a/src/tool_classifier/classifier.py +++ b/src/tool_classifier/classifier.py @@ -57,9 +57,9 @@ class ToolClassifier: def __init__( self, - llm_manager: Any, - orchestration_service: Any, - ): + llm_manager: Any, # noqa: ANN401 + orchestration_service: Any, # noqa: ANN401 + ) -> None: """ Initialize tool classifier with required dependencies. @@ -88,6 +88,7 @@ def __init__( ) self.context_workflow = ContextWorkflowExecutor( llm_manager=llm_manager, + orchestration_service=orchestration_service, ) self.rag_workflow = RAGWorkflowExecutor( orchestration_service=orchestration_service, @@ -622,7 +623,7 @@ def _get_workflow_executor(self, workflow_type: WorkflowType) -> Any: async def _execute_with_fallback_async( self, - workflow: Any, + workflow: Any, # noqa: ANN401 request: OrchestrationRequest, context: Dict[str, Any], start_layer: WorkflowType, @@ -696,11 +697,11 @@ async def _execute_with_fallback_async( if rag_result is not None: return rag_result else: - raise RuntimeError("RAG workflow returned None unexpectedly") + raise RuntimeError("RAG workflow returned None unexpectedly") from e async def _execute_with_fallback_streaming( self, - workflow: Any, + workflow: Any, # noqa: ANN401 request: OrchestrationRequest, context: Dict[str, Any], start_layer: WorkflowType, @@ -782,4 +783,4 @@ async def _execute_with_fallback_streaming( async for chunk in streaming_result: yield chunk else: - raise RuntimeError("RAG workflow returned None unexpectedly") + raise RuntimeError("RAG workflow returned None unexpectedly") from e diff --git 
a/src/tool_classifier/constants.py b/src/tool_classifier/constants.py index 65f3033..d839e2c 100644 --- a/src/tool_classifier/constants.py +++ b/src/tool_classifier/constants.py @@ -70,13 +70,15 @@ DENSE_SEARCH_TOP_K = 3 """Number of top results from dense-only search for relevance scoring.""" -DENSE_MIN_THRESHOLD = 0.38 +# DENSE_MIN_THRESHOLD = 0.38 +DENSE_MIN_THRESHOLD = 0.5 """Minimum dense cosine similarity to consider a result as a potential match. Below this → skip SERVICE entirely, go to CONTEXT/RAG. Note: Multilingual embeddings (Estonian/short queries) typically yield lower cosine scores (0.25-0.40) than English. Tune based on observed scores.""" -DENSE_HIGH_CONFIDENCE_THRESHOLD = 0.40 +# DENSE_HIGH_CONFIDENCE_THRESHOLD = 0.40 +DENSE_HIGH_CONFIDENCE_THRESHOLD = 0.55 """Dense cosine similarity for high-confidence service classification. Above this AND score gap is large → SERVICE without LLM confirmation.""" diff --git a/src/tool_classifier/context_analyzer.py b/src/tool_classifier/context_analyzer.py new file mode 100644 index 0000000..51aa214 --- /dev/null +++ b/src/tool_classifier/context_analyzer.py @@ -0,0 +1,893 @@ +"""Context analyzer for greeting detection and conversation history analysis.""" + +from __future__ import annotations + +from typing import Any, AsyncIterator, Dict, List, Optional +import json +import dspy +import dspy.streaming +from dspy.streaming import StreamListener +from loguru import logger +from pydantic import BaseModel, Field + +from src.utils.cost_utils import get_lm_usage_since +from src.tool_classifier.greeting_constants import get_greeting_response + + +class ContextAnalysisResult(BaseModel): + """Result of context analysis.""" + + is_greeting: bool = Field( + ..., description="Whether the query is a greeting (hello, goodbye, thanks)" + ) + can_answer_from_context: bool = Field( + ..., description="Whether the query can be answered from conversation history" + ) + answer: Optional[str] = Field( + None, 
description="Generated response (greeting or context-based answer)" + ) + reasoning: str = Field(..., description="Brief explanation of the analysis") + answered_from_summary: bool = Field( + default=False, + description="Whether the answer was derived from a conversation summary (older turns beyond recent 10)", + ) + + +class ContextAnalysisSignature(dspy.Signature): + """Analyze user query for greeting detection and conversation history references. + + This signature instructs the LLM to: + 1. Detect greetings in multiple languages (Estonian, English) + 2. Check if query references conversation history + 3. Generate appropriate responses or extract answers from history + + Supported greeting types: + - hello: Tere, Hello, Hi, Hei, Hey, Moi, Good morning, Good afternoon, Good evening + - goodbye: Nägemist, Bye, Goodbye, See you, Good night + - thanks: Tänan, Aitäh, Thank you, Thanks, Much appreciated + - casual: Tervist, Tšau, Moikka + + The LLM should respond in the SAME language as the user's query. + """ + + conversation_history: str = dspy.InputField( + desc="Recent conversation history (last 10 turns) formatted as JSON" + ) + user_query: str = dspy.InputField( + desc="Current user query to analyze for greetings or context references" + ) + analysis_result: str = dspy.OutputField( + desc='JSON object with: {"is_greeting": bool, "can_answer_from_context": bool, "answer": str|null, "reasoning": str}. ' + "For greetings, generate a friendly response in the same language. " + "For context references, extract the answer from conversation history if available." + ) + + +class ConversationSummarySignature(dspy.Signature): + """Generate a concise summary of conversation history. + + Summarize the key topics, facts, decisions, and information discussed + in the conversation. Preserve specific details like numbers, names, + dates, and other factual information that might be referenced later. + + The summary should be in the SAME language as the conversation. 
+ """ + + conversation_history: str = dspy.InputField( + desc="Conversation history formatted as JSON to summarize" + ) + summary: str = dspy.OutputField( + desc="Concise summary capturing key topics, facts, and information discussed. " + "Preserve specific details (numbers, names, dates) that could be referenced later." + ) + + +class SummaryAnalysisSignature(dspy.Signature): + """Analyze if a user query can be answered from a conversation summary. + + Given a summary of earlier conversation and the current user query, + determine if the query references information from the summarized conversation. + If yes, generate an appropriate answer based on the summary. + + The response should be in the SAME language as the user's query. + """ + + conversation_summary: str = dspy.InputField( + desc="Summary of earlier conversation history" + ) + user_query: str = dspy.InputField( + desc="Current user query to check against the conversation summary" + ) + analysis_result: str = dspy.OutputField( + desc='JSON object with: {"can_answer_from_context": bool, "answer": str|null, "reasoning": str}. ' + "If the query references information from the summary, extract/generate the answer. " + "If the summary does not contain relevant information, set can_answer_from_context to false." 
+ ) + + +class ContextDetectionResult(BaseModel): + """Result of Phase 1 context detection (classify only, no answer generation).""" + + is_greeting: bool = Field(..., description="Whether the query is a greeting") + can_answer_from_context: bool = Field( + ..., description="Whether the query can be answered from conversation history" + ) + reasoning: str = Field(..., description="Brief explanation of the detection") + answered_from_summary: bool = Field( + default=False, + description="Whether summary analysis was used for detection", + ) + # Relevant context snippet extracted for use in Phase 2 generation + context_snippet: Optional[str] = Field( + default=None, + description="The relevant part of history/summary to answer from, for Phase 2", + ) + + +class ContextDetectionSignature(dspy.Signature): + """Detect if a user query is a greeting or can be answered from conversation history. + + Phase 1 (detection only): classify the query WITHOUT generating the answer. + + Supported greeting types: + - hello: Tere, Hello, Hi, Hei, Hey, Moi, Good morning/afternoon/evening + - goodbye: Nägemist, Bye, Goodbye, See you, Good night + - thanks: Tänan, Aitäh, Thank you, Thanks, Much appreciated + - casual: Tervist, Tšau, Moikka + + Do NOT generate the answer here — only detect and extract a relevant context snippet. + """ + + conversation_history: str = dspy.InputField( + desc="Recent conversation history (last 10 turns) formatted as JSON" + ) + user_query: str = dspy.InputField(desc="Current user query to classify") + detection_result: str = dspy.OutputField( + desc='JSON object with: {"is_greeting": bool, "can_answer_from_context": bool, ' + '"reasoning": str, "context_snippet": str|null}. ' + "context_snippet should contain the relevant excerpt from history if can_answer_from_context is true, " + "or null otherwise. Do NOT generate the final answer — only detect and extract." 
+ ) + + +class ContextResponseGenerationSignature(dspy.Signature): + """Generate a response to a user query based on conversation history context. + + Phase 2 (generation): given the user query and relevant context, generate a helpful answer. + Respond in the SAME language as the user query. + """ + + context_snippet: str = dspy.InputField( + desc="Relevant excerpt from conversation history or summary that contains the answer" + ) + user_query: str = dspy.InputField(desc="Current user query to answer") + answer: str = dspy.OutputField( + desc="A helpful, natural response to the user query based on the provided context. " + "Respond in the same language as the user query." + ) + + +class ContextAnalyzer: + """ + Analyzer for greeting detection and context-based question answering. + + This class uses an LLM to intelligently detect: + - Greetings in multiple languages (Estonian, English) + - Questions that reference conversation history + - Generate appropriate responses based on context + + Example Usage: + analyzer = ContextAnalyzer(llm_manager) + result = await analyzer.analyze_context( + query="Tere!", + conversation_history=[], + language="et" + ) + # result.is_greeting = True + # result.answer = "Tere! Kuidas ma saan sind aidata?" + """ + + def __init__(self, llm_manager: Any) -> None: # noqa: ANN401 + """ + Initialize the context analyzer. 
+ + Args: + llm_manager: LLM manager instance for making LLM calls + """ + self.llm_manager = llm_manager + self._module: Optional[dspy.Module] = None + self._summary_module: Optional[dspy.Module] = None + self._summary_analysis_module: Optional[dspy.Module] = None + # Phase 1 & 2 modules for two-phase detection+generation flow + self._detection_module: Optional[dspy.Module] = None + self._response_generation_module: Optional[dspy.Module] = None + self._stream_predictor: Optional[Any] = None + logger.info("Context analyzer initialized") + + def _format_conversation_history( + self, conversation_history: List[Dict[str, Any]], max_turns: int = 10 + ) -> str: + """ + Format conversation history for LLM consumption. + + Args: + conversation_history: List of conversation items with authorRole, message, timestamp + max_turns: Maximum number of turns to include (default: 10) + + Returns: + Formatted conversation history as JSON string + """ + # Take last N turns + recent_history = ( + conversation_history[-max_turns:] if conversation_history else [] + ) + + # Format as readable JSON + formatted_history = [ + { + "role": item.get("authorRole", "unknown"), + "message": item.get("message", ""), + "timestamp": item.get("timestamp", ""), + } + for item in recent_history + ] + + if not formatted_history: + return "[]" + + return json.dumps(formatted_history, ensure_ascii=False, indent=2) + + @staticmethod + def _merge_cost_dicts( + cost1: Dict[str, Any], cost2: Dict[str, Any] + ) -> Dict[str, Any]: + """ + Merge two cost dictionaries by summing numeric values. 
+ + Args: + cost1: First cost dictionary + cost2: Second cost dictionary + + Returns: + Merged cost dictionary with summed values + """ + return { + "total_cost": cost1.get("total_cost", 0) + cost2.get("total_cost", 0), + "total_tokens": cost1.get("total_tokens", 0) + cost2.get("total_tokens", 0), + "total_prompt_tokens": cost1.get("total_prompt_tokens", 0) + + cost2.get("total_prompt_tokens", 0), + "total_completion_tokens": cost1.get("total_completion_tokens", 0) + + cost2.get("total_completion_tokens", 0), + "num_calls": cost1.get("num_calls", 0) + cost2.get("num_calls", 0), + } + + async def detect_context( + self, + query: str, + conversation_history: List[Dict[str, Any]], + ) -> tuple[ContextDetectionResult, Dict[str, Any]]: + """ + Phase 1: Detect if query is a greeting or can be answered from history. + + Classify-only — no answer generated here. Returns a ContextDetectionResult + with is_greeting/can_answer_from_context flags and a context_snippet for + Phase 2 generation. + + Args: + query: User query to classify + conversation_history: Full conversation history + + Returns: + Tuple of (ContextDetectionResult, cost_dict) + """ + total_turns = len(conversation_history) + logger.info( + f"CONTEXT DETECTOR: Phase 1 | Query: '{query[:100]}' | " + f"History: {total_turns} turns" + ) + + history_length_before = 0 + try: + lm = dspy.settings.lm + if lm and hasattr(lm, "history"): + history_length_before = len(lm.history) + except Exception as e: + logger.warning(f"Failed to get LM history length for detection: {e}") + + formatted_history = self._format_conversation_history(conversation_history) + + self.llm_manager.ensure_global_config() + try: + with self.llm_manager.use_task_local(): + if self._detection_module is None: + self._detection_module = dspy.ChainOfThought( + ContextDetectionSignature + ) + response = self._detection_module( + conversation_history=formatted_history, + user_query=query, + ) + + try: + detection_data = 
json.loads(response.detection_result) + except json.JSONDecodeError: + logger.warning( + f"Failed to parse detection response: {response.detection_result[:100]}" + ) + detection_data = { + "is_greeting": False, + "can_answer_from_context": False, + "reasoning": "Failed to parse detection response", + "context_snippet": None, + } + + result = ContextDetectionResult( + is_greeting=detection_data.get("is_greeting", False), + can_answer_from_context=detection_data.get( + "can_answer_from_context", False + ), + reasoning=detection_data.get("reasoning", "Detection completed"), + context_snippet=detection_data.get("context_snippet"), + ) + logger.info( + f"DETECTION RESULT | Greeting: {result.is_greeting} | " + f"Can Answer: {result.can_answer_from_context} | " + f"Has snippet: {result.context_snippet is not None}" + ) + + except Exception as e: + logger.error(f"Context detection failed: {e}", exc_info=True) + result = ContextDetectionResult( + is_greeting=False, + can_answer_from_context=False, + reasoning=f"Detection error: {str(e)}", + ) + + cost_dict = get_lm_usage_since(history_length_before) + logger.info( + f"Detection cost | Total: ${cost_dict.get('total_cost', 0):.6f} | " + f"Tokens: {cost_dict.get('total_tokens', 0)}" + ) + return result, cost_dict + + async def stream_context_response( + self, + query: str, + context_snippet: str, + ) -> AsyncIterator[str]: + """ + Phase 2 (streaming): Stream a generated answer using DSPy native streaming. + + Uses ContextResponseGenerationSignature with DSPy's streamify() so tokens + are yielded in real time — same mechanism as ResponseGeneratorAgent.stream_response(). 
+ + Args: + query: The user query to answer + context_snippet: Relevant context extracted during Phase 1 detection + + Yields: + Token strings as they arrive from the LLM + """ + logger.info(f"CONTEXT GENERATOR: Phase 2 streaming | Query: '{query[:100]}'") + + self.llm_manager.ensure_global_config() + output_stream = None + stream_started = False + try: + with self.llm_manager.use_task_local(): + if self._stream_predictor is None: + answer_listener = StreamListener(signature_field_name="answer") + self._stream_predictor = dspy.streamify( + dspy.Predict(ContextResponseGenerationSignature), + stream_listeners=[answer_listener], + ) + output_stream = self._stream_predictor( + context_snippet=context_snippet, + user_query=query, + ) + + async for chunk in output_stream: + if isinstance(chunk, dspy.streaming.StreamResponse): + if chunk.signature_field_name == "answer": + stream_started = True + yield chunk.chunk + elif isinstance(chunk, dspy.Prediction): + logger.info( + "Context response streaming complete (final Prediction received)" + ) + + if not stream_started: + logger.warning( + "Context streaming finished but no 'answer' tokens received." + ) + except GeneratorExit: + raise + except Exception as e: + logger.error(f"Error during context response streaming: {e}") + raise + finally: + if output_stream is not None: + try: + await output_stream.aclose() + except Exception as cleanup_error: + logger.debug( + f"Error during context stream cleanup: {cleanup_error}" + ) + + async def generate_context_response( + self, + query: str, + context_snippet: str, + ) -> tuple[str, Dict[str, Any]]: + """ + Phase 2 (non-streaming): Generate a complete answer from context snippet. + + Used for non-streaming mode after Phase 1 detection confirms context can answer. 
+ + Args: + query: The user query to answer + context_snippet: Relevant context extracted during Phase 1 detection + + Returns: + Tuple of (answer_text, cost_dict) + """ + logger.info( + f"CONTEXT GENERATOR: Phase 2 non-streaming | Query: '{query[:100]}'" + ) + + history_length_before = 0 + try: + lm = dspy.settings.lm + if lm and hasattr(lm, "history"): + history_length_before = len(lm.history) + except Exception as e: + logger.warning(f"Failed to get LM history length for generation: {e}") + + self.llm_manager.ensure_global_config() + answer = "" + try: + with self.llm_manager.use_task_local(): + if self._response_generation_module is None: + self._response_generation_module = dspy.ChainOfThought( + ContextResponseGenerationSignature + ) + response = self._response_generation_module( + context_snippet=context_snippet, + user_query=query, + ) + answer = getattr(response, "answer", "") or "" + logger.info( + f"Context response generated: {len(answer)} chars | " + f"Preview: '{answer[:150]}'" + ) + except Exception as e: + logger.error(f"Context response generation failed: {e}", exc_info=True) + + cost_dict = get_lm_usage_since(history_length_before) + logger.info( + f"Generation cost | Total: ${cost_dict.get('total_cost', 0):.6f} | " + f"Tokens: {cost_dict.get('total_tokens', 0)}" + ) + return answer, cost_dict + + async def _generate_conversation_summary( + self, + older_history: List[Dict[str, Any]], + ) -> tuple[str, Dict[str, Any]]: + """ + Generate a concise summary of older conversation turns. 
+ + Args: + older_history: Conversation turns older than the recent 10 + + Returns: + Tuple of (summary_text, cost_dict) + """ + logger.info(f"SUMMARY GENERATION: Summarizing {len(older_history)} older turns") + + # Track costs + history_length_before = 0 + try: + lm = dspy.settings.lm + if lm and hasattr(lm, "history"): + history_length_before = len(lm.history) + except Exception as e: + logger.warning(f"Failed to get LM history length for summary: {e}") + + # Format older history + formatted_history = self._format_conversation_history( + older_history, max_turns=len(older_history) + ) + + # Initialize and run summary module within task-local LLM config + try: + self.llm_manager.ensure_global_config() + with self.llm_manager.use_task_local(): + if self._summary_module is None: + self._summary_module = dspy.ChainOfThought( + ConversationSummarySignature + ) + response = self._summary_module( + conversation_history=formatted_history, + ) + summary = response.summary + logger.info( + f"Summary generated: {len(summary)} chars | " + f"Preview: '{summary[:150]}...'" + ) + except Exception as e: + logger.error(f"Summary generation failed: {e}", exc_info=True) + summary = "" + + cost_dict = get_lm_usage_since(history_length_before) + logger.info( + f"Summary cost | Total: ${cost_dict.get('total_cost', 0):.6f} | " + f"Tokens: {cost_dict.get('total_tokens', 0)}" + ) + + return summary, cost_dict + + async def _analyze_from_summary( + self, + query: str, + summary: str, + ) -> tuple[ContextAnalysisResult, Dict[str, Any]]: + """ + Check if a query can be answered from a conversation summary. 
+ + Args: + query: User query to check + summary: Summary of older conversation turns + + Returns: + Tuple of (ContextAnalysisResult, cost_dict) + """ + logger.info( + f"SUMMARY ANALYSIS: Checking query against summary | Query: '{query[:100]}'" + ) + + # Ensure DSPy is configured and run analysis in a task-local LM context + self.llm_manager.ensure_global_config() + history_length_before = 0 + with self.llm_manager.use_task_local(): + # Track costs + try: + lm = dspy.settings.lm + if lm and hasattr(lm, "history"): + history_length_before = len(lm.history) + except Exception as e: + logger.warning( + f"Failed to get LM history length for summary analysis: {e}" + ) + # Initialize summary analysis module if needed + if self._summary_analysis_module is None: + self._summary_analysis_module = dspy.ChainOfThought( + SummaryAnalysisSignature + ) + try: + response = self._summary_analysis_module( + conversation_summary=summary, + user_query=query, + ) + # Parse JSON response + try: + analysis_data = json.loads(response.analysis_result) + except json.JSONDecodeError: + logger.warning( + f"Failed to parse summary analysis response: " + f"{response.analysis_result[:100]}" + ) + analysis_data = { + "can_answer_from_context": False, + "answer": None, + "reasoning": "Failed to parse summary analysis response", + } + can_answer = analysis_data.get("can_answer_from_context", False) + answer = analysis_data.get("answer") + reasoning = analysis_data.get("reasoning", "Summary analysis completed") + logger.debug( + f"Raw summary analysis parsed | " + f"can_answer_from_context={can_answer} | " + f"has_answer={answer is not None}" + ) + # Only mark as answerable when both the LLM flag is True AND an answer exists + can_answer_from_context = bool(can_answer and answer) + result = ContextAnalysisResult( + is_greeting=False, + can_answer_from_context=can_answer_from_context, + answer=answer, + reasoning=reasoning, + answered_from_summary=can_answer_from_context, + ) + logger.info( + 
"SUMMARY ANALYSIS RESULT | " + f"Can answer from summary: {can_answer} | " + f"Can answer from context: {can_answer_from_context} | " + f"Has answer: {answer is not None} | Reasoning: {reasoning}" + ) + except Exception as e: + logger.error(f"Summary analysis failed: {e}", exc_info=True) + result = ContextAnalysisResult( + is_greeting=False, + can_answer_from_context=False, + answer=None, + reasoning=f"Summary analysis error: {str(e)}", + ) + + cost_dict = get_lm_usage_since(history_length_before) + logger.info( + f"Summary analysis cost | Total: ${cost_dict.get('total_cost', 0):.6f} | " + f"Tokens: {cost_dict.get('total_tokens', 0)}" + ) + + return result, cost_dict + + async def analyze_context( + self, + query: str, + conversation_history: List[Dict[str, Any]], + language: str = "et", + ) -> tuple[ContextAnalysisResult, Dict[str, Any]]: + """ + Analyze if query is a greeting or can be answered from conversation history. + + Implements a 3-step flow: + 1. Analyze recent 10 turns for greetings and history-answerable queries + 2. If cannot answer and total history > 10 turns, generate a summary of older turns + 3. Check if the query can be answered from the summary + 4. 
If still cannot answer, return cannot-answer result (falls through to RAG) + + Args: + query: User query to analyze + conversation_history: List of conversation items + language: Language code (et, en) for response generation + + Returns: + Tuple of (ContextAnalysisResult, cost_dict) + """ + total_turns = len(conversation_history) + logger.info( + f"CONTEXT ANALYZER: Starting analysis | Query: '{query[:100]}' | " + f"History: {total_turns} turns | Language: {language}" + ) + + # STEP 1: Analyze recent 10 turns (existing behavior) + result, cost_dict = await self._analyze_recent_history( + query=query, + conversation_history=conversation_history, + language=language, + ) + + # If greeting or can answer from recent history, return immediately + if (result.is_greeting or result.can_answer_from_context) and result.answer: + logger.info( + f"Answered from recent history | " + f"Greeting: {result.is_greeting} | From context: {result.can_answer_from_context}" + ) + return result, cost_dict + + # STEP 2 & 3: If history > 10 turns and couldn't answer from recent, try summary + if total_turns > 10: + logger.info( + f"History exceeds 10 turns ({total_turns} total) | " + f"Cannot answer from recent 10 | Attempting summary-based analysis" + ) + + # Get older turns (everything before the last 10) + older_history = conversation_history[:-10] + logger.info(f"Older history: {len(older_history)} turns to summarize") + + try: + # Generate summary of older turns + summary, summary_cost = await self._generate_conversation_summary( + older_history + ) + cost_dict = self._merge_cost_dicts(cost_dict, summary_cost) + + if summary: + # Analyze query against summary + summary_result, analysis_cost = await self._analyze_from_summary( + query=query, + summary=summary, + ) + cost_dict = self._merge_cost_dicts(cost_dict, analysis_cost) + + if summary_result.can_answer_from_context and summary_result.answer: + logger.info( + f"Answered from conversation summary | " + f"Reasoning: 
{summary_result.reasoning}" + ) + return summary_result, cost_dict + + logger.info( + "Cannot answer from summary either | Falling back to RAG" + ) + else: + logger.warning( + "Summary generation returned empty | Falling back to RAG" + ) + + except Exception as e: + logger.error(f"Summary-based analysis failed: {e}", exc_info=True) + else: + logger.info( + f"History has {total_turns} turns (<= 10) | " + f"No summary needed | Falling back to RAG" + ) + + # Cannot answer from context at all + logger.info( + f"CONTEXT ANALYZER FINAL DECISION | " + f"can_answer_from_context={result.can_answer_from_context} | " + f"is_greeting={result.is_greeting} | " + f"answered_from_summary={result.answered_from_summary} | " + f"has_answer={result.answer is not None} | " + f"action={'RESPOND' if (result.can_answer_from_context or result.is_greeting) and result.answer else 'FALLBACK_TO_RAG'}" + ) + return result, cost_dict + + async def _analyze_recent_history( + self, + query: str, + conversation_history: List[Dict[str, Any]], + language: str = "et", + ) -> tuple[ContextAnalysisResult, Dict[str, Any]]: + """ + Analyze the query against the most recent conversation turns. + + This is the original analysis logic extracted into its own method. + Checks for greetings and history-answerable queries in the last 10 turns. 
+ + Args: + query: User query to analyze + conversation_history: Full conversation history (last 10 will be used) + language: Language code for response generation + + Returns: + Tuple of (ContextAnalysisResult, cost_dict) + """ + logger.info("STEP 1: Analyzing recent history (last 10 turns)") + + # Track LLM history for cost calculation + history_length_before = 0 + try: + lm = dspy.settings.lm + if lm and hasattr(lm, "history"): + history_length_before = len(lm.history) + except Exception as e: + logger.warning(f"Failed to get LM history length: {e}") + + # Format conversation history (last 10 turns) + formatted_history = self._format_conversation_history(conversation_history) + + # Ensure LM is configured and use task-local context for DSPy operations + self.llm_manager.ensure_global_config() + try: + with self.llm_manager.use_task_local(): + # Initialize DSPy module if not already done + if self._module is None: + self._module = dspy.ChainOfThought(ContextAnalysisSignature) + # Call LLM for analysis + logger.info( + "Calling LLM for context analysis (greeting/history check)..." 
+ ) + response = self._module( + conversation_history=formatted_history, + user_query=query, + ) + + # Parse the analysis result + analysis_json = response.analysis_result + + # Try to parse JSON response + try: + analysis_data = json.loads(analysis_json) + logger.debug( + f"Raw LLM response parsed | " + f"can_answer_from_context={analysis_data.get('can_answer_from_context')} | " + f"is_greeting={analysis_data.get('is_greeting')} | " + f"has_answer={analysis_data.get('answer') is not None}" + ) + except json.JSONDecodeError: + logger.warning( + f"Failed to parse LLM response as JSON: {analysis_json[:100]}" + ) + # Fallback: treat as cannot answer + analysis_data = { + "is_greeting": False, + "can_answer_from_context": False, + "answer": None, + "reasoning": "Failed to parse LLM response", + } + + # Create result object + result = ContextAnalysisResult( + is_greeting=analysis_data.get("is_greeting", False), + can_answer_from_context=analysis_data.get( + "can_answer_from_context", False + ), + answer=analysis_data.get("answer"), + reasoning=analysis_data.get("reasoning", "Analysis completed"), + ) + + logger.info( + f"ANALYSIS RESULT | Greeting: {result.is_greeting} | " + f"Can Answer from Context: {result.can_answer_from_context} | " + f"Answer: {result.answer[:100] if result.answer else None} | " + f"Reasoning: {result.reasoning}" + ) + + # If greeting detected but LLM didn't generate an answer, use fallback + if result.is_greeting and result.answer is None: + greeting_type = self._detect_greeting_type(query) + fallback_answer = get_greeting_response(greeting_type, language) + result = ContextAnalysisResult( + is_greeting=result.is_greeting, + can_answer_from_context=result.can_answer_from_context, + answer=fallback_answer, + reasoning=result.reasoning, + ) + + except Exception as e: + logger.error(f"Context analysis failed: {e}", exc_info=True) + # Fallback result + result = ContextAnalysisResult( + is_greeting=False, + can_answer_from_context=False, + 
answer=None, + reasoning=f"Analysis error: {str(e)}", + ) + + # Calculate costs + cost_dict = get_lm_usage_since(history_length_before) + logger.info( + f"Cost tracking | Total cost: ${cost_dict.get('total_cost', 0):.6f} | " + f"Tokens: {cost_dict.get('total_tokens', 0)} | " + f"Calls: {cost_dict.get('num_calls', 0)}" + ) + + return result, cost_dict + + def _detect_greeting_type(self, query: str) -> str: + """ + Detect the type of greeting from the query text. + + Args: + query: User query string + + Returns: + Greeting type: 'thanks', 'goodbye', 'casual', or 'hello' (default) + """ + query_lower = query.lower().strip() + thanks_keywords = ["thank", "thanks", "tänan", "aitäh", "tänud"] + goodbye_keywords = ["bye", "goodbye", "nägemist", "tsau", "tšau", "head aega"] + casual_keywords = ["hei", "hey", "moi", "moikka"] + for kw in thanks_keywords: + if kw in query_lower: + return "thanks" + for kw in goodbye_keywords: + if kw in query_lower: + return "goodbye" + for kw in casual_keywords: + if kw in query_lower: + return "casual" + return "hello" + + def get_fallback_greeting_response(self, language: str = "et") -> str: + """ + Get a fallback greeting response without LLM call. + + Used when LLM-based greeting detection fails but we still want + to provide a friendly response. + + Args: + language: Language code (et, en) + + Returns: + Greeting message in the specified language + """ + greetings = { + "et": "Tere! Kuidas ma saan sind aidata?", + "en": "Hello! How can I help you?", + } + return greetings.get(language, greetings["en"]) diff --git a/src/tool_classifier/greeting_constants.py b/src/tool_classifier/greeting_constants.py new file mode 100644 index 0000000..272d6a4 --- /dev/null +++ b/src/tool_classifier/greeting_constants.py @@ -0,0 +1,40 @@ +"""Constants for greeting responses in multiple languages.""" + +from typing import Dict + +# Estonian greeting responses +GREETINGS_ET: Dict[str, str] = { + "hello": "Tere! 
Kuidas ma saan sind aidata?", + "goodbye": "Nägemist! Head päeva!", + "thanks": "Palun! Kui on veel küsimusi, küsi julgelt.", + "casual": "Tere! Mida ma saan sinu jaoks teha?", +} + +# English greeting responses +GREETINGS_EN: Dict[str, str] = { + "hello": "Hello! How can I help you?", + "goodbye": "Goodbye! Have a great day!", + "thanks": "You're welcome! Feel free to ask if you have more questions.", + "casual": "Hey! What can I do for you?", +} + +# Language-specific greeting mappings +GREETINGS_BY_LANGUAGE: Dict[str, Dict[str, str]] = { + "et": GREETINGS_ET, + "en": GREETINGS_EN, +} + + +def get_greeting_response(greeting_type: str = "hello", language: str = "et") -> str: + """ + Get a greeting response for a specific type and language. + + Args: + greeting_type: Type of greeting (hello, goodbye, thanks, casual) + language: Language code (et, en) + + Returns: + Greeting message in the specified language + """ + language_greetings = GREETINGS_BY_LANGUAGE.get(language, GREETINGS_EN) + return language_greetings.get(greeting_type, language_greetings["hello"]) diff --git a/src/tool_classifier/workflows/context_workflow.py b/src/tool_classifier/workflows/context_workflow.py index dc23e8b..2c83769 100644 --- a/src/tool_classifier/workflows/context_workflow.py +++ b/src/tool_classifier/workflows/context_workflow.py @@ -1,10 +1,22 @@ """Context workflow executor - Layer 2: Conversation history and greetings.""" from typing import Any, AsyncIterator, Dict, Optional +import time +import dspy from loguru import logger from models.request_models import OrchestrationRequest, OrchestrationResponse from tool_classifier.base_workflow import BaseWorkflow +from tool_classifier.context_analyzer import ContextAnalyzer, ContextDetectionResult +from tool_classifier.workflows.service_workflow import LLMServiceProtocol +from src.guardrails.nemo_rails_adapter import NeMoRailsAdapter +from src.llm_orchestrator_config.llm_manager import LLMManager +from src.utils.cost_utils import 
get_lm_usage_since +from src.utils.language_detector import detect_language +from src.llm_orchestrator_config.llm_ochestrator_constants import ( + GUARDRAILS_BLOCKED_PHRASES, + OUTPUT_GUARDRAIL_VIOLATION_MESSAGE, +) class ContextWorkflowExecutor(BaseWorkflow): @@ -12,24 +24,222 @@ class ContextWorkflowExecutor(BaseWorkflow): Handles greetings and conversation history queries (Layer 2). Detects: - - Greetings: "Hello", "Thanks", "Goodbye" + - Greetings: "Hello", "Thanks", "Goodbye" (multilingual: Estonian, English) - History references: "What did you say earlier?", "Can you repeat that?" Uses LLM for semantic detection (multilingual), no regex patterns. - Status: SKELETON - Returns None (fallback to RAG) - TODO: Implement greeting/context detection, answer extraction, guardrails + Implementation Strategy: + 1. Detect language from user query + 2. Use ContextAnalyzer (LLM-based) to check if: + - Query is a greeting -> generate friendly response + - Query references conversation history -> extract answer + 3. If can answer -> return response + 4. Otherwise -> return None (fallback to RAG) + + Cost Tracking: + - Tracks LLM costs for context analysis + - Logs via orchestration_service.log_costs() (same as service/RAG workflows) """ - def __init__(self, llm_manager: Any): + def __init__( + self, + llm_manager: LLMManager, + orchestration_service: Optional[LLMServiceProtocol] = None, + ) -> None: """ Initialize context workflow executor. 
Args: llm_manager: LLM manager for context analysis + orchestration_service: Reference to LLMOrchestrationService for cost logging """ self.llm_manager = llm_manager - logger.info("Context workflow executor initialized (skeleton)") + self.orchestration_service = orchestration_service + self.context_analyzer = ContextAnalyzer(llm_manager) + logger.info("Context workflow executor initialized") + + @staticmethod + def _build_history(request: OrchestrationRequest) -> list[Dict[str, Any]]: + return [ + { + "authorRole": item.authorRole, + "message": item.message, + "timestamp": item.timestamp, + } + for item in request.conversationHistory + ] + + async def _detect( + self, + message: str, + history: list[Dict[str, Any]], + time_metric: Dict[str, float], + costs_metric: Dict[str, Dict[str, Any]], + ) -> Optional[ContextDetectionResult]: + """Phase 1: run context detection. Returns ContextDetectionResult or None on error.""" + try: + start = time.time() + result, cost = await self.context_analyzer.detect_context( + query=message, conversation_history=history + ) + time_metric["context.detection"] = time.time() - start + costs_metric["context_detection"] = cost + return result + except Exception as e: + logger.error(f"Phase 1 detection failed: {e}", exc_info=True) + return None + + def _log_costs(self, costs_metric: Dict[str, Dict[str, Any]]) -> None: + if self.orchestration_service: + self.orchestration_service.log_costs(costs_metric) + + @staticmethod + def _is_guardrail_violation(chunk: str) -> bool: + """Return True if the chunk matches a known guardrail blocked phrase.""" + chunk_lower = chunk.strip().lower() + return any( + phrase.lower() in chunk_lower + and len(chunk_lower) <= len(phrase.lower()) + 20 + for phrase in GUARDRAILS_BLOCKED_PHRASES + ) + + async def _generate_response_async( + self, + request: OrchestrationRequest, + context_snippet: str, + time_metric: Dict[str, float], + costs_metric: Dict[str, Dict[str, Any]], + ) -> Optional[OrchestrationResponse]: 
+ """Non-streaming: Generate response + apply output guardrails.""" + try: + start = time.time() + answer, cost = await self.context_analyzer.generate_context_response( + query=request.message, context_snippet=context_snippet + ) + time_metric["context.generation"] = time.time() - start + costs_metric["context_response"] = cost + except Exception as e: + logger.error(f"Phase 2 generation failed: {e}", exc_info=True) + self._log_costs(costs_metric) + return None + + if not answer: + logger.warning(f"[{request.chatId}] Phase 2 empty answer — fallback to RAG") + self._log_costs(costs_metric) + return None + + response = OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content=answer, + ) + if self.orchestration_service: + try: + components = self.orchestration_service._initialize_service_components( + request + ) + response = await self.orchestration_service.handle_output_guardrails( + guardrails_adapter=components.get("guardrails_adapter"), + generated_response=response, + request=request, + costs_metric=costs_metric, + ) + except Exception as e: + logger.warning( + f"[{request.chatId}] Output guardrails check failed: {e}" + ) + self._log_costs(costs_metric) + return response + + async def _stream_history_generator( + self, + chat_id: str, + query: str, + context_snippet: str, + history_length_before: int, + guardrails_adapter: NeMoRailsAdapter, + costs_metric: Dict[str, Dict[str, Any]], + ) -> AsyncIterator[str]: + """Async generator: stream history answer through NeMo Guardrails.""" + bot_generator = self.context_analyzer.stream_context_response( + query=query, context_snippet=context_snippet + ) + orchestration_service = self.orchestration_service + if orchestration_service is None: + return + async for validated_chunk in guardrails_adapter.stream_with_guardrails( + user_message=query, bot_message_generator=bot_generator + ): + if isinstance(validated_chunk, str) and 
self._is_guardrail_violation( + validated_chunk + ): + logger.warning(f"[{chat_id}] Guardrails violation in context streaming") + yield orchestration_service.format_sse( + chat_id, OUTPUT_GUARDRAIL_VIOLATION_MESSAGE + ) + yield orchestration_service.format_sse(chat_id, "END") + costs_metric["context_response"] = get_lm_usage_since( + history_length_before + ) + orchestration_service.log_costs(costs_metric) + return + yield orchestration_service.format_sse(chat_id, validated_chunk) + yield orchestration_service.format_sse(chat_id, "END") + logger.info(f"[{chat_id}] Context streaming complete") + costs_metric["context_response"] = get_lm_usage_since(history_length_before) + orchestration_service.log_costs(costs_metric) + + async def _create_history_stream( + self, + request: OrchestrationRequest, + context_snippet: str, + costs_metric: Dict[str, Dict[str, Any]], + ) -> Optional[AsyncIterator[str]]: + """Set up guardrails adapter and return the history streaming generator.""" + if not self.orchestration_service: + logger.warning( + f"[{request.chatId}] No orchestration_service — cannot stream with guardrails" + ) + return None + try: + components = self.orchestration_service._initialize_service_components( + request + ) + guardrails_adapter = components.get("guardrails_adapter") + except Exception as e: + logger.error( + f"[{request.chatId}] Failed to initialize components: {e}", + exc_info=True, + ) + self._log_costs(costs_metric) + return None + + if not isinstance(guardrails_adapter, NeMoRailsAdapter): + logger.warning( + f"[{request.chatId}] guardrails_adapter unavailable — cannot stream" + ) + self._log_costs(costs_metric) + return None + + history_length_before = 0 + try: + lm = dspy.settings.lm + if lm and hasattr(lm, "history"): + history_length_before = len(lm.history) + except Exception: + pass + + return self._stream_history_generator( + chat_id=request.chatId, + query=request.message, + context_snippet=context_snippet, + 
history_length_before=history_length_before, + guardrails_adapter=guardrails_adapter, + costs_metric=costs_metric, + ) async def execute_async( self, @@ -38,26 +248,62 @@ async def execute_async( time_metric: Optional[Dict[str, float]] = None, ) -> Optional[OrchestrationResponse]: """ - Execute context workflow in non-streaming mode. - - TODO: Check greeting (LLM) → generate response, OR check history (last 10 turns) - → extract answer → validate with guardrails. Return None if cannot answer. + Execute context workflow in non-streaming mode (two-phase). - Args: - request: Orchestration request with user query and history - context: Metadata with is_greeting, can_answer_from_history flags - time_metric: Optional timing dictionary for future timing tracking + Phase 1: Detect if query is a greeting or can be answered from history. + Phase 2: Generate response (greetings: pre-built; history: LLM + guardrails). Returns: - OrchestrationResponse with context-based answer or None to fallback + OrchestrationResponse or None to fallback to RAG """ - logger.debug( - f"[{request.chatId}] Context workflow execute_async called " - f"(not implemented - returning None)" + logger.info( + f"[{request.chatId}] CONTEXT WORKFLOW (NON-STREAMING) | " + f"Query: '{request.message[:100]}'" ) + costs_metric: Dict[str, Dict[str, Any]] = {} + if time_metric is None: + time_metric = {} + + language = detect_language(request.message) + history = self._build_history(request) - # TODO: Implement context workflow logic here - # For now, return None to trigger fallback to next layer (RAG) + detection_result = await self._detect( + request.message, history, time_metric, costs_metric + ) + if detection_result is None: + self._log_costs(costs_metric) + return None + + logger.info( + f"[{request.chatId}] Detection: greeting={detection_result.is_greeting} " + f"can_answer={detection_result.can_answer_from_context}" + ) + + if detection_result.is_greeting: + from src.tool_classifier.greeting_constants 
import get_greeting_response + + greeting = get_greeting_response(language=language) + self._log_costs(costs_metric) + return OrchestrationResponse( + chatId=request.chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content=greeting, + ) + + if ( + detection_result.can_answer_from_context + and detection_result.context_snippet + ): + return await self._generate_response_async( + request, detection_result.context_snippet, time_metric, costs_metric + ) + + logger.warning( + f"[{request.chatId}] Cannot answer from context — falling back to RAG" + ) + self._log_costs(costs_metric) return None async def execute_streaming( @@ -67,24 +313,62 @@ async def execute_streaming( time_metric: Optional[Dict[str, float]] = None, ) -> Optional[AsyncIterator[str]]: """ - Execute context workflow in streaming mode. + Execute context workflow in streaming mode (two-phase). - TODO: Get answer (greeting/history) → validate BEFORE streaming → chunk and - yield as SSE. Return None if cannot answer. - - Args: - request: Orchestration request with user query and history - context: Metadata with is_greeting, can_answer_from_history flags - time_metric: Optional timing dictionary for future timing tracking + Phase 1: Detect context (blocking, fast — classification only). + Phase 2: Stream answer through NeMo Guardrails (same pipeline as RAG). 
Returns: - AsyncIterator yielding SSE strings or None to fallback + AsyncIterator yielding SSE strings or None to fallback to RAG """ - logger.debug( - f"[{request.chatId}] Context workflow execute_streaming called " - f"(not implemented - returning None)" + logger.info( + f"[{request.chatId}] CONTEXT WORKFLOW (STREAMING) | " + f"Query: '{request.message[:100]}'" + ) + costs_metric: Dict[str, Dict[str, Any]] = {} + if time_metric is None: + time_metric = {} + + language = detect_language(request.message) + history = self._build_history(request) + + detection_result = await self._detect( + request.message, history, time_metric, costs_metric + ) + if detection_result is None: + self._log_costs(costs_metric) + return None + + logger.info( + f"[{request.chatId}] Detection: greeting={detection_result.is_greeting} " + f"can_answer={detection_result.can_answer_from_context}" ) - # TODO: Implement context streaming logic here - # For now, return None to trigger fallback to next layer (RAG) + if detection_result.is_greeting: + from src.tool_classifier.greeting_constants import get_greeting_response + + greeting = get_greeting_response(language=language) + orchestration_service = self.orchestration_service + chat_id = request.chatId + + async def _stream_greeting() -> AsyncIterator[str]: + if orchestration_service: + yield orchestration_service.format_sse(chat_id, greeting) + yield orchestration_service.format_sse(chat_id, "END") + orchestration_service.log_costs(costs_metric) + + return _stream_greeting() + + if ( + detection_result.can_answer_from_context + and detection_result.context_snippet + ): + return await self._create_history_stream( + request, detection_result.context_snippet, costs_metric + ) + + logger.warning( + f"[{request.chatId}] Cannot answer from context — falling back to RAG" + ) + self._log_costs(costs_metric) return None diff --git a/src/tool_classifier/workflows/rag_workflow.py b/src/tool_classifier/workflows/rag_workflow.py index b5da35b..1b3ba4d 
100644 --- a/src/tool_classifier/workflows/rag_workflow.py +++ b/src/tool_classifier/workflows/rag_workflow.py @@ -64,7 +64,7 @@ async def execute_async( Args: request: Orchestration request with user query - context: Unused (RAG doesn't need classification metadata) + context: May contain pre-initialized "components" to avoid duplicate init time_metric: Optional timing dictionary from parent (for unified tracking) Returns: @@ -79,8 +79,12 @@ async def execute_async( if time_metric is None: time_metric = {} - # Initialize service components - components = self.orchestration_service._initialize_service_components(request) + # Reuse components from context if available, otherwise initialize + components = context.get("components") + if components is None: + components = self.orchestration_service._initialize_service_components( + request + ) # Call existing RAG pipeline with "rag" prefix for namespacing response = await self.orchestration_service._execute_orchestration_pipeline( @@ -105,6 +109,10 @@ async def execute_streaming( """ Execute RAG workflow in streaming mode. + Returns an AsyncIterator (not an async generator) so callers can + safely use ``await workflow.execute_streaming(...)`` without hitting + a TypeError from awaiting an async generator. 
+ Delegates to existing streaming pipeline which handles: - Prompt refinement (blocking) - Chunk retrieval (blocking) @@ -118,7 +126,7 @@ async def execute_streaming( Args: request: Orchestration request with user query - context: Unused (RAG doesn't need classification metadata) + context: May contain pre-initialized "components" and "stream_ctx" time_metric: Optional timing dictionary from parent (for unified tracking) Returns: @@ -143,8 +151,7 @@ async def execute_streaming( # Get stream context from context if provided, otherwise create minimal tracking stream_ctx = context.get("stream_ctx") if stream_ctx is None: - # Create minimal stream context when called via tool classifier - # In production flow, this is provided by stream_orchestration_response + class MinimalStreamContext: """Minimal stream context for RAG workflow when called directly.""" @@ -154,25 +161,29 @@ def __init__(self, chat_id: str) -> None: self.bot_generator = None def mark_completed(self) -> None: - """No-op: Tracking handled by orchestration service.""" + # Intentionally empty: lifecycle tracking is handled by the orchestration service, not this minimal context pass def mark_cancelled(self) -> None: - """No-op: Tracking handled by orchestration service.""" + # Intentionally empty: lifecycle tracking is handled by the orchestration service, not this minimal context pass def mark_error(self, error_id: str) -> None: - """No-op: Tracking handled by orchestration service.""" + # Intentionally empty: lifecycle tracking is handled by the orchestration service, not this minimal context pass stream_ctx = MinimalStreamContext(request.chatId) - # Delegate to core RAG pipeline (bypasses classifier to avoid recursion) - async for sse_chunk in self.orchestration_service._stream_rag_pipeline( - request=request, - components=components, - stream_ctx=stream_ctx, - costs_metric=costs_metric, - time_metric=time_metric, - ): - yield sse_chunk + # Return an inner async generator so this method stays a 
coroutine. + # This avoids the TypeError when callers do ``await execute_streaming(...)``. + async def _stream() -> AsyncIterator[str]: + async for sse_chunk in self.orchestration_service._stream_rag_pipeline( + request=request, + components=components, + stream_ctx=stream_ctx, + costs_metric=costs_metric, + time_metric=time_metric, + ): + yield sse_chunk + + return _stream() diff --git a/src/tool_classifier/workflows/service_workflow.py b/src/tool_classifier/workflows/service_workflow.py index bb72f78..7882550 100644 --- a/src/tool_classifier/workflows/service_workflow.py +++ b/src/tool_classifier/workflows/service_workflow.py @@ -6,6 +6,7 @@ import httpx from loguru import logger +from src.guardrails.nemo_rails_adapter import NeMoRailsAdapter from src.utils.cost_utils import get_lm_usage_since from models.request_models import ( @@ -73,6 +74,22 @@ def log_costs(self, costs_metric: Dict[str, Dict[str, Any]]) -> None: """ ... + def _initialize_service_components( + self, request: OrchestrationRequest + ) -> Dict[str, Any]: + """Initialize and return service components dictionary.""" + ... + + async def handle_output_guardrails( + self, + guardrails_adapter: Optional[NeMoRailsAdapter], + generated_response: OrchestrationResponse, + request: OrchestrationRequest, + costs_metric: Dict[str, Dict[str, Any]], + ) -> OrchestrationResponse: + """Apply output guardrails to the generated response.""" + ... 
+ class ServiceWorkflowExecutor(BaseWorkflow): """Executes external service calls via Ruuter endpoints (Layer 1).""" diff --git a/src/utils/rate_limiter.py b/src/utils/rate_limiter.py index 4b88d9d..5de46d8 100644 --- a/src/utils/rate_limiter.py +++ b/src/utils/rate_limiter.py @@ -1,8 +1,8 @@ -"""Rate limiter for streaming endpoints with sliding window and token bucket algorithms.""" +"""Rate limiter for streaming endpoints with sliding window algorithms.""" import time from collections import defaultdict, deque -from typing import Dict, Deque, Tuple, Optional, Any +from typing import Dict, Deque, Optional, Any from threading import Lock from loguru import logger @@ -31,11 +31,11 @@ class RateLimitResult(BaseModel): class RateLimiter: """ - In-memory rate limiter with sliding window (requests/minute) and token bucket (tokens/second). + In-memory rate limiter using sliding windows for both requests and tokens. Features: - Sliding window for request rate limiting (e.g., 10 requests per minute) - - Token bucket for burst control (e.g., 100 tokens per second) + - Sliding window for token rate limiting (e.g., 40,000 tokens per minute) - Per-user tracking with authorId - Automatic cleanup of old entries to prevent memory leaks - Thread-safe operations @@ -43,7 +43,7 @@ class RateLimiter: Usage: rate_limiter = RateLimiter( requests_per_minute=10, - tokens_per_second=100 + tokens_per_minute=40_000, ) result = rate_limiter.check_rate_limit( @@ -59,28 +59,29 @@ class RateLimiter: def __init__( self, requests_per_minute: int = StreamConfig.RATE_LIMIT_REQUESTS_PER_MINUTE, - tokens_per_second: int = StreamConfig.RATE_LIMIT_TOKENS_PER_SECOND, + tokens_per_minute: int = StreamConfig.RATE_LIMIT_TOKENS_PER_MINUTE, cleanup_interval: int = StreamConfig.RATE_LIMIT_CLEANUP_INTERVAL, + token_window_seconds: int = StreamConfig.RATE_LIMIT_TOKEN_WINDOW_SECONDS, ): """ Initialize rate limiter. 
Args: requests_per_minute: Maximum requests per user per minute (sliding window) - tokens_per_second: Maximum tokens per user per second (token bucket) + tokens_per_minute: Maximum tokens per user per minute (sliding window) cleanup_interval: Seconds between automatic cleanup of old entries + token_window_seconds: Sliding window size in seconds for token tracking """ self.requests_per_minute = requests_per_minute - self.tokens_per_second = tokens_per_second + self.tokens_per_minute = tokens_per_minute self.cleanup_interval = cleanup_interval + self.token_window_seconds = token_window_seconds # Sliding window: Track request timestamps per user - # Format: {author_id: deque([timestamp1, timestamp2, ...])} self._request_history: Dict[str, Deque[float]] = defaultdict(deque) - # Token bucket: Track token consumption per user - # Format: {author_id: (last_refill_time, available_tokens)} - self._token_buckets: Dict[str, Tuple[float, float]] = {} + # Sliding window: Track token usage per user + self._token_history: Dict[str, Deque[tuple[float, int]]] = defaultdict(deque) # Thread safety self._lock = Lock() @@ -91,7 +92,7 @@ def __init__( logger.info( f"RateLimiter initialized - " f"requests_per_minute: {requests_per_minute}, " - f"tokens_per_second: {tokens_per_second}" + f"tokens_per_minute: {tokens_per_minute}" ) def check_rate_limit( @@ -121,7 +122,7 @@ def check_rate_limit( if not request_result.allowed: return request_result - # Check 2: Token bucket (tokens per second) + # Check 2: Sliding window (tokens per minute) if estimated_tokens > 0: token_result = self._check_token_limit( author_id, estimated_tokens, current_time @@ -186,12 +187,11 @@ def _check_token_limit( current_time: float, ) -> RateLimitResult: """ - Check token bucket limit. + Check sliding window token limit. 
- Token bucket algorithm: - - Bucket refills at constant rate (tokens_per_second) - - Burst allowed up to bucket capacity - - Request denied if insufficient tokens + Sliding window algorithm: + - Track cumulative tokens consumed within the window + - Reject if adding estimated tokens would exceed the limit Args: author_id: User identifier @@ -201,29 +201,31 @@ def _check_token_limit( Returns: RateLimitResult for token limit check """ - bucket_capacity = self.tokens_per_second - - # Get or initialize bucket for user - if author_id not in self._token_buckets: - # New user - start with full bucket - self._token_buckets[author_id] = (current_time, bucket_capacity) - - last_refill, available_tokens = self._token_buckets[author_id] - - # Refill tokens based on time elapsed - time_elapsed = current_time - last_refill - refill_amount = time_elapsed * self.tokens_per_second - available_tokens = min(bucket_capacity, available_tokens + refill_amount) - - # Check if enough tokens available - if available_tokens < estimated_tokens: - # Calculate time needed to refill enough tokens - tokens_needed = estimated_tokens - available_tokens - retry_after = int(tokens_needed / self.tokens_per_second) + 1 + token_history = self._token_history[author_id] + window_start = current_time - self.token_window_seconds + + # Remove entries outside the sliding window + while token_history and token_history[0][0] < window_start: + token_history.popleft() + + # Sum tokens consumed in the current window + current_token_usage = sum(tokens for _, tokens in token_history) + + # Check if adding this request would exceed the limit + if current_token_usage + estimated_tokens > self.tokens_per_minute: + # Calculate retry_after based on oldest entry in window + if token_history: + oldest_timestamp = token_history[0][0] + retry_after = ( + int(oldest_timestamp + self.token_window_seconds - current_time) + 1 + ) + else: + retry_after = 1 logger.warning( f"Token rate limit exceeded for {author_id} - " - 
f"needed: {estimated_tokens}, available: {available_tokens:.0f} " + f"needed: {estimated_tokens}, " + f"current_usage: {current_token_usage}/{self.tokens_per_minute} " f"(retry after {retry_after}s)" ) @@ -231,8 +233,8 @@ def _check_token_limit( allowed=False, retry_after=retry_after, limit_type="tokens", - current_usage=int(bucket_capacity - available_tokens), - limit=self.tokens_per_second, + current_usage=current_token_usage, + limit=self.tokens_per_minute, ) return RateLimitResult(allowed=True) @@ -254,20 +256,9 @@ def _record_request( # Record request timestamp for sliding window self._request_history[author_id].append(current_time) - # Deduct tokens from bucket - if tokens_consumed > 0 and author_id in self._token_buckets: - last_refill, available_tokens = self._token_buckets[author_id] - - # Refill before deducting - time_elapsed = current_time - last_refill - refill_amount = time_elapsed * self.tokens_per_second - available_tokens = min( - self.tokens_per_second, available_tokens + refill_amount - ) - - # Deduct tokens - available_tokens -= tokens_consumed - self._token_buckets[author_id] = (current_time, available_tokens) + # Record token usage for sliding window + if tokens_consumed > 0: + self._token_history[author_id].append((current_time, tokens_consumed)) def _cleanup_old_entries(self, current_time: float) -> None: """ @@ -294,23 +285,25 @@ def _cleanup_old_entries(self, current_time: float) -> None: for author_id in users_to_remove: del self._request_history[author_id] - # Clean up token buckets (remove entries inactive for 5 minutes) - inactive_threshold = current_time - 300 - buckets_to_remove: list[str] = [] + # Clean up token history (remove entries outside window + inactive users) + token_window_start = current_time - self.token_window_seconds + token_users_to_remove: list[str] = [] - for author_id, (last_refill, _) in self._token_buckets.items(): - if last_refill < inactive_threshold: - buckets_to_remove.append(author_id) + for author_id, 
token_history in self._token_history.items(): + while token_history and token_history[0][0] < token_window_start: + token_history.popleft() + if not token_history: + token_users_to_remove.append(author_id) - for author_id in buckets_to_remove: - del self._token_buckets[author_id] + for author_id in token_users_to_remove: + del self._token_history[author_id] self._last_cleanup = current_time - if users_to_remove or buckets_to_remove: + if users_to_remove or token_users_to_remove: logger.debug( f"Cleaned up {len(users_to_remove)} request histories and " - f"{len(buckets_to_remove)} token buckets" + f"{len(token_users_to_remove)} token histories" ) def get_stats(self) -> Dict[str, Any]: @@ -323,9 +316,9 @@ def get_stats(self) -> Dict[str, Any]: with self._lock: return { "total_users_tracked": len(self._request_history), - "total_token_buckets": len(self._token_buckets), + "total_token_histories": len(self._token_history), "requests_per_minute_limit": self.requests_per_minute, - "tokens_per_second_limit": self.tokens_per_second, + "tokens_per_minute_limit": self.tokens_per_minute, "last_cleanup": self._last_cleanup, } @@ -339,7 +332,7 @@ def reset_user(self, author_id: str) -> None: with self._lock: if author_id in self._request_history: del self._request_history[author_id] - if author_id in self._token_buckets: - del self._token_buckets[author_id] + if author_id in self._token_history: + del self._token_history[author_id] logger.info(f"Reset rate limits for user: {author_id}") From d647f865175e450aa5148fac4558a01b41c2f741 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Mon, 9 Mar 2026 10:00:46 +0530 Subject: [PATCH 22/27] fixed pr review suggested issues --- docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md | 6 +++--- src/tool_classifier/context_analyzer.py | 11 +++++++++-- .../workflows/context_workflow.py | 18 ++++++++++++------ src/tool_classifier/workflows/rag_workflow.py | 7 ++++--- src/utils/rate_limiter.py | 15 ++++++++++----- 5 files changed, 38 insertions(+), 19 
deletions(-) diff --git a/docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md b/docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md index 4df8d1e..8a67e84 100644 --- a/docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md +++ b/docs/CONTEXT_WORKFLOW_GREETING_DETECTION.md @@ -144,9 +144,9 @@ Calls `stream_context_response(query, context_snippet)` which uses DSPy native s ### Greeting Response Generation -The LLM generates contextually appropriate responses in the **same language** as the query. If the LLM detects a greeting but fails to produce an answer (e.g., JSON parse error), the system falls back to predefined static responses from `greeting_constants.py`. - -**Fallback responses (`greeting_constants.py`):** +Greeting detection is handled in **Phase 1 (`detect_context`)**, where the LLM classifies whether the query is a greeting and, if so, identifies the language and greeting type. This phase does **not** generate the final natural-language reply. +In **Phase 2**, `ContextWorkflowExecutor` calls `get_greeting_response(...)`, which returns a response based on predefined static templates in `greeting_constants.py`, ensuring the reply is in the detected language. If greeting detection fails or the greeting type is unsupported, the query falls through to the next workflow layer instead of attempting LLM-based greeting generation. 
+**Greeting response templates (`greeting_constants.py`):** ```python GREETINGS_ET = { diff --git a/src/tool_classifier/context_analyzer.py b/src/tool_classifier/context_analyzer.py index 51aa214..4572aef 100644 --- a/src/tool_classifier/context_analyzer.py +++ b/src/tool_classifier/context_analyzer.py @@ -109,6 +109,10 @@ class ContextDetectionResult(BaseModel): """Result of Phase 1 context detection (classify only, no answer generation).""" is_greeting: bool = Field(..., description="Whether the query is a greeting") + greeting_type: str = Field( + default="hello", + description="Type of greeting: hello, goodbye, thanks, or casual", + ) can_answer_from_context: bool = Field( ..., description="Whether the query can be answered from conversation history" ) @@ -143,8 +147,10 @@ class ContextDetectionSignature(dspy.Signature): ) user_query: str = dspy.InputField(desc="Current user query to classify") detection_result: str = dspy.OutputField( - desc='JSON object with: {"is_greeting": bool, "can_answer_from_context": bool, ' + desc='JSON object with: {"is_greeting": bool, "greeting_type": str, "can_answer_from_context": bool, ' '"reasoning": str, "context_snippet": str|null}. ' + 'greeting_type must be one of: "hello", "goodbye", "thanks", "casual" — ' + 'set it only when is_greeting is true, defaulting to "hello" otherwise. ' "context_snippet should contain the relevant excerpt from history if can_answer_from_context is true, " "or null otherwise. Do NOT generate the final answer — only detect and extract." ) @@ -323,6 +329,7 @@ async def detect_context( result = ContextDetectionResult( is_greeting=detection_data.get("is_greeting", False), + greeting_type=detection_data.get("greeting_type", "hello"), can_answer_from_context=detection_data.get( "can_answer_from_context", False ), @@ -890,4 +897,4 @@ def get_fallback_greeting_response(self, language: str = "et") -> str: "et": "Tere! Kuidas ma saan sind aidata?", "en": "Hello! 
How can I help you?", } - return greetings.get(language, greetings["en"]) + return greetings.get(language, greetings["et"]) diff --git a/src/tool_classifier/workflows/context_workflow.py b/src/tool_classifier/workflows/context_workflow.py index 2c83769..8d69675 100644 --- a/src/tool_classifier/workflows/context_workflow.py +++ b/src/tool_classifier/workflows/context_workflow.py @@ -282,7 +282,9 @@ async def execute_async( if detection_result.is_greeting: from src.tool_classifier.greeting_constants import get_greeting_response - greeting = get_greeting_response(language=language) + greeting = get_greeting_response( + greeting_type=detection_result.greeting_type, language=language + ) self._log_costs(costs_metric) return OrchestrationResponse( chatId=request.chatId, @@ -347,15 +349,19 @@ async def execute_streaming( if detection_result.is_greeting: from src.tool_classifier.greeting_constants import get_greeting_response - greeting = get_greeting_response(language=language) + greeting = get_greeting_response( + greeting_type=detection_result.greeting_type, language=language + ) orchestration_service = self.orchestration_service + if orchestration_service is None: + self._log_costs(costs_metric) + return None chat_id = request.chatId async def _stream_greeting() -> AsyncIterator[str]: - if orchestration_service: - yield orchestration_service.format_sse(chat_id, greeting) - yield orchestration_service.format_sse(chat_id, "END") - orchestration_service.log_costs(costs_metric) + yield orchestration_service.format_sse(chat_id, greeting) + yield orchestration_service.format_sse(chat_id, "END") + orchestration_service.log_costs(costs_metric) return _stream_greeting() diff --git a/src/tool_classifier/workflows/rag_workflow.py b/src/tool_classifier/workflows/rag_workflow.py index 1b3ba4d..9c983ce 100644 --- a/src/tool_classifier/workflows/rag_workflow.py +++ b/src/tool_classifier/workflows/rag_workflow.py @@ -109,9 +109,10 @@ async def execute_streaming( """ Execute RAG 
workflow in streaming mode. - Returns an AsyncIterator (not an async generator) so callers can - safely use ``await workflow.execute_streaming(...)`` without hitting - a TypeError from awaiting an async generator. + Coroutine that returns an AsyncIterator so callers can safely use + ``await workflow.execute_streaming(...)`` and then iterate over the + returned stream without hitting a TypeError from awaiting an async + generator. Delegates to existing streaming pipeline which handles: - Prompt refinement (blocking) diff --git a/src/utils/rate_limiter.py b/src/utils/rate_limiter.py index 5de46d8..d86829f 100644 --- a/src/utils/rate_limiter.py +++ b/src/utils/rate_limiter.py @@ -76,6 +76,9 @@ def __init__( self.tokens_per_minute = tokens_per_minute self.cleanup_interval = cleanup_interval self.token_window_seconds = token_window_seconds + # Scale the per-minute limit to the actual window size so the + # sliding-window comparison is consistent regardless of window length. + self.tokens_per_window = int(tokens_per_minute * token_window_seconds / 60) # Sliding window: Track request timestamps per user self._request_history: Dict[str, Deque[float]] = defaultdict(deque) @@ -211,8 +214,8 @@ def _check_token_limit( # Sum tokens consumed in the current window current_token_usage = sum(tokens for _, tokens in token_history) - # Check if adding this request would exceed the limit - if current_token_usage + estimated_tokens > self.tokens_per_minute: + # Check if adding this request would exceed the scaled window limit + if current_token_usage + estimated_tokens > self.tokens_per_window: # Calculate retry_after based on oldest entry in window if token_history: oldest_timestamp = token_history[0][0] @@ -225,8 +228,10 @@ def _check_token_limit( logger.warning( f"Token rate limit exceeded for {author_id} - " f"needed: {estimated_tokens}, " - f"current_usage: {current_token_usage}/{self.tokens_per_minute} " - f"(retry after {retry_after}s)" + f"current_usage: 
{current_token_usage}/{self.tokens_per_window} " + f"(window: {self.token_window_seconds}s, " + f"rate: {self.tokens_per_minute}/min, " + f"retry after {retry_after}s)" ) return RateLimitResult( @@ -234,7 +239,7 @@ def _check_token_limit( retry_after=retry_after, limit_type="tokens", current_usage=current_token_usage, - limit=self.tokens_per_minute, + limit=self.tokens_per_window, ) return RateLimitResult(allowed=True) From 6c46d3c9da11e6f3d4a59843e11e07576801bd40 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Tue, 10 Mar 2026 16:38:50 +0530 Subject: [PATCH 23/27] removed service project layer --- DSL/Ruuter.public/services/GET/.guard | 28 -- .../GET/account/user-profile-settings.yml | 27 -- .../services/GET/active-services.yml | 20 - DSL/Ruuter.public/services/GET/generic/.guard | 4 - .../accounts/customer-support-activity.yml | 22 - .../accounts/user-profile-settings.yml | 26 -- .../GET/generic/accounts/user-role.yml | 19 - .../services/GET/generic/csa/active-chats.yml | 76 ---- .../services/GET/generic/userinfo.yml | 29 -- DSL/Ruuter.public/services/GET/get-sticky.yml | 40 -- .../services/GET/internal/domain-file.yml | 40 -- .../GET/internal/return-file-locations.yml | 27 -- .../GET/mocks/client-input-variables.yml | 19 - .../services/GET/mocks/service-settings.yml | 19 - .../services/GET/mocks/validation-mock.yml | 26 -- .../services/GET/rasa/rule-names.yml | 29 -- .../services/GET/secrets-with-priority.yml | 34 -- DSL/Ruuter.public/services/GET/secrets.yml | 20 - .../services/GET/service-settings.yml | 19 - .../services/GET/services/active/.guard | 4 - .../services/GET/services/draft/.guard | 4 - .../services/GET/services/inactive/.guard | 4 - .../services/GET/services/log-by-request.yml | 33 -- .../services/GET/services/log-by-service.yml | 33 -- .../GET/services/services-detailed/nok.yml | 52 --- .../services/GET/services/statistics.yml | 21 - .../services/GET/services/status.yml | 24 -- DSL/Ruuter.public/services/GET/slots.yml | 26 -- 
.../services/GET/steps/preferences.yml | 72 ---- .../services/GET/sticky/example.yml | 58 --- DSL/Ruuter.public/services/POST/.guard | 28 -- DSL/Ruuter.public/services/POST/auth/.guard | 4 - .../services/POST/auth/login.yml | 101 ----- DSL/Ruuter.public/services/POST/csv.yml | 50 --- .../POST/dates/calculate-difference.yml | 82 ---- .../services/POST/endpoints/common.yml | 48 --- .../services/POST/file/rename.yml | 42 -- .../services/POST/mocks/RBAC-mock.yml | 50 --- .../POST/mocks/dates/calculate-difference.yml | 136 ------- .../services/POST/mocks/service-settings.yml | 19 - .../services/POST/mocks/services/add.yml | 44 -- .../mocks/services/open-api-spec-mock.yml | 17 - .../services/POST/mocks/user-info.yml | 52 --- .../services/POST/mocks/validation-mock.yml | 26 -- .../services/POST/rasa/rules/add.yml | 129 ------ .../services/POST/saveJsonToYml.yml | 39 -- .../services/POST/service-by-id.yml | 90 ----- .../services/POST/service-settings.yml | 29 -- DSL/Ruuter.public/services/POST/services.yml | 43 -- .../services/POST/services/active/.guard | 4 - .../services/active/Broneeringu_kinnitus.yml | 65 --- .../active/Kalastusloa_uuendamise_teade.yml | 60 --- .../POST/services/active/Koolivaheajad.yml | 63 --- .../services/active/Lihtne_test_teenus.yml | 61 --- .../services/active/customer_feedback.yml | 82 ---- .../services/POST/services/add.yml | 177 -------- .../POST/services/create-endpoint.yml | 46 --- .../POST/services/delete-endpoint.yml | 34 -- .../services/POST/services/delete.yml | 155 ------- .../services/domain-intent-service-link.yml | 157 -------- .../services/POST/services/draft/.guard | 4 - .../services/POST/services/draft/test.tmp | 48 --- .../services/POST/services/edit.yml | 381 ------------------ .../POST/services/endpoint-url-validation.yml | 32 -- .../POST/services/import-services.yml | 71 ---- .../services/POST/services/inactive/.guard | 4 - .../services/POST/services/open-api-spec.yml | 34 -- .../POST/services/requests/explain.yml | 95 ----- 
.../services/POST/services/resql/add.yml | 62 --- .../services/POST/services/status.yml | 303 -------------- .../POST/services/update-endpoint.yml | 61 --- .../services/POST/steps/preferences.yml | 68 ---- DSL/Ruuter.public/services/POST/user-info.yml | 16 - DSL/Ruuter.public/services/TEMPLATES/RBAC.yml | 51 --- .../TEMPLATES/check-user-authority.yml | 50 --- .../services/TEMPLATES/client-input.yml | 19 - .../services/TEMPLATES/direct-to-cs.yml | 42 -- .../services/TEMPLATES/end-conversation.yml | 42 -- .../services/TEMPLATES/file-generate.yml | 45 --- .../services/TEMPLATES/file-signing.yml | 35 -- .../services/TEMPLATES/open-webpage.yml | 44 -- .../TEMPLATES/send-message-to-client.yml | 42 -- DSL/Ruuter.public/services/TEMPLATES/siga.yml | 132 ------ DSL/Ruuter.public/services/TEMPLATES/tara.yml | 51 --- .../TEMPLATES/validation-template.yml | 56 --- src/tool_classifier/constants.py | 2 +- 86 files changed, 1 insertion(+), 4577 deletions(-) delete mode 100644 DSL/Ruuter.public/services/GET/.guard delete mode 100644 DSL/Ruuter.public/services/GET/account/user-profile-settings.yml delete mode 100644 DSL/Ruuter.public/services/GET/active-services.yml delete mode 100644 DSL/Ruuter.public/services/GET/generic/.guard delete mode 100644 DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml delete mode 100644 DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml delete mode 100644 DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml delete mode 100644 DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml delete mode 100644 DSL/Ruuter.public/services/GET/generic/userinfo.yml delete mode 100644 DSL/Ruuter.public/services/GET/get-sticky.yml delete mode 100644 DSL/Ruuter.public/services/GET/internal/domain-file.yml delete mode 100644 DSL/Ruuter.public/services/GET/internal/return-file-locations.yml delete mode 100644 DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml delete mode 100644 
DSL/Ruuter.public/services/GET/mocks/service-settings.yml delete mode 100644 DSL/Ruuter.public/services/GET/mocks/validation-mock.yml delete mode 100644 DSL/Ruuter.public/services/GET/rasa/rule-names.yml delete mode 100644 DSL/Ruuter.public/services/GET/secrets-with-priority.yml delete mode 100644 DSL/Ruuter.public/services/GET/secrets.yml delete mode 100644 DSL/Ruuter.public/services/GET/service-settings.yml delete mode 100644 DSL/Ruuter.public/services/GET/services/active/.guard delete mode 100644 DSL/Ruuter.public/services/GET/services/draft/.guard delete mode 100644 DSL/Ruuter.public/services/GET/services/inactive/.guard delete mode 100644 DSL/Ruuter.public/services/GET/services/log-by-request.yml delete mode 100644 DSL/Ruuter.public/services/GET/services/log-by-service.yml delete mode 100644 DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml delete mode 100644 DSL/Ruuter.public/services/GET/services/statistics.yml delete mode 100644 DSL/Ruuter.public/services/GET/services/status.yml delete mode 100644 DSL/Ruuter.public/services/GET/slots.yml delete mode 100644 DSL/Ruuter.public/services/GET/steps/preferences.yml delete mode 100644 DSL/Ruuter.public/services/GET/sticky/example.yml delete mode 100644 DSL/Ruuter.public/services/POST/.guard delete mode 100644 DSL/Ruuter.public/services/POST/auth/.guard delete mode 100644 DSL/Ruuter.public/services/POST/auth/login.yml delete mode 100644 DSL/Ruuter.public/services/POST/csv.yml delete mode 100644 DSL/Ruuter.public/services/POST/dates/calculate-difference.yml delete mode 100644 DSL/Ruuter.public/services/POST/endpoints/common.yml delete mode 100644 DSL/Ruuter.public/services/POST/file/rename.yml delete mode 100644 DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml delete mode 100644 DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml delete mode 100644 DSL/Ruuter.public/services/POST/mocks/service-settings.yml delete mode 100644 DSL/Ruuter.public/services/POST/mocks/services/add.yml 
delete mode 100644 DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml delete mode 100644 DSL/Ruuter.public/services/POST/mocks/user-info.yml delete mode 100644 DSL/Ruuter.public/services/POST/mocks/validation-mock.yml delete mode 100644 DSL/Ruuter.public/services/POST/rasa/rules/add.yml delete mode 100644 DSL/Ruuter.public/services/POST/saveJsonToYml.yml delete mode 100644 DSL/Ruuter.public/services/POST/service-by-id.yml delete mode 100644 DSL/Ruuter.public/services/POST/service-settings.yml delete mode 100644 DSL/Ruuter.public/services/POST/services.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/active/.guard delete mode 100644 DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/add.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/create-endpoint.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/delete-endpoint.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/delete.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/draft/.guard delete mode 100644 DSL/Ruuter.public/services/POST/services/draft/test.tmp delete mode 100644 DSL/Ruuter.public/services/POST/services/edit.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/import-services.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/inactive/.guard delete mode 100644 
DSL/Ruuter.public/services/POST/services/open-api-spec.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/requests/explain.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/resql/add.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/status.yml delete mode 100644 DSL/Ruuter.public/services/POST/services/update-endpoint.yml delete mode 100644 DSL/Ruuter.public/services/POST/steps/preferences.yml delete mode 100644 DSL/Ruuter.public/services/POST/user-info.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/RBAC.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/client-input.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/file-generate.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/file-signing.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/siga.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/tara.yml delete mode 100644 DSL/Ruuter.public/services/TEMPLATES/validation-template.yml diff --git a/DSL/Ruuter.public/services/GET/.guard b/DSL/Ruuter.public/services/GET/.guard deleted file mode 100644 index 4fd565b..0000000 --- a/DSL/Ruuter.public/services/GET/.guard +++ /dev/null @@ -1,28 +0,0 @@ -check_for_cookie: - switch: - - condition: ${incoming.headers == null || incoming.headers.cookie == null} - next: guard_fail - next: authenticate - -authenticate: - template: "[#SERVICE_PROJECT_LAYER]/check-user-authority" - requestType: templates - headers: - cookie: ${incoming.headers.cookie} - result: authority_result - -check_authority_result: - switch: - - condition: ${authority_result !== 
"false"} - next: guard_success - next: guard_fail - -guard_success: - return: "success" - status: 200 - next: end - -guard_fail: - return: "unauthorized" - status: 401 - next: end diff --git a/DSL/Ruuter.public/services/GET/account/user-profile-settings.yml b/DSL/Ruuter.public/services/GET/account/user-profile-settings.yml deleted file mode 100644 index 320d4af..0000000 --- a/DSL/Ruuter.public/services/GET/account/user-profile-settings.yml +++ /dev/null @@ -1,27 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'USER-PROFILE-SETTINGS'" - method: get - accepts: json - returns: json - namespace: service - -step_1: - call: reflect.mock - args: - response: - response: - - userId: EE30303039914 - forwardedChatPopupNotifications: false - forwardedChatSoundNotifications: false - forwardedChatEmailNotifications: false - newChatPopupNotifications: false - newChatSoundNotifications: false - newChatEmailNotifications: false - useAutocorrect: true - result: reflected_request - -step_2: - wrapper: false - return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/active-services.yml b/DSL/Ruuter.public/services/GET/active-services.yml deleted file mode 100644 index 873bb54..0000000 --- a/DSL/Ruuter.public/services/GET/active-services.yml +++ /dev/null @@ -1,20 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'ACTIVE-SERVICES'" - method: get - accepts: json - returns: json - namespace: service - -get_services_list: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-active-services-list" - result: results - -return_ok: - status: 200 - wrapper: false - return: ${results.response.body} - next: end diff --git a/DSL/Ruuter.public/services/GET/generic/.guard b/DSL/Ruuter.public/services/GET/generic/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/GET/generic/.guard +++ /dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: 
"success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml b/DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml deleted file mode 100644 index 25c2e33..0000000 --- a/DSL/Ruuter.public/services/GET/generic/accounts/customer-support-activity.yml +++ /dev/null @@ -1,22 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'CUSTOMER-SUPPORT-ACTIVITY'" - method: get - accepts: json - returns: json - namespace: service - -step_1: - call: reflect.mock - args: - response: - response: - - idCode: 'EE49902216518' - active: 'true' - status: 'idle' - result: reflected_request - -step_2: - wrapper: true - return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml b/DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml deleted file mode 100644 index 344b83c..0000000 --- a/DSL/Ruuter.public/services/GET/generic/accounts/user-profile-settings.yml +++ /dev/null @@ -1,26 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'USER-PROFILE-SETTINGS'" - method: get - accepts: json - returns: json - namespace: service - -step_1: - call: reflect.mock - args: - response: - data: - - userId: EE30303039914 - forwardedChatPopupNotifications: false - forwardedChatSoundNotifications: false - forwardedChatEmailNotifications: false - newChatPopupNotifications: false - newChatSoundNotifications: false - newChatEmailNotifications: false - useAutocorrect: true - result: reflected_request - -step_2: - return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml b/DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml deleted file mode 100644 index 7794dbb..0000000 --- a/DSL/Ruuter.public/services/GET/generic/accounts/user-role.yml +++ /dev/null @@ -1,19 +0,0 @@ 
-declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'USER-ROLE'" - method: get - accepts: json - returns: json - namespace: service - -step_1: - call: reflect.mock - args: - response: - response: - - "ROLE_ADMINISTRATOR" - result: reflected_request - -step_2: - return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml b/DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml deleted file mode 100644 index 02d95eb..0000000 --- a/DSL/Ruuter.public/services/GET/generic/csa/active-chats.yml +++ /dev/null @@ -1,76 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'ACTIVE-CHATS'" - method: get - accepts: json - returns: json - namespace: service - -step_1: - call: reflect.mock - args: - response: - response: - - id: '22fa5630-6f92-4d50-92ba-685c872383af' - customerSupportId: '' - customerSupportDisplayName: '' - endUserId: '' - endUserFirstName: '' - endUserLastName: '' - status: 'OPEN' - created: '2023-01-17T13:18:38.808+00:00' - updated: '2023-01-17T13:19:26.348+00:00' - ended: null - endUserOs: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' - endUserUrl: 'https://test.buerokratt.ee/' - forwardedToName: null - forwardedByUser: '' - forwardedFromCsa: '' - forwardedToCsa: '' - lastMessage: 'Suunan teid klienditeenindajale. Varuge natukene kannatust.' 
- contactsMessage: null - lastMessageTimestamp: '2023-01-17T13:19:26.316+00:00' - - id: '5206b7bd-0812-40a8-ae1d-3774f07f06f0' - customerSupportId: '' - customerSupportDisplayName: '' - endUserId: '' - endUserFirstName: '' - endUserLastName: '' - status: 'OPEN' - created: '2023-01-19T13:38:32.421+00:00' - updated: '2023-01-19T13:38:32.430+00:00' - ended: null - endUserOs: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' - endUserUrl: 'https://test.buerokratt.ee/' - forwardedToName: null - forwardedByUser: '' - forwardedFromCsa: '' - forwardedToCsa: '' - lastMessage: 'aitäh' - contactsMessage: null - lastMessageTimestamp: '2022-11-23T09:33:56.803+00:00' - - id: 'b7bba1c2-b7ab-4b17-825a-2d66a7d16fc4' - customerSupportId: '' - customerSupportDisplayName: '' - endUserId: '' - endUserFirstName: '' - endUserLastName: '' - status: 'OPEN' - created: '2023-01-19T13:38:32.421+00:00' - updated: '2023-01-19T13:38:32.430+00:00' - ended: null - endUserOs: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36' - endUserUrl: 'https://test.buerokratt.ee/' - forwardedToName: null - forwardedByUser: '' - forwardedFromCsa: '' - forwardedToCsa: '' - lastMessage: 'dasnhpwa' - contactsMessage: null - lastMessageTimestamp: '2023-01-18T12:24:54.557+00:00' - result: reflected_request - -step_2: - wrapper: true - return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/generic/userinfo.yml b/DSL/Ruuter.public/services/GET/generic/userinfo.yml deleted file mode 100644 index 1278132..0000000 --- a/DSL/Ruuter.public/services/GET/generic/userinfo.yml +++ /dev/null @@ -1,29 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'USERINFO'" - method: get - accepts: json - returns: json - namespace: service - -step_1: - call: reflect.mock - args: - response: - data: - firstName: OK - lastName: TESTNUMBER - 
idCode: EE30303039914 - displayName: OK - JWTCreated: 1704724715000 - login: EE30303039914 - csaEmail: mail@mail.ee - authorities: - - ROLE_ADMINISTRATOR - csaTitle: OG - JWTExpirationTimestamp: 1704739715000 - result: reflected_request - -step_2: - return: ${reflected_request.response.body} diff --git a/DSL/Ruuter.public/services/GET/get-sticky.yml b/DSL/Ruuter.public/services/GET/get-sticky.yml deleted file mode 100644 index ca906d6..0000000 --- a/DSL/Ruuter.public/services/GET/get-sticky.yml +++ /dev/null @@ -1,40 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'GET-STICKY'" - method: get - accepts: json - returns: json - namespace: service - allowlist: - params: - - field: name - type: string - description: "Parameter 'name'" - -check_for_parameters: - switch: - - condition: ${incoming.params == null || incoming.params.name == null} - next: get_all_sticky_services - next: get_single_sticky_service - -get_single_sticky_service: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/ruuter/sticky/steps" - query: - name: ${incoming.params.name} - result: results - next: return_ok - -get_all_sticky_services: - call: http.get - args: - url: "[#SERVICE_DMAPPER]/ruuter/sticky" - result: results - next: return_ok - -return_ok: - status: 200 - return: ${results.response.body} - next: end diff --git a/DSL/Ruuter.public/services/GET/internal/domain-file.yml b/DSL/Ruuter.public/services/GET/internal/domain-file.yml deleted file mode 100644 index 2e42dc9..0000000 --- a/DSL/Ruuter.public/services/GET/internal/domain-file.yml +++ /dev/null @@ -1,40 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'DOMAIN-FILE'" - method: get - accepts: json - returns: json - namespace: service - allowlist: - headers: - - field: cookie - type: string - description: "Cookie field" - -getFileLocations: - call: http.get - args: - url: "[#SERVICE_RUUTER]/internal/return-file-locations" - headers: - cookie: 
${incoming.headers.cookie} - result: fileLocations - -getDomainFile: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/read-file" - body: - file_path: ${fileLocations.response.body.response.domain_location} - result: domainFile - -convertYamlToJson: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/yaml_to_json" - body: - file: ${domainFile.response.body.file} - result: domainData - -return_value: - return: ${domainData.response.body} diff --git a/DSL/Ruuter.public/services/GET/internal/return-file-locations.yml b/DSL/Ruuter.public/services/GET/internal/return-file-locations.yml deleted file mode 100644 index 0ebab6a..0000000 --- a/DSL/Ruuter.public/services/GET/internal/return-file-locations.yml +++ /dev/null @@ -1,27 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'RETURN-FILE-LOCATIONS'" - method: get - accepts: json - returns: json - namespace: service - allowlist: - headers: - - field: cookie - type: string - description: "Cookie field" - -assign_step: - assign: - locations: - rules_location: "[#DMAPPER_LOCATIONS_PATH]/data/rules.yml" - stories_location: "[#DMAPPER_LOCATIONS_PATH]/data/stories.yml" - domain_location: "[#DMAPPER_LOCATIONS_PATH]/data/domain.yml" - test_stories_location: "[#DMAPPER_LOCATIONS_PATH]/test_stories.yml" - intents_location: "[#DMAPPER_LOCATIONS_PATH]/nlu/" - regex_location: "[#DMAPPER_LOCATIONS_PATH]/regex" - training_result_location: "[#DMAPPER_LOCATIONS_PATH]/results" - config_location: "[#DMAPPER_LOCATIONS_PATH]/data/config.yml" -return_value: - return: ${locations} diff --git a/DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml b/DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml deleted file mode 100644 index dab285a..0000000 --- a/DSL/Ruuter.public/services/GET/mocks/client-input-variables.yml +++ /dev/null @@ -1,19 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 
'CLIENT-INPUT-VARIABLES'" - method: get - accepts: json - returns: json - namespace: service - -mock_variables: - call: reflect.mock - args: - response: - data: ['{{user.firstname}}', '{{user.lastname}}', '{{user.birthdate}}', '{{user.email}}', '{{invoice.total}}', '{{invoice.subtotal}}'] - result: mock_res - -return_result: - wrapper: false - return: ${mock_res.response.body.data} diff --git a/DSL/Ruuter.public/services/GET/mocks/service-settings.yml b/DSL/Ruuter.public/services/GET/mocks/service-settings.yml deleted file mode 100644 index 35482b2..0000000 --- a/DSL/Ruuter.public/services/GET/mocks/service-settings.yml +++ /dev/null @@ -1,19 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SERVICE-SETTINGS'" - method: get - accepts: json - returns: json - namespace: service - -call_mock: - call: reflect.mock - args: - response: - maxInputTry: 4 - result: mock_res - -return_result: - wrapper: false - return: ${mock_res.response.body} diff --git a/DSL/Ruuter.public/services/GET/mocks/validation-mock.yml b/DSL/Ruuter.public/services/GET/mocks/validation-mock.yml deleted file mode 100644 index e445255..0000000 --- a/DSL/Ruuter.public/services/GET/mocks/validation-mock.yml +++ /dev/null @@ -1,26 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'VALIDATION-MOCK'" - method: get - accepts: json - returns: json - namespace: service - -call_mock: - call: reflect.mock - args: - response: - project: "Bürokratt" - website: "www.kratid.ee" - result: mock_res - -call_template: - template: "[#SERVICE_PROJECT_LAYER]/validation-template" - requestType: templates - body: - response: ${mock_res.response} - result: templateResult - -return_result: - return: ${templateResult} diff --git a/DSL/Ruuter.public/services/GET/rasa/rule-names.yml b/DSL/Ruuter.public/services/GET/rasa/rule-names.yml deleted file mode 100644 index 5fd45b2..0000000 --- 
a/DSL/Ruuter.public/services/GET/rasa/rule-names.yml +++ /dev/null @@ -1,29 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'RULE-NAMES'" - method: get - accepts: json - returns: json - namespace: service - -getRules: - call: http.get - args: - url: "[#SERVICE_OPENSEARCH]/rules/_search?size=1000" - result: getRulesResult - -mapRulesData: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/get_rule_names" - headers: - type: 'json' - body: - hits: ${getRulesResult.response.body.hits.hits} - result: rulesData - next: returnSuccess - -returnSuccess: - return: ${rulesData.response.body} - next: end diff --git a/DSL/Ruuter.public/services/GET/secrets-with-priority.yml b/DSL/Ruuter.public/services/GET/secrets-with-priority.yml deleted file mode 100644 index 602eeff..0000000 --- a/DSL/Ruuter.public/services/GET/secrets-with-priority.yml +++ /dev/null @@ -1,34 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SECRETS-WITH-PRIORITY'" - method: get - accepts: json - returns: json - namespace: service - -check_parameters: - switch: - - condition: ${incoming.params !== null && incoming.params.type === 'test'} - next: get_ruuter_secrets_test_priority - next: get_ruuter_secrets_prod_priority - -get_ruuter_secrets_prod_priority: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/secrets/get-with-priority" - result: results - next: return_ok - -get_ruuter_secrets_test_priority: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/secrets/get-with-priority?priority=test" - result: results - next: return_ok - -return_ok: - status: 200 - wrapper: false - return: ${results.response.body} - next: end diff --git a/DSL/Ruuter.public/services/GET/secrets.yml b/DSL/Ruuter.public/services/GET/secrets.yml deleted file mode 100644 index 60913c8..0000000 --- a/DSL/Ruuter.public/services/GET/secrets.yml +++ /dev/null @@ -1,20 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: 
"Decription placeholder for 'SECRETS'" - method: get - accepts: json - returns: json - namespace: service - -get_ruuter_secrets: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/secrets/get-all" - result: results - -return_ok: - status: 200 - wrapper: false - return: ${results.response.body} - next: end diff --git a/DSL/Ruuter.public/services/GET/service-settings.yml b/DSL/Ruuter.public/services/GET/service-settings.yml deleted file mode 100644 index abe4ff0..0000000 --- a/DSL/Ruuter.public/services/GET/service-settings.yml +++ /dev/null @@ -1,19 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SERVICE-SETTINGS'" - method: get - accepts: json - returns: json - namespace: service - -updateSettings: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-settings" - result: results - -returnSuccess: - wrapper: false - status: 200 - return: ${results.response.body} diff --git a/DSL/Ruuter.public/services/GET/services/active/.guard b/DSL/Ruuter.public/services/GET/services/active/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/GET/services/active/.guard +++ /dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: "success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/GET/services/draft/.guard b/DSL/Ruuter.public/services/GET/services/draft/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/GET/services/draft/.guard +++ /dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: "success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/GET/services/inactive/.guard b/DSL/Ruuter.public/services/GET/services/inactive/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/GET/services/inactive/.guard +++ /dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: "success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/GET/services/log-by-request.yml 
b/DSL/Ruuter.public/services/GET/services/log-by-request.yml deleted file mode 100644 index 9ee096b..0000000 --- a/DSL/Ruuter.public/services/GET/services/log-by-request.yml +++ /dev/null @@ -1,33 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'LOG-BY-REQUEST'" - method: get - accepts: json - returns: json - namespace: service - -get_services_stat: - call: http.post - args: - url: "[#SERVICE_OPENSEARCH]/services/_search/template" - body: - id: 'get-log-by-request' - params: ${incoming.params} - result: results - -check_result: - switch: - - condition: ${results.response.body.found === true} - next: return_ok - next: return_not_found - -return_not_found: - status: 404 - return: 'index not found' - next: end - -return_ok: - status: 200 - return: ${results.response.body._source} - next: end diff --git a/DSL/Ruuter.public/services/GET/services/log-by-service.yml b/DSL/Ruuter.public/services/GET/services/log-by-service.yml deleted file mode 100644 index 0613800..0000000 --- a/DSL/Ruuter.public/services/GET/services/log-by-service.yml +++ /dev/null @@ -1,33 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'LOG-BY-SERVICE'" - method: get - accepts: json - returns: json - namespace: service - -get_services_stat: - call: http.post - args: - url: "[#SERVICE_OPENSEARCH]/services/_search/template" - body: - id: 'get-log-by-service' - params: ${incoming.params} - result: results - -check_result: - switch: - - condition: ${results.response.body.found === true} - next: return_ok - next: return_not_found - -return_not_found: - status: 404 - return: 'index not found' - next: end - -return_ok: - status: 200 - return: ${results.response.body._source} - next: end diff --git a/DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml b/DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml deleted file mode 100644 index 5fea871..0000000 --- 
a/DSL/Ruuter.public/services/GET/services/services-detailed/nok.yml +++ /dev/null @@ -1,52 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'NOK'" - method: get - accepts: json - returns: json - namespace: service - allowlist: - params: - - field: page - type: number - description: "Parameter 'page'" - - field: page_size - type: number - description: "Parameter 'page_size'" - - field: sorting - type: string - description: "Parameter 'sorting'" - - field: order - type: string - description: "Parameter 'order'" - -getFaults: - call: http.post - args: - url: "[#SERVICE_OPENSEARCH]/ruuterlog/_search" - query: - from: ${(incoming.params.page - 1) * incoming.params.page_size} - size: ${incoming.params.page_size} - _source_excludes: "stackTrace,statusCode" - body: - sort: [{ "timestamp": { "order": "${incoming.params.order}" } }] - query: - match_phrase_prefix: - dslName: - query: "services/active" - result: getFaultsResult - -mapFaultsData: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/get-faults" - headers: - type: "json" - body: - data: { "hits": "${getFaultsResult.response.body.hits.hits}" } - result: faultsData - -returnSuccess: - wrapper: false - return: ${[faultsData.response.body, getFaultsResult.response.body.hits.total.value]} diff --git a/DSL/Ruuter.public/services/GET/services/statistics.yml b/DSL/Ruuter.public/services/GET/services/statistics.yml deleted file mode 100644 index 6b3110f..0000000 --- a/DSL/Ruuter.public/services/GET/services/statistics.yml +++ /dev/null @@ -1,21 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'STATISTICS'" - method: get - accepts: json - returns: json - namespace: service - -get_services_stat: - call: http.post - args: - url: "[#SERVICE_OPENSEARCH]/services/_search/template" - body: - id: 'get-services-stat' - result: results - -return_ok: - status: 200 - return: ${results.response.body.hits.hits} - next: end diff --git 
a/DSL/Ruuter.public/services/GET/services/status.yml b/DSL/Ruuter.public/services/GET/services/status.yml deleted file mode 100644 index 779451a..0000000 --- a/DSL/Ruuter.public/services/GET/services/status.yml +++ /dev/null @@ -1,24 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'STATUS'" - method: get - accepts: json - returns: json - namespace: service - allowlist: - params: - - field: id - type: string - description: "Parameter 'id'" - -get_status: - call: http.post - args: - url: "[#SERVICE_RESQL]/status" - body: - id: ${incoming.params.id} - result: res - -return_value: - return: ${res.response.body} diff --git a/DSL/Ruuter.public/services/GET/slots.yml b/DSL/Ruuter.public/services/GET/slots.yml deleted file mode 100644 index d38375c..0000000 --- a/DSL/Ruuter.public/services/GET/slots.yml +++ /dev/null @@ -1,26 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Get slots from OpenSearch" - method: get - accepts: json - returns: json - namespace: service - -getSlots: - call: http.post - args: - url: "[#SERVICE_OPENSEARCH]/domain/_search/template" - body: - id: "domain-objects-with-pagination" - params: - type: "slots" - filter: "" - from: 0 - size: 1000 - result: getSlotsResult - -returnSuccess: - return: ${getSlotsResult.response.body.hits.hits[0].fields.filtered_items[0]} - wrapper: false - next: end diff --git a/DSL/Ruuter.public/services/GET/steps/preferences.yml b/DSL/Ruuter.public/services/GET/steps/preferences.yml deleted file mode 100644 index 283ac69..0000000 --- a/DSL/Ruuter.public/services/GET/steps/preferences.yml +++ /dev/null @@ -1,72 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'PREFERENCES'" - method: get - accepts: json - returns: json - namespace: service - -get_user_info: - call: http.post - args: - url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" - contentType: plaintext - headers: - cookie: ${incoming.headers.cookie} - 
plaintext: "customJwtCookie" - result: res - -check_user_info_response: - switch: - - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} - next: assignIdCode - next: return_unauthorized - -assignIdCode: - assign: - idCode: ${res.response.body.idCode} - -get_user_step_preferences: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-user-step-preferences" - body: - user_id_code: ${idCode} - result: preferences - -check_preferences_response: - switch: - - condition: ${preferences.response.body.length > 0} - next: return_preferences - next: seed_default_user_preferences - -seed_default_user_preferences: - call: http.post - args: - url: "[#SERVICE_RESQL]/seed-user-step-preferences" - body: - user_id_code: ${idCode} - result: seed_preferences_res - next: refetch_user_step_preferences - -refetch_user_step_preferences: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-user-step-preferences" - body: - user_id_code: ${idCode} - result: refetched_preferences - -return_refetched_preferences: - return: ${refetched_preferences.response.body[0]} - next: end - -return_preferences: - return: ${preferences.response.body[0]} - next: end - -return_unauthorized: - status: 401 - return: "unauthorized" - next: end diff --git a/DSL/Ruuter.public/services/GET/sticky/example.yml b/DSL/Ruuter.public/services/GET/sticky/example.yml deleted file mode 100644 index cedef90..0000000 --- a/DSL/Ruuter.public/services/GET/sticky/example.yml +++ /dev/null @@ -1,58 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'EXAMPLE'" - method: get - accepts: json - returns: json - namespace: service - allowlist: - headers: - - field: cookie - type: string - description: "Cookie field" - -check_for_cookie: - switch: - - condition: ${incoming.headers.cookie == null || incoming.headers.cookie == ""} - next: return_unauthorized - next: extract_request_data - -extract_request_data: - assign: - cookie: 
${incoming.headers.cookie} - -extract_cookie_data: - call: http.post - args: - url: "[#SERVICE_RUUTER]/mocks/mock-custom-jwt-userinfo" - headers: - cookie: ${cookie} - body: - cookieName: "customJwtCookie" - result: jwtResult - next: allow_only_admins - -allow_only_admins: - switch: - - condition: ${jwtResult.response.body.response.authorities.includes("ROLE_ADMIN")} - next: get_data - next: return_unauthorized - -get_data: - call: reflect.mock - args: - response: - type: "mock-value" - id: 1234567 - result: reflectedRequest - next: return_value - -return_value: - return: ${reflectedRequest.response.body} - next: end - -return_unauthorized: - status: 401 - return: "unauthorized" - next: end diff --git a/DSL/Ruuter.public/services/POST/.guard b/DSL/Ruuter.public/services/POST/.guard deleted file mode 100644 index 4fd565b..0000000 --- a/DSL/Ruuter.public/services/POST/.guard +++ /dev/null @@ -1,28 +0,0 @@ -check_for_cookie: - switch: - - condition: ${incoming.headers == null || incoming.headers.cookie == null} - next: guard_fail - next: authenticate - -authenticate: - template: "[#SERVICE_PROJECT_LAYER]/check-user-authority" - requestType: templates - headers: - cookie: ${incoming.headers.cookie} - result: authority_result - -check_authority_result: - switch: - - condition: ${authority_result !== "false"} - next: guard_success - next: guard_fail - -guard_success: - return: "success" - status: 200 - next: end - -guard_fail: - return: "unauthorized" - status: 401 - next: end diff --git a/DSL/Ruuter.public/services/POST/auth/.guard b/DSL/Ruuter.public/services/POST/auth/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/POST/auth/.guard +++ /dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: "success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/POST/auth/login.yml b/DSL/Ruuter.public/services/POST/auth/login.yml deleted file mode 100644 index 3077193..0000000 --- 
a/DSL/Ruuter.public/services/POST/auth/login.yml +++ /dev/null @@ -1,101 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'LOGIN'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: login - type: string - description: "Body field 'login'" - - field: password - type: string - description: "Body field 'password'" - -check_for_required_parameters: - switch: - - condition: ${incoming.body.login == null || incoming.body.password == null} - next: return_incorrect_request - next: extract_request_data - -extract_request_data: - assign: - login: ${incoming.body.login} - password: ${incoming.body.password} - next: login_user - -login_user: - call: http.post - args: - url: "[#SERVICE_USERS_RESQL]/get-user-by-login" - body: - login: ${login} - password: ${password} - result: results - next: check_login_result - -check_login_result: - switch: - - condition: ${results.response.body.length != 0} - next: get_session_length - next: return_user_not_found - -get_session_length: - call: http.post - args: - url: "[#SERVICE_USERS_RESQL]/get-configuration" - body: - key: "session_length" - result: session_result - next: check_session_length_result - -check_session_length_result: - switch: - - condition: ${session_result.response.body.length != 0} - next: generate_cookie - next: return_session_length_not_found - -generate_cookie: - call: http.post - args: - url: "[#SERVICE_TIM]/jwt/custom-jwt-generate" - body: - JWTName: "customJwtCookie" - expirationInMinutes: ${session_result.response.body[0].value} - content: ${results.response.body[0]} - result: cookie_result - next: assign_cookie - -assign_cookie: - assign: - setCookie: - customJwtCookie: ${cookie_result.response.body.token} - Domain: "[#DOMAIN]" - Secure: true - HttpOnly: true - SameSite: "Lax" - next: return_value - -return_value: - headers: - Set-Cookie: ${setCookie} - return: ${cookie_result.response.body.token} - next: end - 
-return_session_length_not_found: - status: 400 - return: "Could not fetch session length" - next: end - -return_user_not_found: - status: 400 - return: "User Not Found" - next: end - -return_incorrect_request: - status: 400 - return: "Required parameter(s) missing" - next: end diff --git a/DSL/Ruuter.public/services/POST/csv.yml b/DSL/Ruuter.public/services/POST/csv.yml deleted file mode 100644 index 52e1f17..0000000 --- a/DSL/Ruuter.public/services/POST/csv.yml +++ /dev/null @@ -1,50 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'CSV'" - method: post - accepts: json - returns: data - namespace: service - allowlist: - body: - - field: data - type: string - description: "Body field 'data'" - - field: del - type: string - description: "Body field 'del'" - - field: qul - type: string - description: "Body field 'qul'" - -check_for_required_parameters: - switch: - - condition: ${incoming.body == null} - next: return_incorrect_request - next: get_csv - -get_csv: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/get-csv" - headers: - type: 'csv' - body: - data: ${incoming.body.data} - del: ${incoming.body.del} - qul: ${incoming.body.qul} - layout: false - result: result - -return_value: - wrapper: false - headers: - Content-disposition: "attachment;filename=result.csv" - return: ${result.response.body.response} - next: end - -return_incorrect_request: - status: 400 - return: 'missing parameters' - next: end diff --git a/DSL/Ruuter.public/services/POST/dates/calculate-difference.yml b/DSL/Ruuter.public/services/POST/dates/calculate-difference.yml deleted file mode 100644 index 2c5afd4..0000000 --- a/DSL/Ruuter.public/services/POST/dates/calculate-difference.yml +++ /dev/null @@ -1,82 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'CALCULATE-DIFFERENCE'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: startDate - 
type: string - description: "Body field 'startDate'" - - field: endDate - type: string - description: "Body field 'endDate'" - params: - - field: outputType - type: string - description: "Parameter 'outputType'" - -check_for_body: - switch: - - condition: ${incoming.body == null} - next: return_incorrect_request - next: check_for_required_parameters - -check_for_required_parameters: - switch: - - condition: ${incoming.body.startDate == null} - next: return_incorrect_request - next: extract_request_data - -extract_request_data: - assign: - outputType: ${incoming.params.outputType ?? 'days'} - startDate: ${incoming.body.startDate} - endDate: ${incoming.body.endDate ?? new Date().toISOString()} - -check_is_end_date_greater_than_start_date: - switch: - - condition: ${new Date(endDate) < new Date(startDate)} - next: return_incorrect_date - -check_is_output_type_valid: - switch: - - condition: ${outputType !== null && !['years','months','hours','days','minutes', 'seconds'].includes(outputType)} - next: return_incorrect_output_type - -calculate_difference: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/calculate-date-difference" - headers: - type: "json" - body: - startDate: ${startDate} - endDate: ${endDate} - outputType: ${outputType} - layout: false - result: result - next: return_value - -return_value: - status: 200 - return: ${result.response.body} - next: end - -return_incorrect_date: - status: 400 - return: "Start date can not be greater than the end date/ today" - next: end - -return_incorrect_output_type: - status: 400 - return: "Output type must be: years, months, hours, days, minutes or seconds" - next: end - -return_incorrect_request: - status: 400 - return: "Start date is required" - next: end diff --git a/DSL/Ruuter.public/services/POST/endpoints/common.yml b/DSL/Ruuter.public/services/POST/endpoints/common.yml deleted file mode 100644 index 611faca..0000000 --- a/DSL/Ruuter.public/services/POST/endpoints/common.yml +++ /dev/null @@ -1,48 +0,0 @@ 
-declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'Common'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: pagination - type: string - description: "Body field 'pagination'" - - field: page - type: string - description: "Body field 'page'" - - field: pageSize - type: string - description: "Body field 'pageSize'" - - field: sorting - type: string - description: "Body field 'sorting'" - - field: search - type: string - description: "Body field 'search'" - -extract_request_data: - assign: - pagination: ${incoming.body.pagination} - page: ${incoming.body.page} - pageSize: ${incoming.body.pageSize} - sorting: ${incoming.body.sorting} - search: ${incoming.body.search} - -get_common_endpoints: - call: http.post - args: - url: "[#SERVICE_RESQL]/endpoints/get_common_endpoints" - body: - pagination: ${pagination} - page: ${page} - page_size: ${pageSize} - sorting: ${sorting} - search: ${search} - result: res - -return_result: - return: ${res.response.body} diff --git a/DSL/Ruuter.public/services/POST/file/rename.yml b/DSL/Ruuter.public/services/POST/file/rename.yml deleted file mode 100644 index c0e7b1c..0000000 --- a/DSL/Ruuter.public/services/POST/file/rename.yml +++ /dev/null @@ -1,42 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'RENAME'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: current_path - type: string - description: "Body field 'current_path'" - - field: new_path - type: string - description: "Body field 'new_path'" - -check_for_body: - switch: - - condition: ${incoming.body == null || incoming.body.current_path == null || incoming.body.new_path == null} - next: return_incorrect_request - -rename_file: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/rename" - headers: - type: 'json' - body: - current_path: ${incoming.body.current_path} - new_path: 
${incoming.body.new_path} - layout: false - result: result - -return_value: - status: 200 - return: ${result.response.body} - next: end - -return_incorrect_request: - status: 400 - return: "Start date is required" diff --git a/DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml b/DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml deleted file mode 100644 index eabaf3f..0000000 --- a/DSL/Ruuter.public/services/POST/mocks/RBAC-mock.yml +++ /dev/null @@ -1,50 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'RBAC-MOCK'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: allowedRoles - type: object - description: "Body field 'allowedRoles'" - - field: userId - type: string - description: "Body field 'userId'" - - -check_for_body: - switch: - - condition: ${incoming.body == null} - next: missing_parameter - -extract_request_data: - assign: - userId: ${incoming.body.userId} - allowedRoles: ${incoming.body.allowedRoles.sort()} - -check_for_required_parameters: - switch: - - condition: ${userId === null || allowedRoles === null} - next: missing_parameter - next: fetch_user_roles_from_db - -fetch_user_roles_from_db: - call: reflect.mock - args: - response: - isAllowed: TRUE - result: result - -return_value: - status: 200 - return: "${result.response.body}" - next: end - -missing_parameter: - status: 400 - return: "userId, allowedRoles - missing" - next: end diff --git a/DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml b/DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml deleted file mode 100644 index 261e227..0000000 --- a/DSL/Ruuter.public/services/POST/mocks/dates/calculate-difference.yml +++ /dev/null @@ -1,136 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'CALCULATE-DIFFERENCE'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: startDate - 
type: string - description: "Body field 'startDate'" - - field: endDate - type: string - description: "Body field 'endDate'" - params: - - field: outputType - type: string - description: "Parameter 'outputType'" - -check_for_body: - switch: - - condition: ${incoming.body == null} - next: return_incorrect_request - next: check_for_required_parameters - -check_for_required_parameters: - switch: - - condition: ${incoming.body.startDate == null} - next: return_incorrect_request - next: extract_request_data - -extract_request_data: - assign: - outputType: ${incoming.params.outputType ?? 'days'} - startDate: ${incoming.body.startDate} - endDate: ${incoming.body.endDate ?? new Date().toISOString()} - -check_is_end_date_greater_than_start_date: - switch: - - condition: ${new Date(endDate) < new Date(startDate)} - next: return_incorrect_date - -check_is_output_type_valid: - switch: - - condition: ${outputType !== null && !['years','months','hours','days','minutes', 'seconds'].includes(outputType)} - next: return_incorrect_output_type - -calculate_difference: - switch: - - condition: ${outputType === 'years'} - next: calculate_difference_in_years - - condition: ${outputType === 'months'} - next: calculate_difference_in_months - - condition: ${outputType === 'hours'} - next: calculate_difference_in_hours - - condition: ${outputType === 'minutes'} - next: calculate_difference_in_minutes - - condition: ${outputType === 'seconds'} - next: calculate_difference_in_seconds - next: calculate_difference_in_days - -calculate_difference_in_years: - call: reflect.mock - args: - response: { - result: 0 - } - result: result - next: return_value - -calculate_difference_in_months: - call: reflect.mock - args: - response: { - result: 11 - } - result: result - next: return_value - -calculate_difference_in_days: - call: reflect.mock - args: - response: { - result: 1 - } - result: result - next: return_value - -calculate_difference_in_hours: - call: reflect.mock - args: - response: { - result: 
24 - } - result: result - next: return_value - -calculate_difference_in_minutes: - call: reflect.mock - args: - response: { - result: 59 - } - result: result - next: return_value - -calculate_difference_in_seconds: - call: reflect.mock - args: - response: { - result: 201 - } - result: result - next: return_value - -return_value: - status: 200 - return: ${result.response.body} - next: end - -return_incorrect_date: - status: 400 - return: "Start date can not be greater than the end date/ today" - next: end - -return_incorrect_output_type: - status: 400 - return: "Output type must be: years, months, hours, days, minutes or seconds" - next: end - -return_incorrect_request: - status: 400 - return: "Start date is required" - next: end diff --git a/DSL/Ruuter.public/services/POST/mocks/service-settings.yml b/DSL/Ruuter.public/services/POST/mocks/service-settings.yml deleted file mode 100644 index c22da07..0000000 --- a/DSL/Ruuter.public/services/POST/mocks/service-settings.yml +++ /dev/null @@ -1,19 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SERVICE-SETTINGS'" - method: post - accepts: json - returns: json - namespace: service - -call_mock: - call: reflect.mock - args: - response: - maxInputTry: 4 - result: mock_res - -return_result: - wrapper: false - return: ${mock_res.response.body} diff --git a/DSL/Ruuter.public/services/POST/mocks/services/add.yml b/DSL/Ruuter.public/services/POST/mocks/services/add.yml deleted file mode 100644 index 3b1e46c..0000000 --- a/DSL/Ruuter.public/services/POST/mocks/services/add.yml +++ /dev/null @@ -1,44 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'ADD'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: name - type: string - description: "Body field 'name'" - - field: description - type: string - description: "Body field 'description'" - -check_for_body: - switch: - - condition: 
${incoming.body == null} - next: return_incorrect_request - next: check_for_required_parameters - -check_for_required_parameters: - switch: - - condition: ${incoming.body.name == null || incoming.body.description == null} - next: return_incorrect_request - next: service_add - -service_add: - call: reflect.mock - args: - response: {} - result: createdService - -return_value: - status: 201 - wrapper: FALSE - return: "" - next: end - -return_incorrect_request: - status: 400 - return: "Required parameter(s) missing" diff --git a/DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml b/DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml deleted file mode 100644 index e8ea3f8..0000000 --- a/DSL/Ruuter.public/services/POST/mocks/services/open-api-spec-mock.yml +++ /dev/null @@ -1,17 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'OPEN-API-SPEC-MOCK'" - method: post - accepts: json - returns: json - namespace: service - -get_message: - call: http.get - args: - url: https://petstore3.swagger.io/api/v3/openapi.json - result: res - -return_value: - return: ${res.response.body} diff --git a/DSL/Ruuter.public/services/POST/mocks/user-info.yml b/DSL/Ruuter.public/services/POST/mocks/user-info.yml deleted file mode 100644 index ac0b322..0000000 --- a/DSL/Ruuter.public/services/POST/mocks/user-info.yml +++ /dev/null @@ -1,52 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'USER-INFO'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: isTokenExpired - type: boolean - description: "Body field 'isTokenExpired'" - -check_for_body: - switch: - - condition: ${incoming.body == null} - next: return_user_info_mock - next: do_decision - -do_decision: - switch: - - condition: ${incoming.body.isTokenExpired === true} - next: return_unauthorized - next: return_user_info_mock - -return_unauthorized: - status: 401 - 
return: "Unauthorized" - next: end - -return_user_info_mock: - call: reflect.mock - args: - response: - sub: "" - firstName: "MARY ÄNN" - idCode: "EE60001019906" - displayName: "MARY ÄNN" - iss: "test.buerokratt.ee" - exp: 1670250948 - login: "EE60001019906" - iat: 1670243748 - jti: "e14a5084-3b30-4a55-8720-c2ee22f43c2c" - authorities: [ - "ROLE_ADMINISTRATOR" - ] - result: reflected_request - next: return_value - -return_value: - return: ${reflected_request.response} diff --git a/DSL/Ruuter.public/services/POST/mocks/validation-mock.yml b/DSL/Ruuter.public/services/POST/mocks/validation-mock.yml deleted file mode 100644 index fb8cb4a..0000000 --- a/DSL/Ruuter.public/services/POST/mocks/validation-mock.yml +++ /dev/null @@ -1,26 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'VALIDATION-MOCK'" - method: post - accepts: json - returns: json - namespace: service - -call_mock: - call: reflect.mock - args: - response: - project: "Bürokratt" - website: "www.kratid.ee" - result: mock_res - -call_template: - template: "[#SERVICE_PROJECT_LAYER]/validation-template" - requestType: templates - body: - response: ${mock_res.response} - result: templateResult - -return_result: - return: ${templateResult} diff --git a/DSL/Ruuter.public/services/POST/rasa/rules/add.yml b/DSL/Ruuter.public/services/POST/rasa/rules/add.yml deleted file mode 100644 index c41581a..0000000 --- a/DSL/Ruuter.public/services/POST/rasa/rules/add.yml +++ /dev/null @@ -1,129 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'ADD'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: data - type: object - description: "Body field 'data'" - headers: - - field: cookie - type: string - description: "Cookie field" - -assign_values: - assign: - body: ${incoming.body.data} - -validateRules: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/validate/validate-stories-rules" 
- body: - story: ${body} - category: "rules" - result: validateRulesResult - -validateRulesCheck: - switch: - - condition: ${validateRulesResult.response.body.result == true} - next: getRuleNames - next: returnDuplicateIntentOrEntity - -getRuleNames: - call: http.get - args: - url: "[#SERVICE_RUUTER]/rasa/rule-names" - headers: - cookie: ${incoming.headers.cookie} - result: ruleResult - -validateRuleName: - switch: - - condition: ${ruleResult.response.body.response.names == null} - next: getFileLocations - - condition: ${!ruleResult.response.body.response.names.includes(body.id)} - next: getFileLocations - next: returnStoryExists - -getFileLocations: - call: http.get - args: - url: "[#SERVICE_RUUTER]/internal/return-file-locations" - headers: - cookie: ${incoming.headers.cookie} - result: fileLocations - -getRulesFile: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/read-file" - body: - file_path: ${fileLocations.response.body.response.rules_location} - result: ruleFile - -convertYamlToJson: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/yaml_to_json" - body: - file: ${ruleFile.response.body.file} - result: rulesData - -mergeRules: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/merge" - body: - array1: ${rulesData.response.body.rules ?? 
[]} - array2: ${[body]} - iteratee: "rule" - result: mergedRules - -convertJsonToYaml: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/json-to-yaml-stories" - headers: - content-type: "application/json" - body: - rules: ${mergedRules.response.body.array} - result: rulesYaml - -saveRulesFile: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: ${fileLocations.response.body.response.rules_location} - content: ${rulesYaml.response.body.json} - result: fileResult - next: updateOpenSearch - -updateOpenSearch: - call: http.post - args: - url: "[#SERVICES_PIPELINE]/bulk/rules/rule" - body: - input: ${rulesYaml.response.body.json} - result: updateSearchResult - next: returnSuccess - -returnSuccess: - return: "Rule added" - next: end - -returnRuleExists: - return: "Rule exists" - status: 409 - next: end - -returnDuplicateIntentOrEntity: - return: "Rule may not have duplicate consecutive intents or entities" - status: 406 - next: end diff --git a/DSL/Ruuter.public/services/POST/saveJsonToYml.yml b/DSL/Ruuter.public/services/POST/saveJsonToYml.yml deleted file mode 100644 index 4305cb2..0000000 --- a/DSL/Ruuter.public/services/POST/saveJsonToYml.yml +++ /dev/null @@ -1,39 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SAVEJSONTOYML'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: result - type: string - description: "Body field 'result'" - params: - - field: location - type: string - description: "Parameter 'location'" - -toYml: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data" - body: - data: ${incoming.body.result} - result: r - -saveFile: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: ${incoming.params.location} - content: ${r.response.body.yaml} - result: fileResult - -saved_seccessfully: - reloadDsl: true - return: "" - next: end 
diff --git a/DSL/Ruuter.public/services/POST/service-by-id.yml b/DSL/Ruuter.public/services/POST/service-by-id.yml deleted file mode 100644 index 45fd1b0..0000000 --- a/DSL/Ruuter.public/services/POST/service-by-id.yml +++ /dev/null @@ -1,90 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SERVICE-BY-ID'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - header: - - field: cookie - type: string - description: "Cookie field" - body: - - field: id - type: string - description: "Body field 'id'" - - field: search - type: string - description: "Body field 'search'" - -extract_request_data: - assign: - id: ${incoming.body.id} - search: ${incoming.body.search} - -get_user_info: - call: http.post - args: - url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" - contentType: plaintext - headers: - cookie: ${incoming.headers.cookie} - plaintext: "customJwtCookie" - result: res - -check_user_info_response: - switch: - - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} - next: assignIdCode - next: return_unauthorized - -assignIdCode: - assign: - idCode: ${res.response.body.idCode} - -get_service_by_id: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-service-by-id" - body: - id: ${id} - result: service_results - -get_endpoints_by_service_id: - call: http.post - args: - url: "[#SERVICE_RESQL]/endpoints/get_endpoints_by_service_id" - body: - id: ${id} - user_id_code: ${idCode} - search: ${search} - result: endpoints_results - -prepare_results: - assign: - results: - id: ${service_results.response.body[0].id} - name: ${service_results.response.body[0].name} - description: ${service_results.response.body[0].description} - slot: ${service_results.response.body[0].slot} - examples: ${service_results.response.body[0].examples} - entities: ${service_results.response.body[0].entities} - state: ${service_results.response.body[0].state} - type: 
${service_results.response.body[0].type} - isCommon: ${service_results.response.body[0].isCommon} - structure: ${service_results.response.body[0].structure} - endpoints: ${endpoints_results.response.body} - serviceId: ${service_results.response.body[0].serviceId} - -return_ok: - status: 200 - wrapper: false - return: ${results} - next: end - -return_unauthorized: - status: 401 - return: "unauthorized" - next: end diff --git a/DSL/Ruuter.public/services/POST/service-settings.yml b/DSL/Ruuter.public/services/POST/service-settings.yml deleted file mode 100644 index af9021c..0000000 --- a/DSL/Ruuter.public/services/POST/service-settings.yml +++ /dev/null @@ -1,29 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SERVICE-SETTINGS'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: name - type: string - description: "Body field 'name'" - - field: value - type: string - description: "Body field 'value'" - -updateSettings: - call: http.post - args: - url: "[#SERVICE_RESQL]/update-settings" - body: - name: ${incoming.body.name} - value: ${incoming.body.value} - result: getResult - -returnSuccess: - status: 200 - return: 'ok' diff --git a/DSL/Ruuter.public/services/POST/services.yml b/DSL/Ruuter.public/services/POST/services.yml deleted file mode 100644 index 8188fac..0000000 --- a/DSL/Ruuter.public/services/POST/services.yml +++ /dev/null @@ -1,43 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SERVICES'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: page - type: number - description: "Body field 'page'" - - field: page_size - type: number - description: "Body field 'page_size'" - - field: sorting - type: string - description: "Body field 'sorting'" - - field: is_common - type: boolean - description: "Body field 'is_common'" - -get_services_list: - call: http.post - args: - 
url: "[#SERVICE_RESQL]/get-services-list" - body: - page: ${incoming.body.page} - page_size: ${incoming.body.page_size} - sorting: ${incoming.body.sorting} - is_common: ${incoming.body.is_common} - limit: 400 - result: services_res - -assign_services_result: - assign: - services: ${services_res.response.body} - -return_ok: - status: 200 - return: ${[services]} - next: end diff --git a/DSL/Ruuter.public/services/POST/services/active/.guard b/DSL/Ruuter.public/services/POST/services/active/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/POST/services/active/.guard +++ /dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: "success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml b/DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml deleted file mode 100644 index ff07aba..0000000 --- a/DSL/Ruuter.public/services/POST/services/active/Broneeringu_kinnitus.yml +++ /dev/null @@ -1,65 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: Teenuse test, mis kinnitab kasutaja broneeringu ja tagastab - sisestatud väärtuse. - method: post - accepts: json - returns: json - namespace: service - allowList: - body: - - field: chatId - type: string - description: The chat ID for the message - - field: authorId - type: string - description: The author ID for the message - - field: input - type: object - description: The Input from the user -prepare: - assign: - chatId: ${incoming.body.chatId} - authorId: ${incoming.body.authorId} - input: ${incoming.body.input} - buttons: [] - res: - result: "" - next: assign_1 -assign_1: - assign: - entity: ${incoming.body.input[0]} - next: sõnum_kliendile_1 -sõnum_kliendile_1: - assign: - res: - result: "Teie broneering on registreeritud. Kohtumiseni! 
Entity: ${entity}" - next: teenuse_lõpetamine_1 -teenuse_lõpetamine_1: - template: "[#SERVICE_PROJECT_LAYER]/end-conversation" - requestType: templates - body: - message: "" - result: teenuse_lõpetamine_1_result - next: format_messages -format_messages: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" - headers: - type: json - body: - data: - botMessages: ${[res]} - chatId: ${chatId ?? ''} - authorId: ${authorId ?? ''} - authorFirstName: "" - authorLastName: "" - authorTimestamp: ${new Date().toISOString()} - created: ${new Date().toISOString()} - buttons: ${buttons ?? []} - result: formatMessage - next: service-end -service-end: - return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml b/DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml deleted file mode 100644 index 18b213d..0000000 --- a/DSL/Ruuter.public/services/POST/services/active/Kalastusloa_uuendamise_teade.yml +++ /dev/null @@ -1,60 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: Teenuse test, mis teavitab kasutajat, et tema kalastusluba vajab uuendamist. - method: post - accepts: json - returns: json - namespace: service - allowList: - body: - - field: chatId - type: string - description: The chat ID for the message - - field: authorId - type: string - description: The author ID for the message - - field: input - type: object - description: The Input from the user -prepare: - assign: - chatId: ${incoming.body.chatId} - authorId: ${incoming.body.authorId} - input: ${incoming.body.input} - buttons: [] - res: - result: "" - next: send_message_to_client_1 -send_message_to_client_1: - assign: - res: - result: Su kalastusluba vajab uuendamist! 
- next: end_service_1 -end_service_1: - template: "[#SERVICE_PROJECT_LAYER]/end-conversation" - requestType: templates - body: - message: "" - result: end_service_1_result - next: format_messages -format_messages: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" - headers: - type: json - body: - data: - botMessages: ${[res]} - chatId: ${chatId ?? ''} - authorId: ${authorId ?? ''} - authorFirstName: "" - authorLastName: "" - authorTimestamp: ${new Date().toISOString()} - created: ${new Date().toISOString()} - buttons: ${buttons ?? []} - result: formatMessage - next: service-end -service-end: - return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml b/DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml deleted file mode 100644 index df123c0..0000000 --- a/DSL/Ruuter.public/services/POST/services/active/Koolivaheajad.yml +++ /dev/null @@ -1,63 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: Kasutaja soovib infot koolivaheaegade kohta antud õppeaastal. - method: post - accepts: json - returns: json - namespace: service - allowList: - body: - - field: chatId - type: string - description: The chat ID for the message - - field: authorId - type: string - description: The author ID for the message - - field: input - type: object - description: The Input from the user -prepare: - assign: - chatId: ${incoming.body.chatId} - authorId: ${incoming.body.authorId} - input: ${incoming.body.input} - buttons: [] - res: - result: "" - next: sõnum_kliendile_1 -sõnum_kliendile_1: - assign: - res: - result: E 20. oktoober 2025 - P 26. oktoober 2025 Sügisvaheaeg E 22. detsember - 2025 - P 11. jaanuar 2026 Jõuluvaheaeg E 23. veebruar 2026 - P 01. märts - 2026 Talvevaheaeg E 13. aprill 2026 - P 19. aprill 2026 Kevadvaheaeg K - 17. juuni 2026 - E 31. 
august 2026 Suvevaheaeg - next: teenuse_lõpetamine_1 -teenuse_lõpetamine_1: - template: "[#SERVICE_PROJECT_LAYER]/end-conversation" - requestType: templates - body: - message: "" - result: teenuse_lõpetamine_1_result - next: format_messages -format_messages: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" - headers: - type: json - body: - data: - botMessages: ${[res]} - chatId: ${chatId ?? ''} - authorId: ${authorId ?? ''} - authorFirstName: "" - authorLastName: "" - authorTimestamp: ${new Date().toISOString()} - created: ${new Date().toISOString()} - buttons: ${buttons ?? []} - result: formatMessage - next: service-end -service-end: - return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml b/DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml deleted file mode 100644 index 67373c3..0000000 --- a/DSL/Ruuter.public/services/POST/services/active/Lihtne_test_teenus.yml +++ /dev/null @@ -1,61 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: Lihtne testteenus, mis küsib kasutajalt valikvastustega küsimusi ja - suunab vastavalt vastusele järgmisse sammu. 
- method: post - accepts: json - returns: json - namespace: service - allowList: - body: - - field: chatId - type: string - description: The chat ID for the message - - field: authorId - type: string - description: The author ID for the message - - field: input - type: object - description: The Input from the user -prepare: - assign: - chatId: ${incoming.body.chatId} - authorId: ${incoming.body.authorId} - input: ${incoming.body.input} - buttons: [] - res: - result: "" - next: multi_choice_question_1 -multi_choice_question_1: - assign: - buttons: - - id: "1" - title: Jah - payload: "#service, /POST/services/active/lihtne_teenus_test_mcq_1_0" - - id: "2" - title: Ei - payload: "#service, /POST/services/active/lihtne_teenus_test_mcq_1_1" - res: - result: lithsa teenuse küsims - next: format_messages -format_messages: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" - headers: - type: json - body: - data: - botMessages: ${[res]} - chatId: ${chatId ?? ''} - authorId: ${authorId ?? ''} - authorFirstName: "" - authorLastName: "" - authorTimestamp: ${new Date().toISOString()} - created: ${new Date().toISOString()} - buttons: ${buttons ?? []} - result: formatMessage - next: service-end -service-end: - return: ${formatMessage.response.body ?? 
''} diff --git a/DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml b/DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml deleted file mode 100644 index 0f85846..0000000 --- a/DSL/Ruuter.public/services/POST/services/active/customer_feedback.yml +++ /dev/null @@ -1,82 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: Description placeholder for 'customer_feedback' - method: post - accepts: json - returns: json - namespace: service - allowList: - body: - - field: chatId - type: string - description: The chat ID for the message - - field: authorId - type: string - description: The author ID for the message - - field: input - type: object - description: The Input from the user -prepare: - assign: - chatId: ${incoming.body.chatId} - authorId: ${incoming.body.authorId} - input: ${incoming.body.input} - buttons: [] - res: - result: "" - next: assign_1 -assign_1: - assign: - customer_rating: ${incoming.body.input[0]} - next: condition_1 -condition_1: - switch: - - condition: ${customer_rating > 3} - next: send_message_to_client_1 - next: send_message_to_client_2 -send_message_to_client_1: - assign: - res: - result: Thank you for your positive feedback with ${customer_rating} - next: end_service_1 -send_message_to_client_2: - assign: - res: - result: Thank you for your feedback with rating ${customer_rating} . and we are - trying to improve the system - next: end_service_2 -end_service_1: - template: "[#SERVICE_PROJECT_LAYER]/end-conversation" - requestType: templates - body: - message: "" - result: end_service_1_result - next: format_messages -end_service_2: - template: "[#SERVICE_PROJECT_LAYER]/end-conversation" - requestType: templates - body: - message: "" - result: end_service_2_result - next: format_messages -format_messages: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" - headers: - type: json - body: - data: - botMessages: ${[res]} - chatId: ${chatId ?? 
''} - authorId: ${authorId ?? ''} - authorFirstName: "" - authorLastName: "" - authorTimestamp: ${new Date().toISOString()} - created: ${new Date().toISOString()} - buttons: ${buttons ?? []} - result: formatMessage - next: service-end -service-end: - return: ${formatMessage.response.body ?? ''} diff --git a/DSL/Ruuter.public/services/POST/services/add.yml b/DSL/Ruuter.public/services/POST/services/add.yml deleted file mode 100644 index a9a81ec..0000000 --- a/DSL/Ruuter.public/services/POST/services/add.yml +++ /dev/null @@ -1,177 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'ADD'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - header: - - field: cookie - type: string - description: "Cookie field" - body: - - field: content - type: string - description: "Body field 'content'" - - field: description - type: string - description: "Body field 'description'" - - field: entities - type: object - description: "Body field 'Entities'" - - field: examples - type: object - description: "Body field 'Examples'" - - field: isCommon - type: boolean - description: "Body field 'isCommon'" - - field: name - type: string - description: "Body field 'name'" - - field: serviceId - type: string - description: "Body field 'serviceId'" - - field: structure - type: object - description: "Body field 'structure'" - - field: type - type: string - description: "Body field 'type'" - - field: updateServiceDb - type: boolean - description: "Body field 'updateServiceDb'" - - field: state - type: string - description: "Body field 'state'" - -check_for_required_parameters: - switch: - - condition: ${incoming.body.name == null || incoming.body.description == null || incoming.body.type == null || incoming.body.content == null || incoming.body.serviceId == null || incoming.body.isCommon == null || incoming.body.structure == null} - next: return_incorrect_request - next: extract_request_data - -extract_request_data: - 
assign: - name: ${incoming.body.name} - description: ${incoming.body.description} - slot: ${""} - entities: ${incoming.body.entities} - examples: ${incoming.body.examples} - type: ${incoming.body.type.toUpperCase()} - content: ${incoming.body.content} - serviceId: ${incoming.body.serviceId} - isCommon: ${incoming.body.isCommon} - structure: ${incoming.body.structure} - state: ${incoming.body.state} - next: check_if_update_service_db - -check_if_update_service_db: - switch: - - condition: ${incoming.body.updateServiceDb === true} - next: check_if_name_exists - next: delete_all_mcq_files - -check_if_name_exists: - call: http.post - args: - url: "[#SERVICE_RESQL]/services/check_name_exist" - body: - name: ${name} - result: name_exists_res - next: check_name_exists_result - -check_name_exists_result: - switch: - - condition: ${name_exists_res.response.body[0].nameExists} - next: return_name_already_exists - next: service_add - -service_add: - call: http.post - args: - url: "[#SERVICE_RESQL]/add" - body: - name: ${name} - description: ${description} - slot: ${slot} - entities: ${entities} - examples: ${examples} - ruuter_type: ${type} - service_id: ${serviceId} - is_common: ${isCommon} - state: ${state} - structure: ${structure} - result: createdService - next: convert_json_content_to_yml - -delete_all_mcq_files: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/delete-all-that-starts-with" - body: - path: "[#RUUTER_SERVICES_PATH]/${type}/services/draft" - keyword: "${name}_" - result: deleteRes - -convert_json_content_to_yml: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data" - body: - data: ${content} - result: ymlResult - -check_for_type: - switch: - - condition: ${type === 'GET'} - next: add_get_dsl - next: add_post_dsl - -add_get_dsl: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: "[#RUUTER_SERVICES_GET_PATH]/draft/${name}.tmp" - content: ${ymlResult.response.body.yaml} 
- result: results - next: check_result - -add_post_dsl: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: "[#RUUTER_SERVICES_POST_PATH]/draft/${name}.tmp" - content: ${ymlResult.response.body.yaml} - result: results - next: check_result - -check_result: - switch: - - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} - next: return_ok - next: return_bad_request - -return_ok: - reloadDsl: true - status: 200 - return: ${results.response.body.message} - next: end - -return_bad_request: - status: 400 - return: ${results.response.body.message} - next: end - -return_incorrect_request: - status: 400 - return: "Required parameter(s) missing" - next: end - -return_name_already_exists: - status: 409 - return: "Service name already exists" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/create-endpoint.yml b/DSL/Ruuter.public/services/POST/services/create-endpoint.yml deleted file mode 100644 index 2c73fd4..0000000 --- a/DSL/Ruuter.public/services/POST/services/create-endpoint.yml +++ /dev/null @@ -1,46 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Creates a new endpoint" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: endpointId - type: string - description: "Endpoint UUID" - - field: name - type: string - description: "Endpoint name" - - field: type - type: string - description: "Endpoint type" - - field: isCommon - type: boolean - description: "Endpoint common status" - - field: serviceId - type: string - description: "Service UUID" - - field: definitions - type: object - description: "Endpoint definitions" - -create_endpoint: - call: http.post - args: - url: "[#SERVICE_RESQL]/endpoints/create_endpoint" - body: - endpointId: ${incoming.body.endpointId} - name: ${incoming.body.name} - type: ${incoming.body.type} - isCommon: ${incoming.body.isCommon} - serviceId: ${incoming.body.serviceId ?? 
''} - definitions: ${incoming.body.definitions} - result: res - -return_ok: - status: 200 - return: "Endpoint created" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/delete-endpoint.yml b/DSL/Ruuter.public/services/POST/services/delete-endpoint.yml deleted file mode 100644 index f585495..0000000 --- a/DSL/Ruuter.public/services/POST/services/delete-endpoint.yml +++ /dev/null @@ -1,34 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Deletes an endpoint" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: id - type: string - description: "Endpoint UUID" - -delete_endpoint: - call: http.post - args: - url: "[#SERVICE_RESQL]/endpoints/delete_endpoint" - body: - id: ${incoming.body.id} - result: res - -remove_from_preferences: - call: http.post - args: - url: "[#SERVICE_RESQL]/endpoints/remove_endpoint_from_preferences" - body: - endpoint_id: ${incoming.body.id} - result: preferences_res - -return_ok: - status: 200 - return: "Endpoint deleted" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/delete.yml b/DSL/Ruuter.public/services/POST/services/delete.yml deleted file mode 100644 index c9a7bef..0000000 --- a/DSL/Ruuter.public/services/POST/services/delete.yml +++ /dev/null @@ -1,155 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'DELETE'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - headers: - - field: cookie - type: string - description: "Cookie field" - body: - - field: id - type: string - description: "Body field 'id'" - - field: type - type: string - description: "Body field 'type'" - -check_for_required_parameters: - switch: - - condition: ${incoming.body.id == null || incoming.body.type == null} - next: return_incorrect_request - next: extract_request_data - -extract_request_data: - assign: - id: ${incoming.body.id} - ruuter_type: ${incoming.body.type.toUpperCase()} - next: 
get_service_name - -get_service_name: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-service-name-by-id" - body: - id: ${id} - result: name_res - next: get_current_status - -get_current_status: - call: http.post - args: - url: "[#SERVICE_RESQL]/status" - body: - id: ${id} - result: status_res - next: check_status - -check_status: - switch: - - condition: ${status_res.response.body[0].currentState === 'active'} - next: return_cannot_delete_active_service - - condition: ${status_res.response.body[0].currentState === 'ready'} - next: assign_draft_path - next: assign_old_path - -assign_old_path: - assign: - old_file_status_path: ${status_res.response.body[0].currentState} - next: delete_service - -assign_draft_path: - assign: - old_file_status_path: "draft" - next: delete_service - -delete_service: - call: http.post - args: - url: "[#SERVICE_RESQL]/delete-service" - body: - id: ${id} - result: res - next: check_service_file_exists - -check_service_file_exists: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/exists" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name_res.response.body[0].name}.tmp" - result: service_file_exists - next: validate_service_file_exists - -validate_service_file_exists: - switch: - - condition: ${!!service_file_exists.response.body} - next: delete_deactivated_service - next: delete_endpoints_by_service_id - -delete_deactivated_service: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/delete" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name_res.response.body[0].name}.tmp" - result: results - next: check_result - -check_result: - switch: - - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} - next: delete_endpoints_by_service_id - next: return_bad_request - -delete_endpoints_by_service_id: - call: http.post - args: - url: 
"[#SERVICE_RESQL]/endpoints/delete_endpoints_by_service_id" - body: - serviceId: ${id} - result: delete_endpoint_results - next: remove_service_endpoints_from_preferences - -remove_service_endpoints_from_preferences: - call: http.post - args: - url: "[#SERVICE_RESQL]/endpoints/remove_service_endpoints_from_preferences" - body: - serviceId: ${id} - result: remove_preferences_results - next: delete_mcq_files - -delete_mcq_files: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/delete-all-that-starts-with" - body: - path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" - keyword: "${name_res.response.body[0].name}_" - result: deleted_mcq - -return_ok: - reloadDsl: true - status: 200 - return: "Service Deleted Successfully" - next: end - -return_bad_request: - status: 400 - return: ${results.response.body.message} - next: end - -return_incorrect_request: - status: 400 - return: "Required parameter(s) missing" - next: end - -return_cannot_delete_active_service: - status: 400 - return: "Cannot delete active service" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml b/DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml deleted file mode 100644 index b38fcd4..0000000 --- a/DSL/Ruuter.public/services/POST/services/domain-intent-service-link.yml +++ /dev/null @@ -1,157 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'domain-intent-service-link'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: intent - type: string - description: "Body field 'intent'" - - field: serviceName - type: string - description: "Body field 'serviceName'" - - field: serviceMethod - type: string - description: "Body field 'serviceMethod'" - - field: serviceSlot - type: string - description: "Slot input" - - field: type - type: string - description: "Body field 'type'" - header: 
- - field: cookie - type: string - description: "Cookie field" - -extract_request_data: - assign: - serviceName: "${incoming.body.serviceName || ''}" - serviceMethod: "${incoming.body.serviceMethod || 'POST'}" - serviceSlot: ", ({${incoming.body.serviceSlot ? incoming.body.serviceSlot : ''}})" - intent: "${incoming.body.intent}" - type: "${incoming.body.type}" - service_path: "#service, /${serviceMethod}/services/active/${serviceName + (incoming.body.serviceSlot ? serviceSlot : '')}" - -get_file_locations: - call: http.get - args: - url: "[#SERVICE_RUUTER]/internal/return-file-locations" - headers: - cookie: ${incoming.headers.cookie} - result: fileLocations - next: get_domain_file - -get_domain_file: - call: http.get - args: - url: "[#SERVICE_RUUTER]/internal/domain-file" - headers: - cookie: ${incoming.headers.cookie} - result: domainData - next: assign_domain_file_data - -assign_domain_file_data: - assign: - domain_data_json: ${domainData.response.body.response} - next: check_if_intent_exists - -check_if_intent_exists: - switch: - - condition: ${domain_data_json.intents.includes(intent)} - next: update_existing_domain_response - next: return_intent_does_not_exist - -update_existing_domain_response: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/domain/update-existing-response" - body: - json: ${domain_data_json.responses} - searchKey: ${intent} - newKey: ${"utter_" + intent} - newKeyValue: '${type === "Add" ? 
service_path : "UNKNOWN"}' - deleteOldValue: false - createIfAbsent: true - result: updatedResponses - next: check_for_type - -check_for_type: - switch: - - condition: ${type === "Add"} - next: assignRuleData - next: convert_domain_json_to_yaml - -assignRuleData: - assign: - data: - rule: "rule${intent}" - steps: [ - { - intent: "${intent}", - }, - { - action: "utter_${intent}", - }, - ] - next: add_rule - -add_rule: - call: http.post - args: - url: "[#SERVICE_RUUTER]/rasa/rules/add" - headers: - cookie: ${incoming.headers.cookie} - body: - data: ${data} - result: add_rule_res - next: convert_domain_json_to_yaml - -convert_domain_json_to_yaml: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_domain" - body: - version: ${domain_data_json.version} - session_config: ${domain_data_json.session_config} - intents: ${domain_data_json.intents} - entities: ${domain_data_json.entities} - slots: ${domain_data_json.slots} - forms: ${domain_data_json.forms} - actions: ${domain_data_json.actions} - responses: ${updatedResponses.response.body} - result: domainYaml - next: resave_domain_file - -resave_domain_file: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: ${fileLocations.response.body.response.domain_location} - content: ${domainYaml.response.body.json} - result: fileResult - next: updateOpenSearch - -updateOpenSearch: - call: http.post - args: - url: "[#SERVICES_PIPELINE]/bulk/domain" - body: - input: ${domainYaml.response.body.json} - result: updateSearchResult - next: return_result - -return_result: - status: 200 - return: "Connection request sent successfully" - next: end - -return_intent_does_not_exist: - status: 400 - return: "Intent does not exists" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/draft/.guard b/DSL/Ruuter.public/services/POST/services/draft/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/POST/services/draft/.guard +++ 
/dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: "success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/POST/services/draft/test.tmp b/DSL/Ruuter.public/services/POST/services/draft/test.tmp deleted file mode 100644 index 2625647..0000000 --- a/DSL/Ruuter.public/services/POST/services/draft/test.tmp +++ /dev/null @@ -1,48 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: Description placeholder for 'test' - method: post - accepts: json - returns: json - namespace: service - allowList: - body: - - field: chatId - type: string - description: The chat ID for the message - - field: authorId - type: string - description: The author ID for the message - - field: input - type: object - description: The Input from the user -prepare: - assign: - chatId: ${incoming.body.chatId} - authorId: ${incoming.body.authorId} - input: ${incoming.body.input} - buttons: [] - res: - result: "" - next: format_messages -format_messages: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/bot_responses_to_messages" - headers: - type: json - body: - data: - botMessages: ${[res]} - chatId: ${chatId ?? ''} - authorId: ${authorId ?? ''} - authorFirstName: "" - authorLastName: "" - authorTimestamp: ${new Date().toISOString()} - created: ${new Date().toISOString()} - buttons: ${buttons ?? []} - result: formatMessage - next: service-end -service-end: - return: ${formatMessage.response.body ?? 
''} diff --git a/DSL/Ruuter.public/services/POST/services/edit.yml b/DSL/Ruuter.public/services/POST/services/edit.yml deleted file mode 100644 index 5756314..0000000 --- a/DSL/Ruuter.public/services/POST/services/edit.yml +++ /dev/null @@ -1,381 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'EDIT'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: content - type: string - description: "Body field 'content'" - - field: description - type: string - description: "Body field 'description'" - - field: isCommon - type: boolean - description: "Body field 'isCommon'" - - field: entities - type: object - description: "Body field 'Entities'" - - field: examples - type: object - description: "Body field 'Examples'" - - field: name - type: string - description: "Body field 'name'" - - field: structure - type: object - description: "Body field 'structure'" - - field: type - type: string - description: "Body field 'type'" - - field: updateServiceDb - type: boolean - description: "Body field 'updateServiceDb'" - - field: state - type: string - description: "Body field 'state'" - params: - - field: id - type: string - description: "Parameter 'id'" - headers: - - field: cookie - type: string - description: "Cookie field" - -extract_request_data: - assign: - id: ${incoming.params.id} - name: ${incoming.body.name} - description: ${incoming.body.description} - isCommon: ${incoming.body.isCommon} - slot: ${""} - entities: ${incoming.body.entities} - examples: ${incoming.body.examples} - type: ${incoming.body.type} - content: ${incoming.body.content} - structure: ${incoming.body.structure} - updateServiceDb: ${incoming.body.updateServiceDb} - state: ${incoming.body.state} - -check_for_update_service_db: - switch: - - condition: ${incoming.body.updateServiceDb === true} - next: get_service - next: check_for_content - -get_service: - call: http.post - args: - url: 
"[#SERVICE_RESQL]/services/get_services_by_ids" - body: - serviceIds: "${id}" - result: get_service_result - -check_if_name_is_the_same: - switch: - - condition: ${get_service_result.response.body[0].name === name} - next: delete_all_mcq_files - next: check_if_name_exists - -check_if_name_exists: - call: http.post - args: - url: "[#SERVICE_RESQL]/services/check_name_exist" - body: - name: ${name} - result: name_exists_res - next: check_name_exists_result - -check_name_exists_result: - switch: - - condition: ${name_exists_res.response.body[0].nameExists} - next: return_name_already_exists - next: delete_all_mcq_files - -delete_all_mcq_files: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/delete-all-that-starts-with" - body: - path: "[#RUUTER_SERVICES_PATH]/${type}/services/draft" - keyword: "${name}_" - result: deleteRes - next: check_for_content - -check_for_content: - switch: - - condition: ${content === null} - next: check_for_required_parameters - next: convert_json_content_to_yml - -convert_json_content_to_yml: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data" - body: - data: ${content} - result: ymlResult - -check_for_type: - switch: - - condition: ${type === 'GET'} - next: add_get_dsl - next: add_post_dsl - -add_get_dsl: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: "[#RUUTER_SERVICES_GET_PATH]/draft/${name}.tmp" - content: ${ymlResult.response.body.yaml} - result: results - next: check_for_required_parameters - -add_post_dsl: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: "[#RUUTER_SERVICES_POST_PATH]/draft/${name}.tmp" - content: ${ymlResult.response.body.yaml} - result: results - next: check_for_required_parameters - -check_for_required_parameters: - switch: - - condition: ${id === null || name === null || description === null} - next: return_incorrect_request - - condition: ${type === null} - next: 
return_incorrect_request - -upper_case_type: - assign: - type: ${type.toUpperCase()} - -check_type: - switch: - - condition: ${type !== 'GET' && type !== 'POST'} - next: return_incorrect_request - -check_if_update_service_db: - switch: - - condition: ${incoming.body.updateServiceDb === true} - next: get_service_by_id - next: return_ok - -get_service_by_id: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-service-by-id" - body: - id: ${id} - result: old_service_result - -assign_values: - assign: - old_name: ${old_service_result.response.body[0].name} - old_structure: ${old_service_result.response.body[0].structure} - old_state: ${old_service_result.response.body[0].state} - service_type: ${old_service_result.response.body[0].type} - -check_new_structure: - switch: - - condition: ${structure === null} - next: use_old_structure - - condition: ${structure !== null} - next: use_new_structure - -use_new_structure: - assign: - new_structure: ${structure} - next: rename_dsl - -use_old_structure: - assign: - new_structure: ${old_structure.value} - next: rename_dsl - -rename_dsl: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/move" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${type}/[#RUUTER_SERVICES_DIR_PATH]/${old_state}/${old_name}.tmp" - new_path: "[#RUUTER_SERVICES_PATH]/${type}/[#RUUTER_SERVICES_DIR_PATH]/${old_state}/${name}.tmp" - result: results - -service_edit: - call: http.post - args: - url: "[#SERVICE_RESQL]/edit" - body: - id: ${id} - name: ${name} - description: ${description} - is_common: ${isCommon ?? false} - slot: ${slot} - examples: ${examples} - entities: ${entities} - structure: ${new_structure} - state: ${state ?? 
'draft'} - result: editedService - -check for_state: - switch: - - condition: ${state === 'draft'} - next: check_remove_blob_then_draft - - condition: ${state === 'active'} - next: check_if_service_data_exists - next: check_remove_blob_then_ok - -check_remove_blob_then_draft: - switch: - - condition: ${old_state === 'active'} - next: delete_from_storage_edit - next: change_state_to_draft - -check_remove_blob_then_ok: - switch: - - condition: ${old_state === 'active'} - next: delete_from_storage_edit - next: return_ok - -delete_from_storage_edit: - call: http.delete - args: - url: "[#STORAGE_FERRY]/v1/files/delete" - body: - files: - - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" - container: "[#STORAGE_FERRY_CONTAINER]" - fileName: "${old_name}.json" - result: ferry_delete_result - next: check_if_azure_configured_after_delete_edit - -check_if_azure_configured_after_delete_edit: - switch: - - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} - next: trigger_azure_indexer_after_delete_edit - next: log_azure_not_configured_after_delete_edit - -log_azure_not_configured_after_delete_edit: - log: "Warning! Azure Search configuration not found. Skipping Azure indexer trigger. 
Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" - next: after_delete_from_storage_edit - -trigger_azure_indexer_after_delete_edit: - call: http.post - args: - url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" - headers: - api-key: "[#AZURE_SEARCH_API_KEY]" - Content-Type: "application/json" - result: azure_indexer_result - next: after_delete_from_storage_edit - -after_delete_from_storage_edit: - switch: - - condition: ${state === 'draft'} - next: change_state_to_draft - next: return_ok - -change_state_to_draft: - call: http.post - args: - url: "[#SERVICE_RUUTER]/services/status" - headers: - cookie: ${incoming.headers.cookie} - body: - id: ${id} - state: "draft" - type: ${service_type ?? 'POST'} - result: changeStateResult - next: return_ok - -check_if_service_data_exists: - switch: - - condition: ${old_service_result !== undefined && old_service_result !== null} - next: generate_service_json_from_existing - next: get_service_data_for_json - -get_service_data_for_json: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-service-by-id" - body: - id: ${id} - result: service_data_result - next: generate_service_json - -generate_service_json_from_existing: - assign: - service_data_result: ${old_service_result} - next: generate_service_json - -generate_service_json: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/service_declaration" - headers: - type: 'json' - body: - name: ${name} - description: ${service_data_result.response.body[0].description} - examples: ${service_data_result.response.body[0].examples} - entities: ${service_data_result.response.body[0].entities} - result: service_json_result - next: replace_in_storage - -replace_in_storage: - call: http.post - args: - url: "[#STORAGE_FERRY]/v1/files/create" - body: - files: - - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" - container: 
"[#STORAGE_FERRY_CONTAINER]" - fileName: "${name}.json" - content: ${JSON.stringify(service_json_result.response.body)} - result: ferry_upload_result - next: check_if_azure_configured - -check_if_azure_configured: - switch: - - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} - next: trigger_azure_indexer - next: log_azure_not_configured - -log_azure_not_configured: - log: "Warning! Azure Search configuration not found. Skipping Azure indexer trigger. Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" - next: return_ok - -trigger_azure_indexer: - call: http.post - args: - url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" - headers: - api-key: "[#AZURE_SEARCH_API_KEY]" - Content-Type: "application/json" - result: azure_indexer_result - next: return_ok - -return_ok: - reloadDsl: true - status: 200 - return: "Edited Successfully" - next: end - -return_incorrect_request: - status: 400 - return: "Required parameter(s) missing" - next: end - -return_name_already_exists: - status: 409 - return: "Service name already exists" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml b/DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml deleted file mode 100644 index a628079..0000000 --- a/DSL/Ruuter.public/services/POST/services/endpoint-url-validation.yml +++ /dev/null @@ -1,32 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'ENDPOINT-URL-VALIDATION'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: type - type: string - description: "Body field 'type'" - - field: url - type: string - description: "Body field 'url'" - -extract_request_data: - assign: - url: ${incoming.body.url} - type: ${incoming.body.type} 
- -call_template: - template: "[#SERVICE_PROJECT_LAYER]/validation-template" - requestType: templates - body: - response: ${url} - type: ${type} - result: templateResult - -return_result: - return: ${templateResult} diff --git a/DSL/Ruuter.public/services/POST/services/import-services.yml b/DSL/Ruuter.public/services/POST/services/import-services.yml deleted file mode 100644 index 89a5164..0000000 --- a/DSL/Ruuter.public/services/POST/services/import-services.yml +++ /dev/null @@ -1,71 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'IMPORT-SERVICES'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: services - type: object - description: "Body field 'services'" - - field: timezone - type: string - description: "Body field 'timezone'" - -extract_request_data: - assign: - services: ${incoming.body.services ?? []} - names: ${services.map(s => s.fileName).join(",") ?? []} - timezone: ${incoming.body.timezone} - -get_import_names: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-import-names" - body: - names: ${names} - timezone: ${timezone} - result: import_names_res - -assign_imported_names: - assign: - imported_names: ${import_names_res.response.body[0].names.split(",")} - services: "$=services.map((s, i) => ({ ...s, fileName: imported_names[i] }))=" - file_names: ${services.map(s => s.fileName)} - -insert_services: - call: http.post - args: - url: "[#SERVICE_RESQL]/add-services" - body: - names: ${file_names} - structures: ${services.map(s => s.flowData)} - result: insert_services_res - -convert_json_content_to_yml: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/conversion/json_to_yaml_data_multiple" - body: - data: ${services.map(s => s.content)} - result: ymls_res - -prepare_files: - assign: - file_paths: "$=file_names.map(name => `[#RUUTER_SERVICES_POST_PATH]/draft/${name}.tmp`)=" - yaml_contents: ${ymls_res.response.body.yamls} - -add_dsls: - call: 
http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create_multiple" - body: - file_paths: ${file_paths} - contents: ${yaml_contents} - result: add_dsls_res - -return_result: - return: "Services imported successfully" diff --git a/DSL/Ruuter.public/services/POST/services/inactive/.guard b/DSL/Ruuter.public/services/POST/services/inactive/.guard deleted file mode 100644 index 6443537..0000000 --- a/DSL/Ruuter.public/services/POST/services/inactive/.guard +++ /dev/null @@ -1,4 +0,0 @@ -guard_allow_all: - return: "success" - status: 200 - next: end diff --git a/DSL/Ruuter.public/services/POST/services/open-api-spec.yml b/DSL/Ruuter.public/services/POST/services/open-api-spec.yml deleted file mode 100644 index 3dcc2c8..0000000 --- a/DSL/Ruuter.public/services/POST/services/open-api-spec.yml +++ /dev/null @@ -1,34 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'OPEN-API-SPEC'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: url - type: string - description: "Body field 'url'" - -check_for_required_parameters: - switch: - - condition: ${incoming.body == null || incoming.body.url == null} - next: return_incorrect_request - next: get_spec - -get_spec: - call: http.get - args: - url: ${incoming.body.url} - result: result - -return_value: - return: ${result.response.body} - next: end - -return_incorrect_request: - status: 400 - return: "missing parameters" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/requests/explain.yml b/DSL/Ruuter.public/services/POST/services/requests/explain.yml deleted file mode 100644 index e5fce84..0000000 --- a/DSL/Ruuter.public/services/POST/services/requests/explain.yml +++ /dev/null @@ -1,95 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'EXPLAIN'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: requests - type: object - 
description: "request object containing multiple requests" - -check_for_requests: - switch: - - condition: ${incoming.body.requests == null} - next: missing_requests - next: initialize_responses - -initialize_responses: - assign: - requests: ${incoming.body.requests} - responses: [] - index: 0 - next: process_next_request - -process_next_request: - switch: - - condition: ${index < requests.length} - next: assign_current_request - next: return_responses - -assign_current_request: - assign: - current_request: ${requests[index]} - next: check_method - -check_method: - switch: - - condition: ${current_request.method.toLowerCase() == 'post'} - next: request_explain_post - next: request_explain_get - -request_explain_get: - call: http.get - args: - url: ${current_request.url} - headers: - Content-Type: "application/json" - result: res - next: assign_result - -request_explain_post: - call: http.post - args: - url: ${current_request.url} - headers: - Content-Type: "application/json" - body: - data: ${current_request.body} - result: res - next: assign_result - -assign_result: - assign: - result_res: ${[res.response.body]} - next: check_responses_list - -check_responses_list: - switch: - - condition: ${responses.length === 0} - next: append_first_response - next: append_response - -append_first_response: - assign: - responses: ${[responses, ...result_res]} - index: ${index + 1} - next: process_next_request - -append_response: - assign: - responses: ${[...responses, ...result_res]} - index: ${index + 1} - next: process_next_request - -return_responses: - return: ${responses.splice(1 , responses.length - 1)} - next: end - -missing_requests: - status: 400 - return: "required requests were not provided" - next: end \ No newline at end of file diff --git a/DSL/Ruuter.public/services/POST/services/resql/add.yml b/DSL/Ruuter.public/services/POST/services/resql/add.yml deleted file mode 100644 index b936d8f..0000000 --- a/DSL/Ruuter.public/services/POST/services/resql/add.yml +++ 
/dev/null @@ -1,62 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'ADD'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: name - type: string - description: "Body field 'name'" - - field: sql - type: string - description: "Body field 'sql'" - -check_for_body: - switch: - - condition: ${incoming.body == null} - next: missing_parameter - -extract_request_data: - assign: - name: ${incoming.body.name} - sql: ${incoming.body.sql} - -check_for_required_parameters: - switch: - - condition: ${name == null || sql == null} - next: missing_parameter - next: add_resql - -missing_parameter: - status: 400 - return: "required parameters were not provided" - next: end - -add_resql: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/create" - body: - file_path: "/Resql/services/${name}.sql" - content: ${sql} - result: results - -check_result: - switch: - - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} - next: return_ok - next: return_bad_request - -return_ok: - status: 200 - return: ${results.response.body.message} - next: end - -return_bad_request: - status: 400 - return: ${results.response.body.message} - next: end diff --git a/DSL/Ruuter.public/services/POST/services/status.yml b/DSL/Ruuter.public/services/POST/services/status.yml deleted file mode 100644 index 788aad7..0000000 --- a/DSL/Ruuter.public/services/POST/services/status.yml +++ /dev/null @@ -1,303 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'STATUS'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: id - type: string - description: "Body field 'id'" - - field: state - type: string - description: "Body field 'state'" - - field: type - type: string - description: "Body field 'type'" - headers: - - field: cookie - type: string - description: "Cookie field" - 
-extract_request_data: - assign: - id: ${incoming.body.id} - new_state: ${incoming.body.state} - ruuter_type: ${incoming.body.type} - -check_for_required_parameters: - switch: - - condition: ${id === null || new_state === null || ruuter_type === null} - next: return_incorrect_request - - condition: ${new_state === "ready"} - next: set_plain_status - next: get_current_status - -get_current_status: - call: http.post - args: - url: "[#SERVICE_RESQL]/status" - body: - id: ${id} - result: status_res - next: assign_old_status_and_path - -assign_old_status_and_path: - assign: - old_file_status_path: "${status_res.response.body[0].currentState === 'ready' ? 'draft' : status_res.response.body[0].currentState}" - old_file_end: "${status_res.response.body[0].currentState !== 'active' ? '.tmp' : '.yml'}" - next: check_status - -check_status: - switch: - - condition: ${new_state === "draft"} - next: set_status - - condition: ${status_res.response.body[0].currentState === new_state} - next: return_same_state_update - next: set_status - -set_status: - call: http.post - args: - url: "[#SERVICE_RESQL]/set-status" - body: - id: ${id} - new_state: ${new_state} - result: res - next: get_status_name - -set_plain_status: - call: http.post - args: - url: "[#SERVICE_RESQL]/set-status" - body: - id: ${id} - new_state: ${new_state} - result: draft_res - next: return_ok - -get_status_name: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-service-name-by-id" - body: - id: ${id} - result: name_res - next: assign_values - -assign_values: - assign: - name: ${name_res.response.body[0].name} - service_name: "service_${name_res.response.body[0].name}" - next: check_file_exists - -check_file_exists: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/exists" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" - result: service_file_exists_result - next: validate_file_exists - 
-validate_file_exists: - switch: - - condition: ${!!service_file_exists_result.response.body} - next: check_for_status - next: return_service_file_missing - -check_for_status: - switch: - - condition: ${new_state === "active"} - next: activate_service - - condition: ${new_state === "draft"} - next: draft_service - next: deactivate_service - -activate_service: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/move" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" - new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/active/${name}.yml" - result: activate_service_result - next: get_service_data_for_json - -get_service_data_for_json: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-service-by-id" - body: - id: ${id} - result: service_data_result - next: generate_service_json - -generate_service_json: - call: http.post - args: - url: "[#SERVICE_DMAPPER_HBS]/service_declaration" - headers: - type: 'json' - body: - name: ${name} - description: ${service_data_result.response.body[0].description} - examples: ${service_data_result.response.body[0].examples} - entities: ${service_data_result.response.body[0].entities} - result: service_json_result - next: create_in_storage - -create_in_storage: - call: http.post - args: - url: "[#STORAGE_FERRY]/v1/files/create" - body: - files: - - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" - container: "[#STORAGE_FERRY_CONTAINER]" - fileName: "${name}.json" - content: ${JSON.stringify(service_json_result.response.body)} - result: ferry_upload_result - next: check_if_azure_configured_after_create - -check_if_azure_configured_after_create: - switch: - - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} - next: trigger_azure_indexer_after_create - next: log_azure_not_configured_after_create - 
-log_azure_not_configured_after_create: - log: "Warning! Azure Search configuration not found. Skipping Azure indexer trigger. Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" - next: activate_all_mcq_services - -trigger_azure_indexer_after_create: - call: http.post - args: - url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" - headers: - api-key: "[#AZURE_SEARCH_API_KEY]" - Content-Type: "application/json" - result: azure_indexer_result - next: activate_all_mcq_services - -activate_all_mcq_services: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/move-all-that-starts-with" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" - new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/active" - keyword: "${name}_" - format: "yml" - result: active_move_results - next: return_ok - -deactivate_service: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/move" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" - new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/inactive/${name}.tmp" - result: deactivate_service_result - next: delete_from_storage - -delete_from_storage: - call: http.delete - args: - url: "[#STORAGE_FERRY]/v1/files/delete" - body: - files: - - storageAccountId: "[#STORAGE_FERRY_ACCOUNT_ID]" - container: "[#STORAGE_FERRY_CONTAINER]" - fileName: "${name}.json" - result: ferry_delete_result - next: check_if_azure_configured_after_delete - -check_if_azure_configured_after_delete: - switch: - - condition: ${"[#AZURE_SEARCH_SERVICE_NAME]" !== "" && "[#AZURE_SEARCH_INDEXER_NAME]" !== "" && "[#AZURE_SEARCH_API_KEY]" !== ""} - next: trigger_azure_indexer_after_delete - next: 
log_azure_not_configured_after_delete - -log_azure_not_configured_after_delete: - log: "Warning! Azure Search configuration not found. Skipping Azure indexer trigger. Please configure AZURE_SEARCH_SERVICE_NAME, AZURE_SEARCH_INDEXER_NAME, and AZURE_SEARCH_API_KEY in constants.ini" - next: dactivate_all_mcq_services - -trigger_azure_indexer_after_delete: - call: http.post - args: - url: "https://[#AZURE_SEARCH_SERVICE_NAME].search.windows.net/indexers/[#AZURE_SEARCH_INDEXER_NAME]/run?api-version=2024-07-01" - headers: - api-key: "[#AZURE_SEARCH_API_KEY]" - Content-Type: "application/json" - result: azure_indexer_result - next: dactivate_all_mcq_services - -dactivate_all_mcq_services: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/move-all-that-starts-with" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" - new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/inactive" - keyword: "${name}_" - format: "tmp" - result: inactive_move_results - next: return_ok - -draft_service: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/move" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}/${name + old_file_end}" - new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/draft/${name}.tmp" - result: draft_service_result - next: draft_all_mcq_services - -draft_all_mcq_services: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/file-manager/move-all-that-starts-with" - body: - file_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/${old_file_status_path}" - new_path: "[#RUUTER_SERVICES_PATH]/${ruuter_type}/[#RUUTER_SERVICES_DIR_PATH]/draft" - keyword: "${name}_" - format: "tmp" - result: inactive_move_results - next: return_ok - -return_ok: - reloadDsl: true - status: 200 - return: "Status Changed Successfully" - next: end - -return_bad_request: - 
status: 400 - return: ${err_result.response.body.message} - next: end - -return_incorrect_request: - status: 400 - return: "Required parameter(s) missing" - next: end - -return_same_state_update: - status: 200 - return: "Service is already in this state" - next: end - -return_service_file_missing: - status: 500 - return: "Service file to update is missing" - next: end - -return_intent_does_not_exist: - status: 400 - return: "does not exists" - next: end diff --git a/DSL/Ruuter.public/services/POST/services/update-endpoint.yml b/DSL/Ruuter.public/services/POST/services/update-endpoint.yml deleted file mode 100644 index 9228079..0000000 --- a/DSL/Ruuter.public/services/POST/services/update-endpoint.yml +++ /dev/null @@ -1,61 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Updates an existing endpoint" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: name - type: string - description: "Endpoint name" - - field: type - type: string - description: "Endpoint type" - - field: isCommon - type: boolean - description: "Endpoint common status" - - field: serviceId - type: string - description: "Service UUID" - - field: definitions - type: object - description: "Endpoint definitions" - params: - - field: id - type: string - description: "Endpoint UUID" - -extract_request_data: - assign: - id: ${incoming.params.id} - -check_for_type: - switch: - - condition: ${id == null} - next: return_no_type_error - -update_endpoint: - call: http.post - args: - url: "[#SERVICE_RESQL]/endpoints/update_endpoint" - body: - endpointId: ${id} - name: ${incoming.body.name} - type: ${incoming.body.type} - isCommon: ${incoming.body.isCommon} - serviceId: ${incoming.body.serviceId ?? 
''} - definitions: ${incoming.body.definitions} - result: res - -return_ok: - status: 200 - return: "Endpoint updated" - next: end - -return_no_type_error: - status: 400 - return: "Please provide an endpoint ID" - next: end diff --git a/DSL/Ruuter.public/services/POST/steps/preferences.yml b/DSL/Ruuter.public/services/POST/steps/preferences.yml deleted file mode 100644 index 61f7272..0000000 --- a/DSL/Ruuter.public/services/POST/steps/preferences.yml +++ /dev/null @@ -1,68 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'PREFERENCES'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: steps - type: string - description: "Body field 'steps'" - - field: endpoints - type: string - description: "Body field 'endpoints'" - -extractRequestData: - assign: - steps: ${incoming.body.steps.join(",")} - endpoints: ${incoming.body.endpoints.join(",")} - -get_user_info: - call: http.post - args: - url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" - contentType: plaintext - headers: - cookie: ${incoming.headers.cookie} - plaintext: "customJwtCookie" - result: res - -check_user_info_response: - switch: - - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} - next: assignIdCode - next: return_unauthorized - -assignIdCode: - assign: - idCode: ${res.response.body.idCode} - -update_user_step_preferences: - call: http.post - args: - url: "[#SERVICE_RESQL]/update-user-step-preferences" - body: - steps: "{${steps}}" - endpoints: "{${endpoints}}" - user_id_code: ${idCode} - result: update_preferences_res - -get_user_step_preferences: - call: http.post - args: - url: "[#SERVICE_RESQL]/get-user-step-preferences" - body: - user_id_code: ${idCode} - result: preferences - -return_preferences: - return: ${preferences.response.body[0]} - next: end - -return_unauthorized: - status: 401 - return: "unauthorized" - next: end diff --git 
a/DSL/Ruuter.public/services/POST/user-info.yml b/DSL/Ruuter.public/services/POST/user-info.yml deleted file mode 100644 index e6309aa..0000000 --- a/DSL/Ruuter.public/services/POST/user-info.yml +++ /dev/null @@ -1,16 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'USER-INFO'" - method: post - accepts: json - returns: json - namespace: service - -get_tara_info: - template: "[#SERVICE_PROJECT_LAYER]/tara" - requestType: templates - result: TARA - -return_authorized: - return: ${TARA.response.body} diff --git a/DSL/Ruuter.public/services/TEMPLATES/RBAC.yml b/DSL/Ruuter.public/services/TEMPLATES/RBAC.yml deleted file mode 100644 index fbe0f03..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/RBAC.yml +++ /dev/null @@ -1,51 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'RBAC'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: allowedRoles - type: object - description: "Body field 'allowedRoles'" - - field: userId - type: string - description: "Body field 'userId'" - -check_for_body: - switch: - - condition: ${incoming.body == null} - next: missing_parameter - -extract_request_data: - assign: - userId: ${incoming.body.userId} - allowedRoles: ${incoming.body.allowedRoles.sort()} - -check_for_required_parameters: - switch: - - condition: ${userId === null || allowedRoles === null} - next: missing_parameter - next: fetch_user_roles_from_db - -fetch_user_roles_from_db: - call: http.post - args: - url: "[#SERVICE_RESQL_USERS]:[#SERVICE_RESQL_USERS_PORT]/is-user-roles-allowed" - body: - userId: ${userId} - allowedRoles: ${allowedRoles} - result: result - -return_value: - status: 200 - return: "${result.response.body[0]}" - next: end - -missing_parameter: - status: 400 - return: "userId, allowedRoles - missing" - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml 
b/DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml deleted file mode 100644 index 2564113..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/check-user-authority.yml +++ /dev/null @@ -1,50 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'CHECK-USER-AUTHORITY'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - headers: - - field: cookie - type: string - description: "Cookie field" - -get_cookie_info: - call: http.post - args: - url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" - contentType: plaintext - headers: - cookie: ${incoming.headers.cookie} - plaintext: "customJwtCookie" - result: res - next: check_cookie_info_response - -check_cookie_info_response: - switch: - - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} - next: check_user_authority - next: return_bad_request - -check_user_authority: - switch: - - condition: ${res.response.body.authorities.includes("ROLE_ADMINISTRATOR") || res.response.body.authorities.includes("ROLE_SERVICE_MANAGER")} - next: return_authorized - next: return_unauthorized - -return_authorized: - return: ${res.response.body} - next: end - -return_unauthorized: - status: 200 - return: false - next: end - -return_bad_request: - status: 400 - return: false - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/client-input.yml b/DSL/Ruuter.public/services/TEMPLATES/client-input.yml deleted file mode 100644 index 1d635ea..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/client-input.yml +++ /dev/null @@ -1,19 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'CLIENT-INPUT'" - method: post - accepts: json - returns: json - namespace: service - -# TODO: replace with correct request to get user input -request_client_input: - call: reflect.mock - args: - response: - input: "Yes" - result: clientInput - -return_value: - return: ${clientInput.response.body} diff 
--git a/DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml b/DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml deleted file mode 100644 index 175f028..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/direct-to-cs.yml +++ /dev/null @@ -1,42 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'DIRECT-TO-CS'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: message - type: string - description: "Body field 'message'" - -# Direct to customer support -check_for_body: - switch: - - condition: ${incoming.body == null || incoming.body.message == null || incoming.body.message == ""} - next: missing_body_parameter - next: extract_request_data - -extract_request_data: - assign: - message: ${incoming.body.message} - -# TODO: do the actual request -send_message_to_client: - call: reflect.mock - args: - response: - status: 'OK' - message: "Teid suunatakse klienditeenindusse" - result: result - -return_value: - return: ${result.response.body} - next: end - -missing_body_parameter: - status: 400 - return: 'message - missing' - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml b/DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml deleted file mode 100644 index 43dbf6b..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/end-conversation.yml +++ /dev/null @@ -1,42 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'END-CONVERSATION'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: message - type: string - description: "Body field 'message'" - -# End conversation -check_for_body: - switch: - - condition: ${incoming.body == null || incoming.body.message == null || incoming.body.message == ""} - next: missing_body_parameter - next: extract_request_data - -extract_request_data: - assign: - message: ${incoming.body.message} - -# TODO: do the actual 
request -send_message_to_client: - call: reflect.mock - args: - response: - status: 'OK' - message: "Teenus on lõpetatud" - result: result - -return_value: - return: ${result.response.body} - next: end - -missing_body_parameter: - status: 400 - return: 'message - missing' - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/file-generate.yml b/DSL/Ruuter.public/services/TEMPLATES/file-generate.yml deleted file mode 100644 index 4eb9f42..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/file-generate.yml +++ /dev/null @@ -1,45 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'FILE-GENERATE'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: fileContent - type: string - description: "Body field 'fileContent'" - - field: fileName - type: string - description: "Body field 'fileName'" - -check_for_body: - switch: - - condition: ${incoming.body == null || incoming.body.fileName == null || incoming.body.fileContent == null} - next: missing_body_parameters - next: extract_request_data - -extract_request_data: - assign: - fileName: ${incoming.body.fileName} - fileContent: ${incoming.body.fileContent} - -generate_pdf_file: - call: http.post - args: - url: "[#SERVICE_DMAPPER]/js/generate/pdf" - body: - filename: ${fileName} - template: ${fileContent} - result: result - -return: - return: ${result.response.body} - next: end - -missing_body_parameters: - status: 400 - return: "fileName, fileContent - missing" - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/file-signing.yml b/DSL/Ruuter.public/services/TEMPLATES/file-signing.yml deleted file mode 100644 index 4d2571c..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/file-signing.yml +++ /dev/null @@ -1,35 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'FILE-SIGNING'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - 
field: country - type: string - description: "Body field 'country'" - - field: personIdentifier - type: string - description: "Body field 'personIdentifier'" - - field: phoneNr - type: string - description: "Body field 'phoneNr'" - - field: type - type: string - description: "Body field 'type'" - -siga_template_request: - template: siga - requestType: templates - body: - type: ${incoming.body.type} - personIdentifier: ${incoming.body.personIdentifier} - country: ${incoming.body.country} - phoneNr: ${incoming.body.phoneNr} - result: result - -return_result: - return: ${result.response.body} diff --git a/DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml b/DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml deleted file mode 100644 index 9435e42..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/open-webpage.yml +++ /dev/null @@ -1,44 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'OPEN-WEBPAGE'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: link - type: string - description: "Body field 'link'" - - field: linkText - type: string - description: "Body field 'linkText'" - -check_for_body: - switch: - - condition: ${incoming.body == null || incoming.body.link == null || incoming.body.link == "" || incoming.body.linkText == null || incoming.body.linkText == ""} - next: missing_body_parameter - next: extract_request_data - -extract_request_data: - assign: - link: ${incoming.body.link} - linkText: ${incoming.body.linkText} - -send_link_to_client: - call: reflect.mock - args: - response: - status: "OK" - link: Link Text - result: result - -return_value: - return: ${result.response.body} - next: end - -missing_body_parameter: - status: 400 - return: "link, linkText - both or one of these fields are missing" - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml b/DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml deleted 
file mode 100644 index 0db2430..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/send-message-to-client.yml +++ /dev/null @@ -1,42 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SEND-MESSAGE-TO-CLIENT'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: message - type: string - description: "Body field 'message'" - -# Message to client element -check_for_body: - switch: - - condition: ${incoming.body == null || incoming.body.message == null || incoming.body.message == ""} - next: missing_body_parameter - next: extract_request_data - -extract_request_data: - assign: - message: ${incoming.body.message} - -# TODO: do the actual request -send_message_to_client: - call: reflect.mock - args: - response: - status: 'OK' - message: "Hello, Muki" - result: result - -return_value: - return: ${result.response.body} - next: end - -missing_body_parameter: - status: 400 - return: 'message - missing' - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/siga.yml b/DSL/Ruuter.public/services/TEMPLATES/siga.yml deleted file mode 100644 index adb8984..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/siga.yml +++ /dev/null @@ -1,132 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'SIGA'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - body: - - field: containerType - type: string - description: "Body field 'containerType'" - - field: country - type: string - description: "Body field 'country'" - - field: file - type: string - description: "Body field 'file'" - - field: phoneNumber - type: string - description: "Body field 'phoneNumber'" - - field: type - type: string - description: "Body field 'type'" - -extract_request_data: - assign: - file: ${incoming.body.file} - signType: ${incoming.body.type} - country: ${incoming.body.country} - phoneNumber: ${incoming.body.phoneNumber} - containerType: 
${incoming.body.containerType} - next: get_tara_info - -get_tara_info: - template: tara - requestType: templates - result: tara_res - next: extract_tara_data - -extract_tara_data: - assign: - identifier: ${tara_res.response.body.idCode} - next: check_for_container_type - -check_for_container_type: - switch: - - condition: ${containerType === "ASIC".toLowerCase()} - next: create_asic_container - - condition: ${containerType === "HASHCODE".toLowerCase()} - next: create_hashcode_container - next: missing_container_type - -create_asic_container: - call: http.post - args: - url: "[#SERVICE_SIGA]/create-container" - contentType: formdata - body: - file:file[0]:uploadedFile.pdf: ${file} - return: container_res - next: check_if_sign_type_missing - -create_hashcode_container: - call: http.post - args: - url: "[#SERVICE_SIGA]/create-hashcode-container" - contentType: formdata - body: - file:file[0]:uploadedFile.pdf: ${file} - return: container_res - next: check_if_sign_type_missing - -check_if_sign_type_missing: - switch: - - condition: ${signType === null} - next: missing_sign_type - next: check_for_sign_type - -check_for_sign_type: - switch: - - condition: ${signType === "smart_id"} - next: sign_via_smart_id - - condition: ${signType === "mobile_sign"} - next: sign_via_mobile - next: missing_sign_type - -sign_via_smart_id: - call: http.post - args: - url: "[#SERVICE_SIGA]/smartid-signing" - body: - containerId: ${container_res.response.body.id} - containerType: ${containerType.toUpperCase()} - personIdentifier: ${identifier} - country: ${country} - return: res - next: end - -sign_via_mobile: - call: http.post - args: - url: "[#SERVICE_SIGA]/mobile-signing" - body: - containerId: ${container_res.response.body.id} - containerType: ${containerType.toUpperCase()} - phoneNr: ${phoneNumber} - personIdentifier: ${identifier} - country: ${country} - return: res - next: end - -missing_smart_id_params: - status: 400 - return: "Id, country - missing" - next: end - 
-missing_mobile_sign_params: - status: 400 - return: "Phone number, country - missing" - next: end - -missing_sign_type: - status: 400 - return: "Sign type is missing" - next: end - -missing_container_type: - status: 400 - return: "Container type is missing" - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/tara.yml b/DSL/Ruuter.public/services/TEMPLATES/tara.yml deleted file mode 100644 index 28ac86d..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/tara.yml +++ /dev/null @@ -1,51 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'TARA'" - method: post - accepts: json - returns: json - namespace: service - allowlist: - headers: - - field: cookie - type: string - description: "Cookie field" - - -check_for_body: - switch: - - condition: ${incoming.headers == null || incoming.headers.cookie == null} - next: missing_cookie - next: get_cookie_info - -get_cookie_info: - call: http.post - args: - url: "[#SERVICE_TIM]/jwt/custom-jwt-userinfo" - contentType: plaintext - headers: - cookie: ${incoming.headers.cookie} - plaintext: "customJwtCookie" - result: res - next: check_cookie_info_response - -check_cookie_info_response: - switch: - - condition: ${200 <= res.response.statusCodeValue && res.response.statusCodeValue < 300} - next: return_auth_result - next: return_bad_request - -return_auth_result: - return: ${res.response.body} - next: end - -return_bad_request: - status: 400 - return: false - next: end - -missing_cookie: - status: 401 - return: "no authentication cookie" - next: end diff --git a/DSL/Ruuter.public/services/TEMPLATES/validation-template.yml b/DSL/Ruuter.public/services/TEMPLATES/validation-template.yml deleted file mode 100644 index 63f21c8..0000000 --- a/DSL/Ruuter.public/services/TEMPLATES/validation-template.yml +++ /dev/null @@ -1,56 +0,0 @@ -declaration: - call: declare - version: 0.1 - description: "Decription placeholder for 'VALIDATION-TEMPLATE'" - method: post - accepts: json - returns: 
json - namespace: service - allowlist: - body: - - field: response - type: string - description: "Body field 'response'" - - field: type - type: string - description: "Body field 'type'" - -assign_step: - assign: - res: ${incoming.body.response} - type: ${incoming.body.type} - next: check_for_type - -check_for_type: - switch: - - condition: ${type.toLowerCase() === 'get'} - next: validate_get_request - next: validate_post_request - -validate_get_request: - call: http.get - args: - url: ${res} - result: results - next: validate_status_code - -validate_post_request: - call: http.post - args: - url: ${res} - result: results - next: validate_status_code - -validate_status_code: - switch: - - condition: ${200 <= results.response.statusCodeValue && results.response.statusCodeValue < 300} - next: return_true - next: return_false - -return_true: - return: true - next: end - -return_false: - return: false - next: end diff --git a/src/tool_classifier/constants.py b/src/tool_classifier/constants.py index 545300c..ffda950 100644 --- a/src/tool_classifier/constants.py +++ b/src/tool_classifier/constants.py @@ -37,7 +37,7 @@ RUUTER_BASE_URL = "http://ruuter-private:8086" """Base URL for Ruuter private service endpoints.""" -RUUTER_SERVICE_BASE_URL = "http://ruuter-public:8086/services" +RUUTER_SERVICE_BASE_URL = "http://ruuter:8086/services" """Base URL for Ruuter public service endpoints (active services).""" RAG_SEARCH_RUUTER_PUBLIC = "http://ruuter-public:8086/rag-search" From d3e149473c7ca58f343e65e022b550cf12d92d4a Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Thu, 12 Mar 2026 14:39:45 +0530 Subject: [PATCH 24/27] fixed issues --- src/tool_classifier/context_analyzer.py | 168 ++- .../workflows/context_workflow.py | 47 +- tests/conftest.py | 9 + tests/test_context_analyzer.py | 979 ++++++++++++++++++ tests/test_context_workflow.py | 698 +++++++++++++ tests/test_context_workflow_integration.py | 851 +++++++++++++++ 6 files changed, 2728 insertions(+), 24 deletions(-) create 
mode 100644 tests/test_context_analyzer.py create mode 100644 tests/test_context_workflow.py create mode 100644 tests/test_context_workflow_integration.py diff --git a/src/tool_classifier/context_analyzer.py b/src/tool_classifier/context_analyzer.py index 4572aef..3584683 100644 --- a/src/tool_classifier/context_analyzer.py +++ b/src/tool_classifier/context_analyzer.py @@ -207,7 +207,6 @@ def __init__(self, llm_manager: Any) -> None: # noqa: ANN401 # Phase 1 & 2 modules for two-phase detection+generation flow self._detection_module: Optional[dspy.Module] = None self._response_generation_module: Optional[dspy.Module] = None - self._stream_predictor: Optional[Any] = None logger.info("Context analyzer initialized") def _format_conversation_history( @@ -357,6 +356,111 @@ async def detect_context( ) return result, cost_dict + async def detect_context_with_summary_fallback( + self, + query: str, + conversation_history: List[Dict[str, Any]], + ) -> tuple[ContextDetectionResult, Dict[str, Any]]: + """ + Phase 1 with summary fallback: detect if query can be answered from history. + + Implements a 3-step flow: + 1. Check the last 10 turns via detect_context(). + 2. If cannot answer AND total history > 10 turns: + - Generate a concise summary of the older turns (everything before the last 10). + - Check whether the query can be answered from that summary. + 3. If still cannot answer, return can_answer=False (workflow falls back to RAG). + + When the summary path succeeds, the returned ContextDetectionResult has: + - can_answer_from_context=True + - answered_from_summary=True + - context_snippet set to the answer extracted from the summary, so that + Phase 2 (stream_context_response / generate_context_response) can use it + directly as the context for response generation. 
+ + Args: + query: User query to classify + conversation_history: Full conversation history + + Returns: + Tuple of (ContextDetectionResult, cost_dict) + """ + total_turns = len(conversation_history) + + # Step 1: check the most recent 10 turns + result, cost_dict = await self.detect_context( + query=query, conversation_history=conversation_history + ) + + # If already answered or it's a greeting, return immediately + if result.is_greeting or result.can_answer_from_context: + return result, cost_dict + + # Step 2 & 3: if history exceeds 10 turns, try summary-based detection + if total_turns > 10: + logger.info( + f"History has {total_turns} turns (> 10) | " + f"Cannot answer from recent 10 | Attempting summary-based detection" + ) + older_history = conversation_history[:-10] + logger.info(f"Summarizing {len(older_history)} older turns") + + try: + summary, summary_cost = await self._generate_conversation_summary( + older_history + ) + cost_dict = self._merge_cost_dicts(cost_dict, summary_cost) + + if summary: + summary_result, analysis_cost = await self._analyze_from_summary( + query=query, summary=summary + ) + cost_dict = self._merge_cost_dicts(cost_dict, analysis_cost) + + if summary_result.can_answer_from_context and summary_result.answer: + logger.info( + f"DETECTION: Can answer from summary | " + f"Reasoning: {summary_result.reasoning}" + ) + # Surface the summary-derived answer as context_snippet so + # Phase 2 can generate a polished response from it. 
+ return ContextDetectionResult( + is_greeting=False, + can_answer_from_context=True, + reasoning=summary_result.reasoning, + context_snippet=summary_result.answer, + answered_from_summary=True, + ), cost_dict + + logger.info( + "Cannot answer from summary either | Falling back to RAG" + ) + else: + logger.warning( + "Summary generation returned empty | Falling back to RAG" + ) + + except Exception as e: + logger.error(f"Summary-based detection failed: {e}", exc_info=True) + else: + logger.info( + f"History has {total_turns} turns (<= 10) | " + f"No summary needed | Falling back to RAG" + ) + + return result, cost_dict + + @staticmethod + def _yield_in_chunks(text: str, chunk_size: int = 5) -> list[str]: + """Split text into word-group chunks for simulated streaming.""" + words = text.split() + chunks = [] + for i in range(0, len(words), chunk_size): + group = words[i : i + chunk_size] + trailing = " " if i + chunk_size < len(words) else "" + chunks.append(" ".join(group) + trailing) + return chunks + async def stream_context_response( self, query: str, @@ -365,30 +469,39 @@ async def stream_context_response( """ Phase 2 (streaming): Stream a generated answer using DSPy native streaming. - Uses ContextResponseGenerationSignature with DSPy's streamify() so tokens - are yielded in real time — same mechanism as ResponseGeneratorAgent.stream_response(). + Creates a fresh streamify predictor per call (avoids stale StreamListener + issues that occur when the cached predictor is reused across calls). + + Fallback chain: + 1. DSPy streamify → yield StreamResponse tokens as they arrive. + 2. If no stream tokens received but final Prediction has an answer, + yield it in word-group chunks. + 3. If that is also empty, call generate_context_response() directly + and yield the result in word-group chunks. 
Args: query: The user query to answer context_snippet: Relevant context extracted during Phase 1 detection Yields: - Token strings as they arrive from the LLM + Token strings as they arrive from the LLM (or simulated chunks) """ logger.info(f"CONTEXT GENERATOR: Phase 2 streaming | Query: '{query[:100]}'") self.llm_manager.ensure_global_config() output_stream = None stream_started = False + prediction_answer: Optional[str] = None try: with self.llm_manager.use_task_local(): - if self._stream_predictor is None: - answer_listener = StreamListener(signature_field_name="answer") - self._stream_predictor = dspy.streamify( - dspy.Predict(ContextResponseGenerationSignature), - stream_listeners=[answer_listener], - ) - output_stream = self._stream_predictor( + # Always create a fresh StreamListener + streamified predictor so that + # the listener's internal state is clean for this call. + answer_listener = StreamListener(signature_field_name="answer") + stream_predictor: Any = dspy.streamify( + dspy.Predict(ContextResponseGenerationSignature), + stream_listeners=[answer_listener], + ) + output_stream = stream_predictor( context_snippet=context_snippet, user_query=query, ) @@ -402,11 +515,11 @@ async def stream_context_response( logger.info( "Context response streaming complete (final Prediction received)" ) + if not stream_started: + # Tokens didn't stream — extract answer from the Prediction + # directly as first fallback before leaving the LM context. + prediction_answer = getattr(chunk, "answer", "") or "" - if not stream_started: - logger.warning( - "Context streaming finished but no 'answer' tokens received." 
- ) except GeneratorExit: raise except Exception as e: @@ -421,6 +534,31 @@ async def stream_context_response( f"Error during context stream cleanup: {cleanup_error}" ) + if stream_started: + return + + # Fallback 1: answer was in the final Prediction but didn't stream as tokens + if prediction_answer: + logger.warning( + "Stream tokens not received — yielding answer from final Prediction in chunks." + ) + for text_chunk in self._yield_in_chunks(prediction_answer): + yield text_chunk + return + + # Fallback 2: Prediction had no answer either — call generate_context_response + logger.warning( + "No answer from streamify — falling back to generate_context_response." + ) + fallback_answer, _ = await self.generate_context_response( + query=query, context_snippet=context_snippet + ) + if fallback_answer: + for text_chunk in self._yield_in_chunks(fallback_answer): + yield text_chunk + else: + logger.error("All Phase 2 streaming fallbacks exhausted — empty response.") + async def generate_context_response( self, query: str, diff --git a/src/tool_classifier/workflows/context_workflow.py b/src/tool_classifier/workflows/context_workflow.py index 8d69675..0aa7fb2 100644 --- a/src/tool_classifier/workflows/context_workflow.py +++ b/src/tool_classifier/workflows/context_workflow.py @@ -1,6 +1,6 @@ """Context workflow executor - Layer 2: Conversation history and greetings.""" -from typing import Any, AsyncIterator, Dict, Optional +from typing import Any, AsyncIterator, Dict, Optional, cast import time import dspy from loguru import logger @@ -77,10 +77,19 @@ async def _detect( time_metric: Dict[str, float], costs_metric: Dict[str, Dict[str, Any]], ) -> Optional[ContextDetectionResult]: - """Phase 1: run context detection. Returns ContextDetectionResult or None on error.""" + """Phase 1: run context detection with summary fallback. + + Checks the last 10 conversation turns first. 
If the query cannot be + answered from those and the history exceeds 10 turns, falls back to a + summary-based check over the older turns. Returns None on error so the + caller falls through to RAG. + """ try: start = time.time() - result, cost = await self.context_analyzer.detect_context( + ( + result, + cost, + ) = await self.context_analyzer.detect_context_with_summary_fallback( query=message, conversation_history=history ) time_metric["context.detection"] = time.time() - start @@ -267,12 +276,29 @@ async def execute_async( language = detect_language(request.message) history = self._build_history(request) - detection_result = await self._detect( - request.message, history, time_metric, costs_metric - ) - if detection_result is None: - self._log_costs(costs_metric) - return None + # Check if analysis is pre-computed (e.g. from classifier classify step) + pre_computed = context.get("analysis_result") + if ( + pre_computed is not None + and hasattr(pre_computed, "is_greeting") + and hasattr(pre_computed, "can_answer_from_context") + ): + detection_result: ContextDetectionResult = cast( + ContextDetectionResult, pre_computed + ) + costs_metric.setdefault( + "context_detection", + {"total_cost": 0.0, "total_tokens": 0, "num_calls": 0}, + ) + else: + _detected = await self._detect( + request.message, history, time_metric, costs_metric + ) + if _detected is None: + self._log_costs(costs_metric) + context["costs_dict"] = costs_metric + return None + detection_result = _detected logger.info( f"[{request.chatId}] Detection: greeting={detection_result.is_greeting} " @@ -286,6 +312,7 @@ async def execute_async( greeting_type=detection_result.greeting_type, language=language ) self._log_costs(costs_metric) + context["costs_dict"] = costs_metric return OrchestrationResponse( chatId=request.chatId, llmServiceActive=True, @@ -298,6 +325,7 @@ async def execute_async( detection_result.can_answer_from_context and detection_result.context_snippet ): + context["costs_dict"] = 
costs_metric return await self._generate_response_async( request, detection_result.context_snippet, time_metric, costs_metric ) @@ -306,6 +334,7 @@ async def execute_async( f"[{request.chatId}] Cannot answer from context — falling back to RAG" ) self._log_costs(costs_metric) + context["costs_dict"] = costs_metric return None async def execute_streaming( diff --git a/tests/conftest.py b/tests/conftest.py index d1633b7..e26acfc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,3 +6,12 @@ # Add the project root to Python path so tests can import from src project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) + +# Add src directory to Python path for direct module imports +src_dir = project_root / "src" +sys.path.insert(0, str(src_dir)) + +# Add models directory (sibling to src) for backward compatibility +models_dir = project_root / "models" +if models_dir.exists(): + sys.path.insert(0, str(models_dir.parent)) diff --git a/tests/test_context_analyzer.py b/tests/test_context_analyzer.py new file mode 100644 index 0000000..094b8a4 --- /dev/null +++ b/tests/test_context_analyzer.py @@ -0,0 +1,979 @@ +"""Unit tests for context analyzer - greeting detection and context analysis.""" + +import pytest +from collections.abc import Generator +from unittest.mock import MagicMock, patch +import json +import dspy + +from src.tool_classifier.context_analyzer import ( + ContextAnalyzer, +) +from src.tool_classifier.greeting_constants import get_greeting_response + + +@pytest.fixture(autouse=True) +def mock_dspy_lm() -> Generator[MagicMock, None, None]: + """Mock DSPy LM to prevent 'No LM is loaded' errors.""" + mock_lm = MagicMock() + mock_lm.history = [] + with patch("dspy.settings") as mock_settings: + mock_settings.lm = mock_lm + # Configure DSPy with mock LM + dspy.configure(lm=mock_lm) + yield mock_lm + + +class TestContextAnalyzerInit: + """Test ContextAnalyzer initialization.""" + + def test_init_creates_analyzer(self) -> None: + 
"""ContextAnalyzer should initialize with LLM manager.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + assert analyzer.llm_manager is llm_manager + assert analyzer._module is None + assert analyzer._summary_module is None + assert analyzer._summary_analysis_module is None + + +class TestConversationHistoryFormatting: + """Test conversation history formatting.""" + + def test_format_empty_history(self) -> None: + """Empty history should return empty JSON array.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + result = analyzer._format_conversation_history([]) + + assert result == "[]" + + def test_format_single_turn(self) -> None: + """Single conversation turn should be formatted correctly.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + history = [ + { + "authorRole": "user", + "message": "Hello", + "timestamp": "2024-01-01T12:00:00", + } + ] + + result = analyzer._format_conversation_history(history) + parsed = json.loads(result) + + assert len(parsed) == 1 + assert parsed[0]["role"] == "user" + assert parsed[0]["message"] == "Hello" + + def test_format_multiple_turns(self) -> None: + """Multiple conversation turns should be formatted correctly.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + history = [ + { + "authorRole": "user", + "message": "What is tax?", + "timestamp": "2024-01-01T12:00:00", + }, + { + "authorRole": "bot", + "message": "Tax is a mandatory financial charge.", + "timestamp": "2024-01-01T12:00:01", + }, + { + "authorRole": "user", + "message": "Thank you", + "timestamp": "2024-01-01T12:00:02", + }, + ] + + result = analyzer._format_conversation_history(history) + parsed = json.loads(result) + + assert len(parsed) == 3 + assert parsed[0]["role"] == "user" + assert parsed[1]["role"] == "bot" + assert parsed[2]["role"] == "user" + + def test_format_truncates_to_max_turns(self) -> None: + """History should be truncated to last 10 
turns.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Create 15 turns + history = [ + { + "authorRole": "user" if i % 2 == 0 else "bot", + "message": f"Message {i}", + "timestamp": f"2024-01-01T12:00:{i:02d}", + } + for i in range(15) + ] + + result = analyzer._format_conversation_history(history, max_turns=10) + parsed = json.loads(result) + + assert len(parsed) == 10 + # Should have last 10 turns (indices 5-14) + assert parsed[0]["message"] == "Message 5" + assert parsed[-1]["message"] == "Message 14" + + +class TestGreetingDetection: + """Test greeting detection functionality.""" + + @pytest.mark.asyncio + async def test_detect_estonian_greeting(self) -> None: + """Should detect Estonian greeting 'Tere' and generate response.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Mock DSPy module response + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": True, + "can_answer_from_context": False, + "answer": "Tere! 
Kuidas ma saan sind aidata?", + "reasoning": "User said hello in Estonian", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + result, cost_dict = await analyzer.analyze_context( + query="Tere!", + conversation_history=[], + language="et", + ) + + assert result.is_greeting is True + assert result.can_answer_from_context is False + assert "Tere" in result.answer + assert cost_dict["total_cost"] == 0.001 + + @pytest.mark.asyncio + async def test_detect_english_greeting(self) -> None: + """Should detect English greeting 'Hello' and generate response.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Mock DSPy module response + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": True, + "can_answer_from_context": False, + "answer": "Hello! 
How can I help you?", + "reasoning": "User said hello in English", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + result, cost_dict = await analyzer.analyze_context( + query="Hello!", + conversation_history=[], + language="en", + ) + + assert result.is_greeting is True + assert "Hello" in result.answer or "hello" in result.answer.lower() + + @pytest.mark.asyncio + async def test_detect_goodbye(self) -> None: + """Should detect goodbye greeting.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": True, + "can_answer_from_context": False, + "answer": "Goodbye! Have a great day!", + "reasoning": "User said goodbye", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="Bye!", + conversation_history=[], + language="en", + ) + + assert result.is_greeting is True + + @pytest.mark.asyncio + async def test_detect_thanks(self) -> None: + """Should detect thank you greeting.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": True, + "can_answer_from_context": False, + "answer": "You're welcome! 
Feel free to ask if you have more questions.", + "reasoning": "User said thank you", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="Thank you!", + conversation_history=[], + language="en", + ) + + assert result.is_greeting is True + + +class TestContextBasedAnswering: + """Test context-based question answering.""" + + @pytest.mark.asyncio + async def test_answer_from_conversation_history(self) -> None: + """Should extract answer from conversation history when query references it.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + history = [ + { + "authorRole": "user", + "message": "What is the tax rate?", + "timestamp": "2024-01-01T12:00:00", + }, + { + "authorRole": "bot", + "message": "The tax rate is 20%.", + "timestamp": "2024-01-01T12:00:01", + }, + ] + + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": True, + "answer": "I mentioned that the tax rate is 20%.", + "reasoning": "User is asking about previously mentioned tax rate", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.002, + "total_tokens": 100, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="What was the rate you mentioned?", + conversation_history=history, + language="en", + ) + + assert result.is_greeting is False + assert result.can_answer_from_context is True + assert result.answer is not None + assert "20%" in result.answer + + @pytest.mark.asyncio + 
async def test_cannot_answer_from_context(self) -> None: + """Should return cannot answer when query doesn't reference history.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + history = [ + { + "authorRole": "user", + "message": "What is the weather?", + "timestamp": "2024-01-01T12:00:00", + }, + ] + + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": False, + "answer": None, + "reasoning": "Query is about taxes, not previous weather discussion", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.002, + "total_tokens": 100, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="What is the tax rate?", + conversation_history=history, + language="en", + ) + + assert result.is_greeting is False + assert result.can_answer_from_context is False + assert result.answer is None + + +class TestErrorHandling: + """Test error handling in context analyzer.""" + + @pytest.mark.asyncio + async def test_handles_llm_json_parse_error(self) -> None: + """Should handle invalid JSON response from LLM gracefully.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Mock DSPy module to return invalid JSON + mock_response = MagicMock() + mock_response.analysis_result = "Invalid JSON response" + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="Hello", + conversation_history=[], + language="en", + ) + + # Should fallback to safe default + 
assert result.is_greeting is False + assert result.can_answer_from_context is False + assert result.answer is None + assert "Failed to parse" in result.reasoning + + @pytest.mark.asyncio + async def test_handles_llm_exception(self) -> None: + """Should handle LLM call exceptions gracefully.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Mock DSPy module to raise exception + with patch.object( + dspy, + "ChainOfThought", + return_value=MagicMock(side_effect=Exception("LLM error")), + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.0, + "total_tokens": 0, + "num_calls": 0, + } + + result, _ = await analyzer.analyze_context( + query="Hello", + conversation_history=[], + language="en", + ) + + # Should fallback to safe default + assert result.is_greeting is False + assert result.can_answer_from_context is False + assert result.answer is None + assert "error" in result.reasoning.lower() + + +class TestFallbackGreeting: + """Test fallback greeting responses.""" + + def test_fallback_estonian_greeting(self) -> None: + """Should return Estonian fallback greeting.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + response = analyzer.get_fallback_greeting_response("et") + + assert "Tere" in response + + def test_fallback_english_greeting(self) -> None: + """Should return English fallback greeting.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + response = analyzer.get_fallback_greeting_response("en") + + assert "Hello" in response or "hello" in response + + def test_fallback_unknown_language_defaults_to_estonian(self) -> None: + """Should default to Estonian for unknown language codes.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + response = analyzer.get_fallback_greeting_response("xx") + + assert "Tere" in response or "tere" in response.lower() + + +class 
TestGreetingConstants: + """Test greeting constants and helper functions.""" + + def test_get_estonian_hello(self) -> None: + """Should return Estonian hello greeting.""" + response = get_greeting_response("hello", "et") + assert "Tere" in response + + def test_get_english_goodbye(self) -> None: + """Should return English goodbye greeting.""" + response = get_greeting_response("goodbye", "en") + assert "Goodbye" in response or "goodbye" in response + + def test_get_estonian_thanks(self) -> None: + """Should return Estonian thanks greeting.""" + response = get_greeting_response("thanks", "et") + assert "Palun" in response + + def test_unknown_greeting_type_defaults_to_hello(self) -> None: + """Should default to hello for unknown greeting types.""" + response = get_greeting_response("unknown", "en") + assert "Hello" in response or "hello" in response + + +def _make_history(num_turns: int) -> list[dict[str, str]]: + """Helper to create a conversation history with the specified number of turns.""" + return [ + { + "authorRole": "user" if i % 2 == 0 else "bot", + "message": f"Message {i}", + "timestamp": f"2024-01-01T12:00:{i:02d}", + } + for i in range(num_turns) + ] + + +class TestCostMerging: + """Test cost dictionary merging.""" + + def test_merge_cost_dicts(self) -> None: + """Should sum all numeric values from two cost dicts.""" + cost1 = { + "total_cost": 0.001, + "total_tokens": 50, + "total_prompt_tokens": 30, + "total_completion_tokens": 20, + "num_calls": 1, + } + cost2 = { + "total_cost": 0.002, + "total_tokens": 100, + "total_prompt_tokens": 60, + "total_completion_tokens": 40, + "num_calls": 1, + } + + merged = ContextAnalyzer._merge_cost_dicts(cost1, cost2) + + assert merged["total_cost"] == pytest.approx(0.003) + assert merged["total_tokens"] == 150 + assert merged["total_prompt_tokens"] == 90 + assert merged["total_completion_tokens"] == 60 + assert merged["num_calls"] == 2 + + def test_merge_cost_dicts_with_empty(self) -> None: + """Should handle 
merging with an empty cost dict.""" + cost1 = { + "total_cost": 0.001, + "total_tokens": 50, + "total_prompt_tokens": 30, + "total_completion_tokens": 20, + "num_calls": 1, + } + + merged = ContextAnalyzer._merge_cost_dicts(cost1, {}) + + assert merged["total_cost"] == 0.001 + assert merged["total_tokens"] == 50 + assert merged["num_calls"] == 1 + + +class TestConversationSummary: + """Test conversation summary generation.""" + + @pytest.mark.asyncio + async def test_generate_summary_from_older_turns(self) -> None: + """Should generate summary from older conversation turns.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + older_history = _make_history(6) + + mock_response = MagicMock() + mock_response.summary = "User discussed messages 0-5 about various topics." + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + summary, cost_dict = await analyzer._generate_conversation_summary( + older_history + ) + + assert summary == "User discussed messages 0-5 about various topics." 
+ assert cost_dict["total_cost"] == 0.001 + + @pytest.mark.asyncio + async def test_generate_summary_handles_exception(self) -> None: + """Should return empty string when summary generation fails.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + with patch.object( + dspy, + "ChainOfThought", + return_value=MagicMock(side_effect=Exception("LLM error")), + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.0, + "total_tokens": 0, + "num_calls": 0, + } + + summary, _ = await analyzer._generate_conversation_summary( + _make_history(5) + ) + + assert summary == "" + + @pytest.mark.asyncio + async def test_analyze_from_summary_can_answer(self) -> None: + """Should answer from summary when information is available.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "can_answer_from_context": True, + "answer": "The tax rate discussed earlier was 20%.", + "reasoning": "Summary contains tax rate information", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.002, + "total_tokens": 100, + "num_calls": 1, + } + + result, cost_dict = await analyzer._analyze_from_summary( + query="What was the tax rate?", + summary="User asked about tax. 
Bot replied: tax rate is 20%.", + ) + + assert result.can_answer_from_context is True + assert result.answered_from_summary is True + assert result.answer is not None + assert "20%" in result.answer + + @pytest.mark.asyncio + async def test_analyze_from_summary_cannot_answer(self) -> None: + """Should return cannot answer when summary doesn't contain relevant info.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "can_answer_from_context": False, + "answer": None, + "reasoning": "Summary does not contain information about weather", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.002, + "total_tokens": 100, + "num_calls": 1, + } + + result, _ = await analyzer._analyze_from_summary( + query="What is the weather?", + summary="User discussed tax rates and filing.", + ) + + assert result.can_answer_from_context is False + assert result.answered_from_summary is False + assert result.answer is None + + @pytest.mark.asyncio + async def test_analyze_from_summary_handles_exception(self) -> None: + """Should return safe fallback when summary analysis fails.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + with patch.object( + dspy, + "ChainOfThought", + return_value=MagicMock(side_effect=Exception("LLM error")), + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.0, + "total_tokens": 0, + "num_calls": 0, + } + + result, _ = await analyzer._analyze_from_summary( + query="test", summary="test summary" + ) + + assert result.can_answer_from_context is False + assert result.answered_from_summary is False + assert result.answer is None + + +class 
TestSummaryFlow: + """Test the full analyze_context flow with summary logic.""" + + @pytest.mark.asyncio + async def test_short_history_skips_summary(self) -> None: + """With <= 10 turns, should use recent history only, no summary.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Cannot answer from recent history, but only 8 turns - should NOT trigger summary + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": False, + "answer": None, + "reasoning": "Cannot answer from context", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="What is digital signature?", + conversation_history=_make_history(8), + language="en", + ) + + # Should not answer (no summary triggered for <= 10 turns) + assert result.can_answer_from_context is False + assert result.answered_from_summary is False + assert result.answer is None + + @pytest.mark.asyncio + async def test_long_history_answers_from_recent(self) -> None: + """With > 10 turns, if recent 10 can answer, should not trigger summary.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Can answer from recent history + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": True, + "answer": "The rate is 20%.", + "reasoning": "Found in recent history", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 
50, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="What was the rate?", + conversation_history=_make_history(15), + language="en", + ) + + assert result.can_answer_from_context is True + assert result.answered_from_summary is False + assert result.answer == "The rate is 20%." + + @pytest.mark.asyncio + async def test_long_history_answers_from_summary(self) -> None: + """With > 10 turns, if recent can't answer but summary can, should return summary answer.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Step 1: Recent history cannot answer + recent_response = MagicMock() + recent_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": False, + "answer": None, + "reasoning": "Not in recent history", + } + ) + + # Step 2: Summary generation + summary_response = MagicMock() + summary_response.summary = ( + "User asked about tax rates. Bot said the tax rate is 20%." + ) + + # Step 3: Summary analysis can answer + summary_analysis_response = MagicMock() + summary_analysis_response.analysis_result = json.dumps( + { + "can_answer_from_context": True, + "answer": "Based on our earlier discussion, the tax rate is 20%.", + "reasoning": "Found tax rate in conversation summary", + } + ) + + # Chain of Thought is called 3 times: recent analysis, summary gen, summary analysis + call_count = 0 + mock_modules = [ + MagicMock(return_value=recent_response), + MagicMock(return_value=summary_response), + MagicMock(return_value=summary_analysis_response), + ] + + def chain_of_thought_factory(*args: object, **kwargs: object) -> MagicMock: + nonlocal call_count + module = mock_modules[call_count] + call_count += 1 + return module + + with patch.object(dspy, "ChainOfThought", side_effect=chain_of_thought_factory): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + 
"num_calls": 1, + } + + result, cost_dict = await analyzer.analyze_context( + query="What was the tax rate we discussed?", + conversation_history=_make_history(15), + language="en", + ) + + assert result.can_answer_from_context is True + assert result.answered_from_summary is True + assert result.answer is not None + assert "20%" in result.answer + # Costs should be merged from all 3 calls + assert cost_dict["num_calls"] == 3 + + @pytest.mark.asyncio + async def test_long_history_falls_to_rag(self) -> None: + """With > 10 turns, if neither recent nor summary can answer, should fall through.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + # Step 1: Recent history cannot answer + recent_response = MagicMock() + recent_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": False, + "answer": None, + "reasoning": "Not in recent history", + } + ) + + # Step 2: Summary generation + summary_response = MagicMock() + summary_response.summary = "User discussed weather and greetings." 
+ + # Step 3: Summary analysis cannot answer + summary_analysis_response = MagicMock() + summary_analysis_response.analysis_result = json.dumps( + { + "can_answer_from_context": False, + "answer": None, + "reasoning": "Summary does not contain tax information", + } + ) + + call_count = 0 + mock_modules = [ + MagicMock(return_value=recent_response), + MagicMock(return_value=summary_response), + MagicMock(return_value=summary_analysis_response), + ] + + def chain_of_thought_factory(*args: object, **kwargs: object) -> MagicMock: + nonlocal call_count + module = mock_modules[call_count] + call_count += 1 + return module + + with patch.object(dspy, "ChainOfThought", side_effect=chain_of_thought_factory): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + "num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="What is the tax rate?", + conversation_history=_make_history(15), + language="en", + ) + + # Should not be able to answer -> falls to RAG + assert result.can_answer_from_context is False + assert result.answered_from_summary is False + assert result.answer is None + + @pytest.mark.asyncio + async def test_answered_from_summary_flag_is_false_for_recent(self) -> None: + """The answered_from_summary flag should be False for recent history answers.""" + llm_manager = MagicMock() + analyzer = ContextAnalyzer(llm_manager) + + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": True, + "answer": "The answer from recent history.", + "reasoning": "Found in recent conversation", + } + ) + + with patch.object( + dspy, "ChainOfThought", return_value=MagicMock(return_value=mock_response) + ): + with patch( + "src.tool_classifier.context_analyzer.get_lm_usage_since" + ) as mock_cost: + mock_cost.return_value = { + "total_cost": 0.001, + "total_tokens": 50, + 
"num_calls": 1, + } + + result, _ = await analyzer.analyze_context( + query="What did you say?", + conversation_history=_make_history(5), + language="en", + ) + + assert result.answered_from_summary is False diff --git a/tests/test_context_workflow.py b/tests/test_context_workflow.py new file mode 100644 index 0000000..1362a72 --- /dev/null +++ b/tests/test_context_workflow.py @@ -0,0 +1,698 @@ +"""Unit tests for context workflow executor.""" + +import pytest +from collections.abc import AsyncGenerator, Generator +from unittest.mock import AsyncMock, MagicMock, patch +import dspy + +from src.tool_classifier.workflows.context_workflow import ContextWorkflowExecutor +from src.tool_classifier.context_analyzer import ContextDetectionResult +from models.request_models import ( + OrchestrationRequest, + OrchestrationResponse, + ConversationItem, +) + + +@pytest.fixture +def mock_dspy_lm() -> Generator[MagicMock, None, None]: + """Mock DSPy LM to prevent 'No LM is loaded' errors.""" + mock_lm = MagicMock() + mock_lm.history = [] + with patch("dspy.settings") as mock_settings: + mock_settings.lm = mock_lm + # Configure DSPy with mock LM + dspy.configure(lm=mock_lm) + yield mock_lm + + +@pytest.fixture +def mock_orchestration_service() -> MagicMock: + """Create mock orchestration service for streaming tests.""" + import json as _json + import time as _time + + service = MagicMock() + + def _format_sse_impl(chat_id: str, content: str) -> str: + payload = { + "chatId": chat_id, + "payload": {"content": content}, + "timestamp": int(_time.time() * 1000), + } + return f"data: {_json.dumps(payload)}\n\n" + + service.format_sse = _format_sse_impl + service.log_costs = MagicMock() + return service + + +@pytest.fixture +def llm_manager() -> MagicMock: + """Create mock LLM manager.""" + return MagicMock() + + +@pytest.fixture +def context_workflow( + llm_manager: MagicMock, + mock_orchestration_service: MagicMock, + mock_dspy_lm: MagicMock, +) -> ContextWorkflowExecutor: + """Create 
ContextWorkflowExecutor instance.""" + return ContextWorkflowExecutor( + llm_manager, orchestration_service=mock_orchestration_service + ) + + +@pytest.fixture +def sample_request() -> OrchestrationRequest: + """Create sample orchestration request.""" + return OrchestrationRequest( + chatId="test-chat-123", + message="Hello!", + authorId="test-user", + conversationHistory=[], + url="https://example.com", + environment="testing", + connection_id="test-connection", + ) + + +class TestContextWorkflowInit: + """Test context workflow initialization.""" + + def test_init_creates_workflow(self, llm_manager: MagicMock) -> None: + """ContextWorkflowExecutor should initialize with LLM manager.""" + workflow = ContextWorkflowExecutor(llm_manager) + + assert workflow.llm_manager is llm_manager + assert workflow.context_analyzer is not None + + +class TestExecuteAsyncGreeting: + """Test execute_async with greeting queries.""" + + @pytest.mark.asyncio + async def test_execute_async_greeting_estonian( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should handle Estonian greeting and return response.""" + sample_request.message = "Tere!" 
+ + # Mock context analyzer + mock_analysis = ContextDetectionResult( + is_greeting=True, + greeting_type="hello", + can_answer_from_context=False, + reasoning="Greeting detected", + context_snippet=None, + ) + + with patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + context_dict = {} + response = await context_workflow.execute_async( + sample_request, context_dict + ) + + assert response is not None + assert isinstance(response, OrchestrationResponse) + assert response.chatId == "test-chat-123" + assert "Tere" in response.content + assert response.llmServiceActive is True + assert response.questionOutOfLLMScope is False + assert response.inputGuardFailed is False + + # Check cost tracking + assert "costs_dict" in context_dict + assert "context_detection" in context_dict["costs_dict"] + + @pytest.mark.asyncio + async def test_execute_async_greeting_english( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should handle English greeting and return response.""" + sample_request.message = "Hello!" 
+ + mock_analysis = ContextDetectionResult( + is_greeting=True, + greeting_type="hello", + can_answer_from_context=False, + reasoning="English greeting detected", + context_snippet=None, + ) + + with patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + response = await context_workflow.execute_async(sample_request, {}) + + assert response is not None + assert "Hello" in response.content or "hello" in response.content.lower() + + +class TestExecuteAsyncContextBased: + """Test execute_async with context-based queries.""" + + @pytest.mark.asyncio + async def test_execute_async_context_answer( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should answer from conversation history when possible.""" + # Add conversation history + sample_request.conversationHistory = [ + ConversationItem( + authorRole="user", + message="What is the tax rate?", + timestamp="2024-01-01T12:00:00", + ), + ConversationItem( + authorRole="bot", + message="The tax rate is 20%.", + timestamp="2024-01-01T12:00:01", + ), + ] + sample_request.message = "What was the rate you mentioned?" 
+ + mock_analysis = ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=True, + reasoning="Referring to previous conversation about tax rate", + context_snippet="The tax rate is 20%.", + ) + + with ( + patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.002, "total_tokens": 100, "num_calls": 1}, + ), + ), + patch.object( + context_workflow.context_analyzer, + "generate_context_response", + new_callable=AsyncMock, + return_value=( + "The tax rate is 20%.", + {"total_cost": 0.003, "num_calls": 1}, + ), + ), + ): + response = await context_workflow.execute_async(sample_request, {}) + + assert response is not None + assert "20%" in response.content + + @pytest.mark.asyncio + async def test_execute_async_cannot_answer_from_context( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should return None when cannot answer from context (fallback to RAG).""" + sample_request.message = "What is digital signature?" 
+ + mock_analysis = ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=False, + reasoning="Query requires knowledge base search", + context_snippet=None, + ) + + with patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + response = await context_workflow.execute_async(sample_request, {}) + + assert response is None + + @pytest.mark.asyncio + async def test_execute_async_answer_is_none( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should return None when can_answer_from_context=True but context_snippet is absent.""" + mock_analysis = ContextDetectionResult( + is_greeting=False, + can_answer_from_context=True, + context_snippet=None, # No snippet → cannot generate answer + reasoning="No relevant snippet found in history", + ) + + with patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + response = await context_workflow.execute_async(sample_request, {}) + + assert response is None + + +class TestExecuteAsyncErrorHandling: + """Test error handling in execute_async.""" + + @pytest.mark.asyncio + async def test_execute_async_handles_analyzer_exception( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should return None when context analyzer raises exception.""" + with patch.object( + context_workflow.context_analyzer, + "detect_context", + side_effect=Exception("Analysis failed"), + ): + response = await context_workflow.execute_async(sample_request, {}) + + assert response is None + + +class TestExecuteStreaming: + """Test execute_streaming functionality.""" + + @pytest.mark.asyncio + async def test_execute_streaming_greeting( + self, + context_workflow: 
ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should stream greeting response.""" + sample_request.message = "Hello!" + + mock_analysis = ContextDetectionResult( + is_greeting=True, + greeting_type="hello", + can_answer_from_context=False, + reasoning="Greeting detected", + context_snippet=None, + ) + + with patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + stream_gen = await context_workflow.execute_streaming(sample_request, {}) + + assert stream_gen is not None + + # Collect streamed chunks + chunks = [chunk async for chunk in stream_gen] + + # Should have multiple chunks + END marker + assert len(chunks) > 1 + + # Last chunk should be END marker + last_chunk = chunks[-1] + assert "END" in last_chunk + + # All chunks should be valid SSE format + for chunk in chunks: + assert chunk.startswith("data: ") + assert chunk.endswith("\n\n") + + @pytest.mark.asyncio + async def test_execute_streaming_context_answer( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should stream context-based answer.""" + sample_request.message = "What did you say earlier?" + sample_request.conversationHistory = [ + ConversationItem( + authorRole="bot", + message="The rate is 20%.", + timestamp="2024-01-01T12:00:00", + ), + ] + + mock_analysis = ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=True, + reasoning="Referring to previous message", + context_snippet="I mentioned that the rate is 20%.", + ) + + async def _fake_history_stream( + *args: object, **kwargs: object + ) -> AsyncGenerator[str, None]: + yield context_workflow.orchestration_service.format_sse( + sample_request.chatId, "I mentioned that the rate is 20%." 
+ ) + yield context_workflow.orchestration_service.format_sse( + sample_request.chatId, "END" + ) + + with ( + patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.002, "total_tokens": 100, "num_calls": 1}, + ), + ), + patch.object( + context_workflow, + "_create_history_stream", + new_callable=AsyncMock, + return_value=_fake_history_stream(), + ), + ): + stream_gen = await context_workflow.execute_streaming(sample_request, {}) + + assert stream_gen is not None + + chunks = [chunk async for chunk in stream_gen] + + assert len(chunks) > 0 + # Verify END marker + assert "END" in chunks[-1] + + @pytest.mark.asyncio + async def test_execute_streaming_cannot_answer( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should return None when cannot answer (fallback to RAG).""" + sample_request.message = "What is digital signature?" + + mock_analysis = ContextDetectionResult( + is_greeting=False, + can_answer_from_context=False, + reasoning="Requires knowledge base", + ) + + with patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + stream_gen = await context_workflow.execute_streaming(sample_request, {}) + + assert stream_gen is None + + @pytest.mark.asyncio + async def test_execute_streaming_handles_exception( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should return None when analyzer raises exception.""" + with patch.object( + context_workflow.context_analyzer, + "detect_context", + side_effect=Exception("Analysis failed"), + ): + stream_gen = await context_workflow.execute_streaming(sample_request, {}) + + assert stream_gen is None + + +class TestCostTracking: + """Test cost tracking functionality.""" + + @pytest.mark.asyncio + async def 
test_cost_tracking_in_context_dict( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should track costs in context dictionary.""" + mock_analysis = ContextDetectionResult( + is_greeting=True, + can_answer_from_context=False, + reasoning="Greeting", + ) + + cost_dict = { + "total_cost": 0.0015, + "total_tokens": 75, + "total_prompt_tokens": 50, + "total_completion_tokens": 25, + "num_calls": 1, + } + + with patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=(mock_analysis, cost_dict), + ): + context_dict = {} + await context_workflow.execute_async(sample_request, context_dict) + + assert "costs_dict" in context_dict + assert "context_detection" in context_dict["costs_dict"] + assert context_dict["costs_dict"]["context_detection"]["total_cost"] == 0.0015 + assert context_dict["costs_dict"]["context_detection"]["total_tokens"] == 75 + + +class TestLanguageDetection: + """Test language detection integration.""" + + @pytest.mark.asyncio + async def test_detects_estonian_language( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should detect Estonian language from query.""" + sample_request.message = "Tere! Kuidas läheb?" + + mock_analysis = ContextDetectionResult( + is_greeting=True, + can_answer_from_context=False, + reasoning="Estonian greeting", + ) + + with ( + patch.object( + context_workflow.context_analyzer, "detect_context" + ) as mock_detect, + patch( + "src.tool_classifier.greeting_constants.get_greeting_response" + ) as mock_greeting, + ): + mock_detect.return_value = ( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ) + mock_greeting.return_value = "Tere! Kuidas ma saan sind aidata?" 
+ + await context_workflow.execute_async(sample_request, {}) + + # Verify Estonian language was used for greeting response + mock_greeting.assert_called_with(greeting_type="hello", language="et") + + @pytest.mark.asyncio + async def test_detects_english_language( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should detect English language from query.""" + sample_request.message = "Hello! How are you?" + + mock_analysis = ContextDetectionResult( + is_greeting=True, + can_answer_from_context=False, + reasoning="English greeting", + ) + + with ( + patch.object( + context_workflow.context_analyzer, "detect_context" + ) as mock_detect, + patch( + "src.tool_classifier.greeting_constants.get_greeting_response" + ) as mock_greeting, + ): + mock_detect.return_value = ( + mock_analysis, + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ) + mock_greeting.return_value = "Hello! How can I help you?" + + await context_workflow.execute_async(sample_request, {}) + + # Verify English language was used for greeting response + mock_greeting.assert_called_with(greeting_type="hello", language="en") + + +class TestExecuteAsyncSummaryBased: + """Test execute_async with summary-based answers.""" + + @pytest.mark.asyncio + async def test_execute_async_summary_answer( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should return response when answer comes from conversation summary.""" + sample_request.message = "What was the tax rate we discussed earlier?" 
+ + mock_analysis = ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=True, + reasoning="Found in conversation summary", + context_snippet="Based on our earlier discussion, the tax rate is 20%.", + answered_from_summary=True, + ) + + with ( + patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.005, "total_tokens": 200, "num_calls": 3}, + ), + ), + patch.object( + context_workflow.context_analyzer, + "generate_context_response", + new_callable=AsyncMock, + return_value=( + "Based on our earlier discussion, the tax rate is 20%.", + {"total_cost": 0.003, "num_calls": 1}, + ), + ), + ): + response = await context_workflow.execute_async(sample_request, {}) + + assert response is not None + assert isinstance(response, OrchestrationResponse) + assert "20%" in response.content + assert response.llmServiceActive is True + + @pytest.mark.asyncio + async def test_execute_streaming_summary_answer( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should stream summary-based answer correctly.""" + sample_request.message = "What was the tax rate we discussed earlier?" + + mock_analysis = ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=True, + reasoning="Found in conversation summary", + context_snippet="Based on our earlier discussion, the tax rate is 20%.", + answered_from_summary=True, + ) + + async def _fake_summary_stream( + *args: object, **kwargs: object + ) -> AsyncGenerator[str, None]: + yield context_workflow.orchestration_service.format_sse( + sample_request.chatId, "The tax rate is 20%." 
+ ) + yield context_workflow.orchestration_service.format_sse( + sample_request.chatId, "END" + ) + + with ( + patch.object( + context_workflow.context_analyzer, + "detect_context", + return_value=( + mock_analysis, + {"total_cost": 0.005, "total_tokens": 200, "num_calls": 3}, + ), + ), + patch.object( + context_workflow, + "_create_history_stream", + new_callable=AsyncMock, + return_value=_fake_summary_stream(), + ), + ): + stream_gen = await context_workflow.execute_streaming(sample_request, {}) + + assert stream_gen is not None + + chunks = [chunk async for chunk in stream_gen] + + # Should have multiple chunks + END marker + assert len(chunks) > 1 + assert "END" in chunks[-1] + + @pytest.mark.asyncio + async def test_pre_computed_summary_analysis( + self, + context_workflow: ContextWorkflowExecutor, + sample_request: OrchestrationRequest, + ) -> None: + """Should use pre-computed summary analysis from classifier.""" + sample_request.message = "What was the tax rate?" + + mock_analysis = ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=True, + reasoning="Found in summary", + context_snippet="The tax rate is 20%.", + answered_from_summary=True, + ) + + # Pre-computed analysis (from classifier) + context = {"analysis_result": mock_analysis} + + with patch.object( + context_workflow.context_analyzer, + "generate_context_response", + new_callable=AsyncMock, + return_value=( + "The tax rate is 20%.", + {"total_cost": 0.003, "num_calls": 1}, + ), + ): + response = await context_workflow.execute_async(sample_request, context) + + assert response is not None + assert "20%" in response.content diff --git a/tests/test_context_workflow_integration.py b/tests/test_context_workflow_integration.py new file mode 100644 index 0000000..bca2af2 --- /dev/null +++ b/tests/test_context_workflow_integration.py @@ -0,0 +1,851 @@ +"""Integration tests for context workflow. 
+ +Tests the full classify -> route -> execute chain with real component wiring. +Only the LLM layer (dspy) and RAG orchestration service are mocked. + +These tests verify: +- ToolClassifier.classify() correctly routes greetings to CONTEXT workflow +- ToolClassifier.route_to_workflow() executes the context workflow end-to-end +- Fallback from CONTEXT to RAG when context cannot answer +- Streaming mode for context workflow responses +- Cost tracking propagation through the classify -> execute chain +- Error resilience (LLM failures, JSON parse errors) +""" + +import pytest +from collections.abc import AsyncGenerator, Generator +from contextlib import AbstractContextManager +from unittest.mock import AsyncMock, MagicMock, patch +import json +import dspy + +from src.tool_classifier.classifier import ToolClassifier +from src.tool_classifier.context_analyzer import ContextDetectionResult +from src.tool_classifier.models import ClassificationResult +from src.models.request_models import ( + OrchestrationRequest, + OrchestrationResponse, + ConversationItem, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def mock_dspy_lm() -> Generator[MagicMock, None, None]: + """Mock DSPy LM to prevent 'No LM is loaded' errors.""" + mock_lm = MagicMock() + mock_lm.history = [] + with patch("dspy.settings") as mock_settings: + mock_settings.lm = mock_lm + # Configure DSPy with mock LM + dspy.configure(lm=mock_lm) + yield mock_lm + + +@pytest.fixture +def mock_orchestration_service() -> MagicMock: + """Create mock orchestration service for RAG workflow fallback.""" + import json as _json + import time as _time + + service = MagicMock() + + # Non-streaming RAG fallback returns a valid response + async def mock_execute_pipeline(**kwargs: object) -> OrchestrationResponse: + return OrchestrationResponse( + 
chatId=kwargs["request"].chatId, + llmServiceActive=True, + questionOutOfLLMScope=False, + inputGuardFailed=False, + content="RAG fallback answer.", + ) + + service._execute_orchestration_pipeline = AsyncMock( + side_effect=mock_execute_pipeline + ) + service._initialize_service_components = MagicMock(return_value={}) + service._log_costs = MagicMock() + service.log_costs = MagicMock() + + def _format_sse_impl(chat_id: str, content: str) -> str: + payload = { + "chatId": chat_id, + "payload": {"content": content}, + "timestamp": int(_time.time() * 1000), + } + return f"data: {_json.dumps(payload)}\n\n" + + service.format_sse = _format_sse_impl + + # Streaming RAG fallback + async def mock_stream_pipeline(**kwargs: object) -> AsyncGenerator[str, None]: + yield 'data: {"chatId":"test","payload":{"content":"RAG stream"}}\n\n' + yield 'data: {"chatId":"test","payload":{"content":"END"}}\n\n' + + service._stream_rag_pipeline = mock_stream_pipeline + + return service + + +@pytest.fixture +def llm_manager() -> MagicMock: + """Create mock LLM manager.""" + return MagicMock() + + +@pytest.fixture +def classifier( + llm_manager: MagicMock, mock_orchestration_service: MagicMock +) -> ToolClassifier: + """Create a real ToolClassifier with real workflow executors.""" + return ToolClassifier( + llm_manager=llm_manager, + orchestration_service=mock_orchestration_service, + ) + + +def _make_request( + message: str, + chat_id: str = "integration-test-chat", + history: list | None = None, +) -> OrchestrationRequest: + """Helper to build an OrchestrationRequest.""" + return OrchestrationRequest( + chatId=chat_id, + message=message, + authorId="test-user", + conversationHistory=history or [], + url="https://example.com", + environment="testing", + connection_id="test-conn", + ) + + +def _mock_dspy_greeting(answer_text: str) -> AbstractContextManager[MagicMock]: + """Return a patch context manager that makes dspy return a greeting analysis.""" + mock_response = MagicMock() + 
mock_response.analysis_result = json.dumps( + { + "is_greeting": True, + "can_answer_from_context": False, + "answer": answer_text, + "reasoning": "Greeting detected", + } + ) + return patch( + "dspy.ChainOfThought", + return_value=MagicMock(return_value=mock_response), + ) + + +def _mock_dspy_context_answer( + answer_text: str, reasoning: str = "History reference" +) -> AbstractContextManager[MagicMock]: + """Return a patch that makes dspy return a context-based answer.""" + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": True, + "answer": answer_text, + "reasoning": reasoning, + } + ) + return patch( + "dspy.ChainOfThought", + return_value=MagicMock(return_value=mock_response), + ) + + +def _mock_dspy_no_match() -> AbstractContextManager[MagicMock]: + """Return a patch that makes dspy indicate neither greeting nor context match.""" + mock_response = MagicMock() + mock_response.analysis_result = json.dumps( + { + "is_greeting": False, + "can_answer_from_context": False, + "answer": None, + "reasoning": "Requires knowledge base search", + } + ) + return patch( + "dspy.ChainOfThought", + return_value=MagicMock(return_value=mock_response), + ) + + +def _patch_cost_utils() -> AbstractContextManager[MagicMock]: + """Patch cost tracking to avoid dspy settings dependency. + + Patches at both possible module paths to handle Python's module identity + behaviour when src/ is on sys.path (module may be loaded as either + ``tool_classifier.context_analyzer`` or ``src.tool_classifier.context_analyzer``). 
+ """ + cost_return = { + "total_cost": 0.001, + "total_tokens": 50, + "total_prompt_tokens": 30, + "total_completion_tokens": 20, + "num_calls": 1, + } + + import sys + + # Determine which module key is actually loaded at runtime + if "tool_classifier.context_analyzer" in sys.modules: + target = "tool_classifier.context_analyzer.get_lm_usage_since" + else: + target = "src.tool_classifier.context_analyzer.get_lm_usage_since" + + return patch(target, return_value=cost_return) + + +# --------------------------------------------------------------------------- +# Integration: classify -> route -> execute (non-streaming) +# --------------------------------------------------------------------------- + + +class TestClassifyAndRouteGreeting: + """Test full classify -> route chain for greeting queries.""" + + @pytest.mark.asyncio + async def test_greeting_classify_returns_context_workflow( + self, classifier: ToolClassifier + ) -> None: + """classify() should return CONTEXT workflow for greeting queries. + + With the hybrid-search classifier, classify() uses Qdrant to detect + service queries. When no service matches (or embedding fails in tests), + it falls back to CONTEXT. The analysis_result is produced later inside + the context workflow executor during route_to_workflow. + """ + with ( + _mock_dspy_greeting("Tere! Kuidas ma saan sind aidata?"), + _patch_cost_utils(), + ): + result = await classifier.classify( + query="Tere!", + conversation_history=[], + language="et", + ) + + # Hybrid classifier routes non-service queries to CONTEXT + assert result.workflow.value == "context" + # analysis_result is now populated during route_to_workflow, not classify + assert result.metadata is not None + + @pytest.mark.asyncio + async def test_greeting_end_to_end_non_streaming( + self, classifier: ToolClassifier + ) -> None: + """Full chain: classify greeting -> route to context workflow -> get response.""" + with _mock_dspy_greeting("Hello! 
How can I help you?"), _patch_cost_utils(): + classification = await classifier.classify( + query="Hello!", + conversation_history=[], + language="en", + ) + + request = _make_request("Hello!") + with patch.object( + classifier.context_workflow.context_analyzer, + "detect_context", + new_callable=AsyncMock, + return_value=( + ContextDetectionResult( + is_greeting=True, + greeting_type="hello", + can_answer_from_context=False, + reasoning="Greeting detected", + ), + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + assert response.chatId == "integration-test-chat" + assert "Hello" in response.content + assert response.llmServiceActive is True + assert response.questionOutOfLLMScope is False + + @pytest.mark.asyncio + async def test_estonian_greeting_end_to_end( + self, classifier: ToolClassifier + ) -> None: + """Full chain for Estonian greeting.""" + with ( + _mock_dspy_greeting("Tere! 
Kuidas ma saan sind aidata?"), + _patch_cost_utils(), + ): + classification = await classifier.classify( + query="Tere!", + conversation_history=[], + language="et", + ) + + request = _make_request("Tere!") + with patch.object( + classifier.context_workflow.context_analyzer, + "detect_context", + new_callable=AsyncMock, + return_value=( + ContextDetectionResult( + is_greeting=True, + greeting_type="hello", + can_answer_from_context=False, + reasoning="Estonian greeting detected", + ), + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + assert "Tere" in response.content + + @pytest.mark.asyncio + async def test_goodbye_end_to_end(self, classifier: ToolClassifier) -> None: + """Full chain for goodbye greeting.""" + with _mock_dspy_greeting("Goodbye! Have a great day!"), _patch_cost_utils(): + classification = await classifier.classify( + query="Goodbye!", + conversation_history=[], + language="en", + ) + + request = _make_request("Goodbye!") + with patch.object( + classifier.context_workflow.context_analyzer, + "detect_context", + new_callable=AsyncMock, + return_value=( + ContextDetectionResult( + is_greeting=True, + greeting_type="goodbye", + can_answer_from_context=False, + reasoning="Goodbye detected", + ), + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + assert "Goodbye" in response.content + + @pytest.mark.asyncio + async def test_thanks_end_to_end(self, classifier: ToolClassifier) -> None: + """Full chain for thanks greeting.""" + with ( + _mock_dspy_greeting("You're welcome! 
Feel free to ask more."), + _patch_cost_utils(), + ): + classification = await classifier.classify( + query="Thank you!", + conversation_history=[], + language="en", + ) + + request = _make_request("Thank you!") + with patch.object( + classifier.context_workflow.context_analyzer, + "detect_context", + new_callable=AsyncMock, + return_value=( + ContextDetectionResult( + is_greeting=True, + greeting_type="thanks", + can_answer_from_context=False, + reasoning="Thanks detected", + ), + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + assert "welcome" in response.content.lower() + + +class TestClassifyAndRouteContextAnswer: + """Test full classify -> route chain for context-based answers.""" + + @pytest.mark.asyncio + async def test_context_answer_end_to_end(self, classifier: ToolClassifier) -> None: + """Full chain: classify history query -> route to context -> get answer.""" + history = [ + ConversationItem( + authorRole="user", + message="What is the tax rate?", + timestamp="2024-01-01T12:00:00", + ), + ConversationItem( + authorRole="bot", + message="The tax rate is 20%.", + timestamp="2024-01-01T12:00:01", + ), + ] + + with ( + _mock_dspy_context_answer("I mentioned the tax rate is 20%."), + _patch_cost_utils(), + ): + classification = await classifier.classify( + query="What was the rate?", + conversation_history=history, + language="en", + ) + + request = _make_request("What was the rate?", history=history) + with ( + patch.object( + classifier.context_workflow.context_analyzer, + "detect_context", + new_callable=AsyncMock, + return_value=( + ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=True, + reasoning="Tax rate referenced in history", + context_snippet="The tax rate is 20%.", + ), + {"total_cost": 0.002, 
"total_tokens": 100, "num_calls": 1}, + ), + ), + patch.object( + classifier.context_workflow.context_analyzer, + "generate_context_response", + new_callable=AsyncMock, + return_value=( + "I mentioned the tax rate is 20%.", + {"total_cost": 0.003, "num_calls": 1}, + ), + ), + ): + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert classification.workflow.value == "context" + assert isinstance(response, OrchestrationResponse) + assert "20%" in response.content + + @pytest.mark.asyncio + async def test_context_answer_with_long_history( + self, classifier: ToolClassifier + ) -> None: + """Should pass last 10 turns to the analyzer even with longer history.""" + history = [ + ConversationItem( + authorRole="user" if i % 2 == 0 else "bot", + message=f"Message {i}", + timestamp=f"2024-01-01T12:00:{i:02d}", + ) + for i in range(15) + ] + + with ( + _mock_dspy_context_answer("Based on our conversation, here's the answer."), + _patch_cost_utils(), + ): + classification = await classifier.classify( + query="What did we discuss?", + conversation_history=history, + language="en", + ) + + request = _make_request("What did we discuss?", history=history) + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert classification.workflow.value == "context" + assert isinstance(response, OrchestrationResponse) + assert response.content is not None + + +# --------------------------------------------------------------------------- +# Integration: fallback from CONTEXT to RAG +# --------------------------------------------------------------------------- + + +class TestContextToRAGFallback: + """Test that context workflow falls back to RAG when it cannot answer.""" + + @pytest.mark.asyncio + async def test_classify_defaults_to_rag_when_no_context_match( + self, classifier: ToolClassifier, mock_orchestration_service: MagicMock + 
) -> None: + """When context analyzer can't answer, the full route chain ends at RAG. + + With the hybrid-search classifier, classify() returns CONTEXT for + non-service queries. The RAG fallback is triggered inside + route_to_workflow when the context workflow returns None. + """ + with _mock_dspy_no_match(), _patch_cost_utils(): + classification = await classifier.classify( + query="What is a digital signature?", + conversation_history=[], + language="en", + ) + + # Classifier routes non-service queries to CONTEXT first + assert classification.workflow.value == "context" + + # Full route: context can't answer → falls back to RAG + request = _make_request("What is a digital signature?") + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + assert "RAG" in response.content + + @pytest.mark.asyncio + async def test_fallback_to_rag_end_to_end( + self, classifier: ToolClassifier, mock_orchestration_service: MagicMock + ) -> None: + """Full chain: context can't answer -> falls back to RAG -> gets RAG response.""" + with _mock_dspy_no_match(), _patch_cost_utils(): + classification = await classifier.classify( + query="What is a digital signature?", + conversation_history=[], + language="en", + ) + + # Hybrid classifier routes to CONTEXT first; RAG is via fallback + assert classification.workflow.value == "context" + + request = _make_request("What is a digital signature?") + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + # RAG mock returns "RAG fallback answer." 
+ assert "RAG" in response.content + + @pytest.mark.asyncio + async def test_context_workflow_returns_none_triggers_rag_fallback( + self, classifier: ToolClassifier, mock_orchestration_service: MagicMock + ) -> None: + """When context workflow returns None during routing, RAG fallback is used.""" + # Force classification to CONTEXT but with an analysis that will produce None + no_answer_analysis = ContextDetectionResult( + is_greeting=False, + can_answer_from_context=False, + answer=None, + reasoning="Cannot answer", + ) + + # Use the WorkflowType from the same module path the classifier uses + from tool_classifier.enums import WorkflowType as _WorkflowType + + forced_classification = ClassificationResult( + workflow=_WorkflowType.CONTEXT, + confidence=0.95, + metadata={"analysis_result": no_answer_analysis}, + reasoning="Forced for test", + ) + + request = _make_request("Something that context can't answer") + response = await classifier.route_to_workflow( + classification=forced_classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + # Should have fallen through to RAG + assert "RAG" in response.content + + +# --------------------------------------------------------------------------- +# Integration: streaming mode +# --------------------------------------------------------------------------- + + +class TestStreamingIntegration: + """Test the full classify -> route -> stream chain.""" + + @pytest.mark.asyncio + async def test_streaming_greeting_end_to_end( + self, classifier: ToolClassifier + ) -> None: + """Full chain: classify greeting -> route streaming -> collect SSE chunks.""" + with _mock_dspy_greeting("Hello! 
How can I help you?"), _patch_cost_utils(): + classification = await classifier.classify( + query="Hello!", + conversation_history=[], + language="en", + ) + + request = _make_request("Hello!") + with patch.object( + classifier.context_workflow.context_analyzer, + "detect_context", + new_callable=AsyncMock, + return_value=( + ContextDetectionResult( + is_greeting=True, + greeting_type="hello", + can_answer_from_context=False, + reasoning="Greeting detected", + ), + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ): + stream = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=True, + ) + + # Collect chunks inside the mock context so the dspy patch is active + # when the async generator body executes (lazy evaluation). + chunks = [chunk async for chunk in stream] + + # Should have content chunks + END marker + assert len(chunks) >= 2 + for chunk in chunks: + assert chunk.startswith("data: ") + assert chunk.endswith("\n\n") + + # Last chunk should contain END + last_payload = json.loads(chunks[-1][6:-2]) + assert last_payload["payload"]["content"] == "END" + + # Reconstruct content from non-END chunks + content_parts = [] + for chunk in chunks[:-1]: + payload = json.loads(chunk[6:-2]) + content_parts.append(payload["payload"]["content"]) + full_content = "".join(content_parts) + assert "Hello" in full_content + + @pytest.mark.asyncio + async def test_streaming_context_answer_end_to_end( + self, classifier: ToolClassifier + ) -> None: + """Full chain: classify history query -> route streaming -> collect answer.""" + history = [ + ConversationItem( + authorRole="bot", + message="The deadline is March 31st.", + timestamp="2024-01-01T12:00:00", + ), + ] + + with ( + _mock_dspy_context_answer("The deadline is March 31st."), + _patch_cost_utils(), + ): + classification = await classifier.classify( + query="When is the deadline?", + conversation_history=history, + language="en", + ) + + request = 
_make_request("When is the deadline?", history=history) + stream = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=True, + ) + + chunks = [chunk async for chunk in stream] + + assert len(chunks) >= 2 + last_payload = json.loads(chunks[-1][6:-2]) + assert last_payload["payload"]["content"] == "END" + + @pytest.mark.asyncio + async def test_streaming_fallback_to_rag( + self, classifier: ToolClassifier, mock_orchestration_service: MagicMock + ) -> None: + """Streaming: context can't answer -> falls back to RAG streaming.""" + # Force classification to CONTEXT with no answer + no_answer_analysis = ContextDetectionResult( + is_greeting=False, + can_answer_from_context=False, + answer=None, + reasoning="Cannot answer", + ) + + from tool_classifier.enums import WorkflowType as _WorkflowType + + forced_classification = ClassificationResult( + workflow=_WorkflowType.CONTEXT, + confidence=0.95, + metadata={"analysis_result": no_answer_analysis}, + reasoning="Forced for test", + ) + + request = _make_request("Something needing RAG") + stream = await classifier.route_to_workflow( + classification=forced_classification, + request=request, + is_streaming=True, + ) + + chunks = [chunk async for chunk in stream] + + # Should have received RAG streaming output + assert len(chunks) >= 1 + + +# --------------------------------------------------------------------------- +# Integration: cost tracking across the chain +# --------------------------------------------------------------------------- + + +class TestCostTrackingIntegration: + """Test that cost data flows through the full classify -> execute chain.""" + + @pytest.mark.asyncio + async def test_costs_propagated_through_classification( + self, classifier: ToolClassifier + ) -> None: + """Cost dict from context analysis should be tracked during workflow execution. 
+ + With the hybrid-search classifier, costs are tracked inside the context + workflow executor (execute_async/execute_streaming), not in classify(). + The cost dict is stored in the workflow's internal context dictionary. + """ + with _mock_dspy_greeting("Hello!"), _patch_cost_utils(): + classification = await classifier.classify( + query="Hello!", + conversation_history=[], + language="en", + ) + + # Verify classify succeeded and routes to CONTEXT + assert classification.workflow.value == "context" + + # Execute the workflow to trigger cost tracking + request = _make_request("Hello!") + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + # Verify workflow ran successfully (costs tracked internally) + assert isinstance(response, OrchestrationResponse) + assert response.chatId == "integration-test-chat" + + +# --------------------------------------------------------------------------- +# Integration: error resilience +# --------------------------------------------------------------------------- + + +class TestErrorResilience: + """Test that errors in context analysis gracefully fall back to RAG.""" + + @pytest.mark.asyncio + async def test_llm_exception_falls_back_to_rag( + self, classifier: ToolClassifier + ) -> None: + """If context analyzer LLM call raises, the route chain falls back to RAG. + + With the hybrid-search classifier, classify() returns CONTEXT for + non-service queries. When the context workflow LLM call raises, the + context workflow returns None and route_to_workflow falls back to RAG. 
+ """ + with ( + patch( + "dspy.ChainOfThought", + return_value=MagicMock(side_effect=Exception("LLM unavailable")), + ), + _patch_cost_utils(), + ): + classification = await classifier.classify( + query="Hello!", + conversation_history=[], + language="en", + ) + + # classify() returns CONTEXT (non-service query) + assert classification.workflow.value == "context" + + # Full route: context LLM fails → falls back to RAG gracefully + request = _make_request("Hello!") + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + assert "RAG" in response.content + + @pytest.mark.asyncio + async def test_json_parse_error_falls_back_to_rag( + self, classifier: ToolClassifier + ) -> None: + """If LLM returns invalid JSON, the route chain falls back to RAG. + + JSON parse failure causes context analysis to return is_greeting=False, + answer=None. The context workflow then returns None and the fallback + chain routes to RAG. 
+ """ + mock_response = MagicMock() + mock_response.analysis_result = "not valid json at all" + + with ( + patch( + "dspy.ChainOfThought", + return_value=MagicMock(return_value=mock_response), + ), + _patch_cost_utils(), + ): + classification = await classifier.classify( + query="Hello!", + conversation_history=[], + language="en", + ) + + # classify() returns CONTEXT (non-service query) + assert classification.workflow.value == "context" + + # Full route: JSON parse fails → context returns None → RAG fallback + request = _make_request("Hello!") + response = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=False, + ) + + assert isinstance(response, OrchestrationResponse) + assert "RAG" in response.content From c2ef115fb05fc507b7cfcd9558d930877e6a05c4 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Fri, 13 Mar 2026 09:50:10 +0530 Subject: [PATCH 25/27] delete unnessary files --- new.txt | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 new.txt diff --git a/new.txt b/new.txt deleted file mode 100644 index 9e7525f..0000000 --- a/new.txt +++ /dev/null @@ -1,38 +0,0 @@ -1️⃣ Broneeringu kinnitus (Booking Confirmation) - -Estonian → English - -Kas minu broneering on kinnitatud? -→ Is my booking confirmed? - -Palun kinnita minu broneering. -→ Please confirm my booking. - -Kas broneering sai edukalt tehtud? -→ Was the booking successfully made? - -2️⃣ Kalastusloa uuendamise teade (Fishing License Renewal) - -Estonian → English - -Kas minu kalastusluba tuleb uuendada? -→ Do I need to renew my fishing license? - -Millal mu kalastusluba aegub? -→ When does my fishing license expire? - -Kas mu kalastusluba on veel kehtiv? -→ Is my fishing license still valid? - -3️⃣ Koolivaheajad (School Holidays) - -Estonian → English - -Millal on järgmine koolivaheaeg? -→ When is the next school holiday? - -Kas sa saad öelda selle aasta koolivaheajad? 
-→ Can you tell me the school holidays for this year? - -Millal algab suvevaheaeg? -→ When does the summer holiday start? \ No newline at end of file From 97f6f1ad7d0573c71c5912e6dae6f04c2540f529 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Fri, 13 Mar 2026 10:57:54 +0530 Subject: [PATCH 26/27] added requested changes --- tests/test_context_workflow.py | 20 +++++---- tests/test_context_workflow_integration.py | 51 +++++++++++++++++----- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/tests/test_context_workflow.py b/tests/test_context_workflow.py index 1362a72..9a6d7e7 100644 --- a/tests/test_context_workflow.py +++ b/tests/test_context_workflow.py @@ -7,7 +7,7 @@ from src.tool_classifier.workflows.context_workflow import ContextWorkflowExecutor from src.tool_classifier.context_analyzer import ContextDetectionResult -from models.request_models import ( +from src.models.request_models import ( OrchestrationRequest, OrchestrationResponse, ConversationItem, @@ -113,12 +113,13 @@ async def test_execute_async_greeting_estonian( with patch.object( context_workflow.context_analyzer, - "detect_context", - return_value=( + "detect_context_with_summary_fallback", + new_callable=AsyncMock, + ) as mock_detect: + mock_detect.return_value = ( mock_analysis, {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, - ), - ): + ) context_dict = {} response = await context_workflow.execute_async( sample_request, context_dict @@ -155,12 +156,13 @@ async def test_execute_async_greeting_english( with patch.object( context_workflow.context_analyzer, - "detect_context", - return_value=( + "detect_context_with_summary_fallback", + new_callable=AsyncMock, + ) as mock_detect: + mock_detect.return_value = ( mock_analysis, {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, - ), - ): + ) response = await context_workflow.execute_async(sample_request, {}) assert response is not None diff --git a/tests/test_context_workflow_integration.py 
b/tests/test_context_workflow_integration.py index bca2af2..a11a7f4 100644 --- a/tests/test_context_workflow_integration.py +++ b/tests/test_context_workflow_integration.py @@ -253,7 +253,7 @@ async def test_greeting_end_to_end_non_streaming( request = _make_request("Hello!") with patch.object( classifier.context_workflow.context_analyzer, - "detect_context", + "detect_context_with_summary_fallback", new_callable=AsyncMock, return_value=( ContextDetectionResult( @@ -295,7 +295,7 @@ async def test_estonian_greeting_end_to_end( request = _make_request("Tere!") with patch.object( classifier.context_workflow.context_analyzer, - "detect_context", + "detect_context_with_summary_fallback", new_callable=AsyncMock, return_value=( ContextDetectionResult( @@ -329,7 +329,7 @@ async def test_goodbye_end_to_end(self, classifier: ToolClassifier) -> None: request = _make_request("Goodbye!") with patch.object( classifier.context_workflow.context_analyzer, - "detect_context", + "detect_context_with_summary_fallback", new_callable=AsyncMock, return_value=( ContextDetectionResult( @@ -366,7 +366,7 @@ async def test_thanks_end_to_end(self, classifier: ToolClassifier) -> None: request = _make_request("Thank you!") with patch.object( classifier.context_workflow.context_analyzer, - "detect_context", + "detect_context_with_summary_fallback", new_callable=AsyncMock, return_value=( ContextDetectionResult( @@ -421,7 +421,7 @@ async def test_context_answer_end_to_end(self, classifier: ToolClassifier) -> No with ( patch.object( classifier.context_workflow.context_analyzer, - "detect_context", + "detect_context_with_summary_fallback", new_callable=AsyncMock, return_value=( ContextDetectionResult( @@ -613,7 +613,7 @@ async def test_streaming_greeting_end_to_end( request = _make_request("Hello!") with patch.object( classifier.context_workflow.context_analyzer, - "detect_context", + "detect_context_with_summary_fallback", new_callable=AsyncMock, return_value=( ContextDetectionResult( @@ -666,6 
+666,10 @@ async def test_streaming_context_answer_end_to_end( ), ] + async def _mock_history_stream() -> AsyncGenerator[str, None]: + yield 'data: {"chatId":"integration-test-chat","payload":{"content":"The deadline is March 31st."}}\n\n' + yield 'data: {"chatId":"integration-test-chat","payload":{"content":"END"}}\n\n' + with ( _mock_dspy_context_answer("The deadline is March 31st."), _patch_cost_utils(), @@ -677,13 +681,36 @@ async def test_streaming_context_answer_end_to_end( ) request = _make_request("When is the deadline?", history=history) - stream = await classifier.route_to_workflow( - classification=classification, - request=request, - is_streaming=True, - ) + with ( + patch.object( + classifier.context_workflow.context_analyzer, + "detect_context_with_summary_fallback", + new_callable=AsyncMock, + return_value=( + ContextDetectionResult( + is_greeting=False, + greeting_type="hello", + can_answer_from_context=True, + reasoning="Deadline referenced in history", + context_snippet="The deadline is March 31st.", + ), + {"total_cost": 0.001, "total_tokens": 50, "num_calls": 1}, + ), + ), + patch.object( + classifier.context_workflow, + "_create_history_stream", + new_callable=AsyncMock, + return_value=_mock_history_stream(), + ), + ): + stream = await classifier.route_to_workflow( + classification=classification, + request=request, + is_streaming=True, + ) - chunks = [chunk async for chunk in stream] + chunks = [chunk async for chunk in stream] assert len(chunks) >= 2 last_payload = json.loads(chunks[-1][6:-2]) From 58a64e98d30c19eda8232d8b66b5dd4647ca8d52 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Fri, 20 Mar 2026 14:09:24 +0530 Subject: [PATCH 27/27] fixed open reference and context links in a new tab issue --- GUI/src/pages/TestModel/index.tsx | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/GUI/src/pages/TestModel/index.tsx b/GUI/src/pages/TestModel/index.tsx index 4829d12..92e7dfd 100644 --- 
a/GUI/src/pages/TestModel/index.tsx +++ b/GUI/src/pages/TestModel/index.tsx @@ -1,7 +1,7 @@ import { useMutation, useQuery } from '@tanstack/react-query'; import { Button, FormSelect, FormTextarea, Collapsible } from 'components'; import CircularSpinner from 'components/molecules/CircularSpinner/CircularSpinner'; -import { FC, useState } from 'react'; +import { ComponentPropsWithoutRef, FC, useState } from 'react'; import { useTranslation } from 'react-i18next'; import ReactMarkdown from 'react-markdown'; import remarkGfm from 'remark-gfm'; @@ -87,6 +87,17 @@ const TestLLM: FC = () => { })); }; + const markdownComponents = { + ol: ({children}: any) => ( +
    + {children} +
+ ), + a: (props: ComponentPropsWithoutRef<"a">) => ( + + ), + }; + return (
{isLoadingConnections ? ( @@ -141,7 +152,7 @@ const TestLLM: FC = () => {
Response:
- + {inferenceResult.content}
@@ -159,7 +170,7 @@ const TestLLM: FC = () => { Rank {contextItem.rank}
- + {contextItem.chunkRetrieved}