Merged
2 changes: 1 addition & 1 deletion .env.gui
@@ -1,4 +1,4 @@
RELEASE=test
RELEASE=gui
VERSION=1
BUILD=1
FIX=0
4 changes: 4 additions & 0 deletions .env.llm_orchestration_service
@@ -0,0 +1,4 @@
RELEASE=orchestration
VERSION=1
BUILD=1
FIX=0
42 changes: 42 additions & 0 deletions .github/workflows/ci-build-image-llm-orchestration-service.yml
@@ -0,0 +1,42 @@
name: Build and publish llm_orchestration_service

on:
push:
branches:
- wip
paths:
- '.env.llm_orchestration_service'

jobs:
PackageDeploy:
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v2

- name: Docker Setup BuildX
uses: docker/setup-buildx-action@v2

- name: Load environment variables and set them
run: |
if [ -f .env.llm_orchestration_service ]; then
export $(cat .env.llm_orchestration_service | grep -v '^#' | xargs)
fi
echo "RELEASE=$RELEASE" >> $GITHUB_ENV
echo "VERSION=$VERSION" >> $GITHUB_ENV
echo "BUILD=$BUILD" >> $GITHUB_ENV
echo "FIX=$FIX" >> $GITHUB_ENV
- name: Set repo
run: |
LOWER_CASE_GITHUB_REPOSITORY=$(echo $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]')
echo "DOCKER_TAG_CUSTOM=ghcr.io/${LOWER_CASE_GITHUB_REPOSITORY}:$RELEASE-$VERSION.$BUILD.$FIX" >> $GITHUB_ENV
echo "$GITHUB_ENV"
- name: Docker Build
run: |
docker image build --tag $DOCKER_TAG_CUSTOM -f Dockerfile.llm_orchestration_service .

- name: Log in to GitHub container registry
run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin

- name: Push Docker image to ghcr
run: docker push $DOCKER_TAG_CUSTOM
14 changes: 14 additions & 0 deletions DSL/DMapper/rag-search/hbs/bot_responses_to_messages.handlebars
@@ -0,0 +1,14 @@
[
{{#each data.botMessages}}
{
"chatId": "{{../data.chatId}}",
"content": "{{filterControlCharacters result}}",
"buttons": "[{{#each ../data.buttons}}{\"title\": \"{{#if (eq title true)}}Yes{{else if (eq title false)}}No{{else}}{{{title}}}{{/if}}\",\"payload\": \"{{{payload}}}\"}{{#unless @last}},{{/unless}}{{/each}}]",
"authorTimestamp": "{{../data.authorTimestamp}}",
"authorId": "{{../data.authorId}}",
"authorFirstName": "{{../data.authorFirstName}}",
"authorLastName": "{{../data.authorLastName}}",
"created": "{{../data.created}}"
}{{#unless @last}},{{/unless}}
{{/each}}
]
29 changes: 29 additions & 0 deletions DSL/DMapper/rag-search/lib/helpers.js
@@ -168,6 +168,11 @@ export function getAgencyDataAvailable(agencyId) {
return (combinedValue % 2) === 0;
}

export function filterControlCharacters(str) {
if (typeof str !== "string") return str;
return str.replace(/[\x00-\x1F\x7F]/g, " ");
}

export function json(context) {
return JSON.stringify(context);
}
@@ -269,3 +274,27 @@ export function filterDataByAgency(aggregatedData, startIndex, agencyId, pageSize
return JSON.stringify(result);

}

export function calculateDateDifference(value) {
const { startDate, endDate, outputType } = value;
const sDate = new Date(startDate);
const eDate = new Date(endDate);
const timeDifferenceInSeconds = (eDate.getTime() - sDate.getTime()) / 1000;

switch (outputType?.toLowerCase()) {
case 'years':
return eDate.getFullYear() - sDate.getFullYear();
case 'months':
return eDate.getMonth() - sDate.getMonth() +
(12 * (eDate.getFullYear() - sDate.getFullYear()));
case 'hours':
return Math.round(Math.abs(eDate - sDate) / 36e5);
case 'minutes':
return Math.floor(timeDifferenceInSeconds / 60);
case 'seconds':
return timeDifferenceInSeconds;
default:
return Math.round(timeDifferenceInSeconds / (3600 * 24));
}
}

5 changes: 4 additions & 1 deletion constants.ini
@@ -9,4 +9,7 @@ RAG_SEARCH_CRON_MANAGER=http://cron-manager:9010
RAG_SEARCH_LLM_ORCHESTRATOR=http://llm-orchestration-service:8100/orchestrate
RAG_SEARCH_PROMPT_REFRESH=http://llm-orchestration-service:8100/prompt-config/refresh
DOMAIN=localhost
DB_PASSWORD=dbadmin
DB_PASSWORD=dbadmin
RAG_SEARCH_RUUTER_PUBLIC_INTERNAL_SERVICE=http://ruuter-public:8086/services
SERVICE_DMAPPER_HBS=http://data-mapper:3000/hbs/rag-search
SERVICE_PROJECT_LAYER=services
59 changes: 44 additions & 15 deletions docs/HYBRID_SEARCH_CLASSIFICATION.md
@@ -53,7 +53,8 @@ The system has two phases:
| `src/intent_data_enrichment/main_enrichment.py` | Orchestrates per-example and summary point creation |
| `src/intent_data_enrichment/qdrant_manager.py` | Qdrant collection management, upsert, and deletion |
| `src/intent_data_enrichment/api_client.py` | LLM API calls (context generation, embeddings) |
| `src/intent_data_enrichment/models.py` | `EnrichedService` data model |
| `src/intent_data_enrichment/models.py` | `ServiceData`, `EnrichedService`, `EnrichmentResult` data models |
| `src/intent_data_enrichment/constants.py` | `EnrichmentConstants` — API URLs, Qdrant config, vector sizes, LLM prompt template |
| `src/tool_classifier/sparse_encoder.py` | BM25-style sparse vector computation |

### What Changed: Single Embedding → Per-Example Indexing
@@ -78,8 +79,8 @@ Service "Valuutakursid" → 4 Qdrant points
dense: 3072-dim embedding of this exact text
sparse: BM25 vector → {euro: 1.0, gbp: 1.0, kurss: 1.0, ...}

Point 3 (summary): "Valuutakursid - Kasutaja soovib infot..."
dense: 3072-dim embedding of name + description + LLM context
Point 3 (summary): "Service Name: Valuutakursid\nDescription: ...\nExample Queries: ...\nRequired Entities: ...\nEnriched Context: ..."
dense: 3072-dim embedding of combined text
sparse: BM25 vector of combined text
```

@@ -101,9 +102,12 @@ Service "Valuutakursid" → 4 Qdrant points

```python
# sparse_encoder.py
SPARSE_VOCAB_SIZE = 50_000

text = "Mis suhe on euro ja usd vahel"
tokens = re.findall(r"\w+", text.lower()) # ["mis", "suhe", "on", "euro", ...]
# Each token → hashed to index in [0, VOCAB_SIZE), value = term frequency
# Each token → MD5 hash (first 4 bytes) to index in [0, SPARSE_VOCAB_SIZE), value = term frequency
# Collisions are handled by summing values at the same index
# Output: SparseVector(indices=[hash("mis"), hash("euro"), ...], values=[1.0, 1.0, ...])
```
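The hashing scheme sketched above can be made concrete in a few lines. This is a minimal reconstruction based only on the comments in the snippet (MD5, first 4 bytes, 50,000-entry vocabulary, `\w+` tokenization); the byte order and function names are assumptions, not the actual `sparse_encoder.py` implementation:

```python
import hashlib
import re
from collections import defaultdict

SPARSE_VOCAB_SIZE = 50_000

def token_index(token: str) -> int:
    # First 4 bytes of the MD5 digest, mapped into [0, SPARSE_VOCAB_SIZE).
    # Big-endian interpretation is an assumption.
    digest = hashlib.md5(token.encode("utf-8")).digest()
    return int.from_bytes(digest[:4], "big") % SPARSE_VOCAB_SIZE

def encode_sparse(text: str) -> tuple[list[int], list[float]]:
    # Term frequencies; hash collisions sum at the same index.
    counts: defaultdict[int, float] = defaultdict(float)
    for token in re.findall(r"\w+", text.lower()):
        counts[token_index(token)] += 1.0
    indices = sorted(counts)
    return indices, [counts[i] for i in indices]

indices, values = encode_sparse("Mis suhe on euro ja usd vahel")
```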

@@ -146,7 +150,7 @@ service_enrichment.sh
│ ├─ Generate dense embedding (text-embedding-3-large)
│ └─ Generate sparse vector (BM25 term hashing)
├─ Step 3: Summary point (name + description + LLM context):
├─ Step 3: Summary point (name + description + examples + entities + LLM context):
│ ├─ Generate dense embedding
│ └─ Generate sparse vector
@@ -155,6 +159,17 @@ service_enrichment.sh
└─ Step 5: Bulk upsert N+1 points to Qdrant
```
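The pipeline above yields N example points plus one summary point per service. A sketch of how such a point list could be assembled — the ID scheme, payload fields, and the `embed`/`sparse` callables are hypothetical stand-ins, not the actual enrichment code:

```python
def assemble_points(service_id, example_texts, summary_text, embed, sparse):
    # embed(text) -> dense vector, sparse(text) -> {index: value} mapping;
    # both are stand-ins for the real embedding / BM25 calls.
    points = []
    for i, text in enumerate(example_texts):
        points.append({
            "id": f"{service_id}-example-{i}",  # hypothetical ID scheme
            "vector": {"dense": embed(text), "sparse": sparse(text)},
            "payload": {"service_id": service_id, "point_type": "example", "text": text},
        })
    points.append({
        "id": f"{service_id}-summary",
        "vector": {"dense": embed(summary_text), "sparse": sparse(summary_text)},
        "payload": {"service_id": service_id, "point_type": "summary", "text": summary_text},
    })
    return points  # N example points + 1 summary point

pts = assemble_points("valuutakursid", ["Mis on euro kurss?"],
                      "Service Name: Valuutakursid", lambda t: [0.0], lambda t: {})
```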

### Summary Point Combined Text Format

The summary point embeds a structured concatenation:
```
Service Name: {name}
Description: {description}
Example Queries: {example1} | {example2} | ...
Required Entities: {entity1}, {entity2}, ...
Enriched Context: {LLM-generated context}
```
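Assembling this combined text is a straightforward concatenation. The function name and the field names on the input dict below are hypothetical; only the output format comes from the template above:

```python
def build_summary_text(service: dict) -> str:
    # Structured concatenation matching the documented summary-point format.
    return "\n".join([
        f"Service Name: {service['name']}",
        f"Description: {service['description']}",
        "Example Queries: " + " | ".join(service["examples"]),
        "Required Entities: " + ", ".join(service["required_entities"]),
        f"Enriched Context: {service['context']}",
    ])

text = build_summary_text({
    "name": "Valuutakursid",
    "description": "Returns exchange rates",
    "examples": ["Mis on euro kurss?", "Kui palju maksab USD?"],
    "required_entities": ["currency_from", "currency_to"],
    "context": "Kasutaja soovib infot valuutakursside kohta",
})
```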

### Service Deletion

When a service is deactivated, all its points are removed:
@@ -186,12 +201,12 @@ POST /collections/intent_collections/points/query
{
"query": [0.023, -0.041, ...], # 3072-dim dense vector
"using": "dense",
"limit": 6,
"limit": 6, # DENSE_SEARCH_TOP_K * 2 (3 * 2 = 6, allows dedup)
"with_payload": true
}
```

Results are deduplicated by `service_id` (best score per service).
Results are deduplicated by `service_id` (best score per service), returning up to `DENSE_SEARCH_TOP_K` (3) unique services.
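The deduplication step can be sketched as follows. The hit shape and function name are assumptions; only the best-score-per-service rule and the top-k truncation come from the description above:

```python
def dedupe_by_service(hits, top_k=3):
    # Keep only the best-scoring point per service_id, then truncate to top_k.
    best = {}
    for hit in sorted(hits, key=lambda h: h["score"], reverse=True):
        sid = hit["payload"]["service_id"]
        best.setdefault(sid, hit)  # first (highest-scored) hit per service wins
    return list(best.values())[:top_k]

hits = [
    {"score": 0.55, "payload": {"service_id": "valuutakursid"}},
    {"score": 0.41, "payload": {"service_id": "valuutakursid"}},
    {"score": 0.31, "payload": {"service_id": "ilmateade"}},
]
unique = dedupe_by_service(hits)
```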

**Why not use RRF scores?**
Qdrant's RRF uses `1/(1+rank)`, producing fixed scores (0.50, 0.33, 0.25) regardless of actual relevance. A perfect match and a random query both get 0.50 for rank 1. Cosine similarity reflects true semantic closeness.
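The fixed-score behaviour is easy to verify directly:

```python
def rrf_score(rank: int) -> float:
    # Reciprocal rank fusion as described: 1 / (1 + rank), rank starting at 1.
    return 1.0 / (1.0 + rank)

# Identical for every query, regardless of actual relevance:
scores = [round(rrf_score(r), 2) for r in (1, 2, 3)]
```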
@@ -203,6 +218,7 @@ Sparse prefetch is only included if the query produces a non-empty sparse vector

```python
# classifier.py → _hybrid_search()
# First checks collection exists and has data (points_count > 0)
POST /collections/intent_collections/points/query
{
"prefetch": [
@@ -215,6 +231,10 @@ POST /collections/intent_collections/points/query
}
```

> **Note:** Prefetch limit is `HYBRID_SEARCH_TOP_K * 2` (5 * 2 = 10). The sparse prefetch is conditionally added only when `sparse_vector.is_empty()` is False.

Hybrid results are also deduplicated by `service_id` (best RRF score per service).

### Routing Decision

```
@@ -251,6 +271,7 @@ Dense: Valuutakursid (cosine=0.5511), gap=0.2371
→ Runs intent detection + entity extraction on matched service only
→ Entities: {currency_from: EUR, currency_to: THB}
→ Validation: PASSED ✓
→ Calls service endpoint → Returns response
```

### Path 3: AMBIGUOUS Service Match → LLM Confirmation
@@ -285,17 +306,17 @@ SERVICE (Layer 1) → CONTEXT (Layer 2) → RAG (Layer 3) → OOD (Layer 4)
| Path | Intent Detection | Entity Extraction |
|------|-----------------|-------------------|
| HIGH-CONFIDENCE | On 1 service (matched) | Yes — from LLM output |
| AMBIGUOUS | On 2-3 candidates | Yes — if LLM matches |
| AMBIGUOUS | On top candidates (from `top_results`) | Yes — if LLM matches |
| Non-service | Not run | Not run |

### Intent Detection Module (DSPy)

**File:** `src/tool_classifier/intent_detector.py`

The DSPy `IntentDetectionModule` receives:
The DSPy `IntentDetectionModule` uses `dspy.Predict` (direct prediction) and receives:
- User query
- Candidate services (formatted as JSON)
- Conversation history (last 3 turns)
- Candidate services (formatted as JSON with service_id, name, description, required_entities, top 3 examples)
- Conversation history (last 3 turns, formatted as `{authorRole}: {message}`)

It returns:
```json
@@ -336,6 +357,18 @@ Entities dict → ordered array matching service schema:
# Array: ["EUR", "THB"]
```

### Service Endpoint Call

After entity validation and transformation, the workflow calls the Ruuter active service endpoint:

```python
# Endpoint: {RUUTER_SERVICE_BASE_URL}/services/active/{clean_service_name}
# Payload: {"chatId": "...", "authorId": "...", "input": ["EUR", "THB"]}
# Response: {"response": [{"content": "..."}]} → extracts content string
```

In streaming mode, the service content is wrapped as SSE events and streamed to the client.
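The request and response shapes can be sketched as pure helpers. The function names are hypothetical; the payload fields and response structure come from the comments above:

```python
def build_service_payload(chat_id, author_id, entities):
    # Request body sent to the active service endpoint.
    return {"chatId": chat_id, "authorId": author_id, "input": entities}

def extract_service_content(body):
    # Response shape per the doc: {"response": [{"content": "..."}]}
    return body["response"][0]["content"]

payload = build_service_payload("chat-1", "user-1", ["EUR", "THB"])
content = extract_service_content({"response": [{"content": "1 EUR = 38.5 THB"}]})
```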

---

## Thresholds & Configuration
Expand Down Expand Up @@ -387,7 +420,3 @@ Based on empirical testing with 42 Estonian queries (20 SERVICE, 22 RAG):
- **Adding more services:** Score distributions improve naturally — service queries score higher and non-service queries score lower.
- **Adding more examples per service:** Diverse phrasings expand the embedding coverage. Aim for 5-8 examples per service covering formal + informal + different word orders.
- **Adjusting thresholds:** Monitor the logs (`Dense search: top=... cosine=...`) and adjust if real-world scores differ from test data.

-### Current Limitations
-
-- **Step 7 (Ruuter service call) is not yet implemented.** The service workflow currently returns a debug response with service metadata (endpoint URL, HTTP method, extracted entities) instead of calling the actual Ruuter service endpoint. See the `TODO: STEP 7` comments in `src/tool_classifier/workflows/service_workflow.py`.