Skip to content

Commit 0b2ed27

Browse files
authored
Merge pull request #79 from rootcodelabs/optimization-Bimsara
Get update from optimization-Bimsara into integrate-streaming
2 parents 5d442b6 + 087be37 commit 0b2ed27

29 files changed

+2464
-386
lines changed

generate_presigned_url.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
# List of files to process
1616
files_to_process: List[Dict[str, str]] = [
17-
{"bucket": "ckb", "key": "sm_someuuid/sm_someuuid.zip"},
17+
{"bucket": "ckb", "key": "ID.ee/ID.ee.zip"},
1818
]
1919

2020
# Generate presigned URLs

src/contextual_retrieval/bm25_search.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,19 +141,19 @@ async def search_bm25(
141141

142142
logger.info(f"BM25 search found {len(results)} chunks")
143143

144-
# Debug logging for BM25 results
145-
logger.info("=== BM25 SEARCH RESULTS BREAKDOWN ===")
144+
# Detailed results at DEBUG level (loguru filters based on log level config)
145+
logger.debug("=== BM25 SEARCH RESULTS BREAKDOWN ===")
146146
for i, chunk in enumerate(results[:10]): # Show top 10 results
147147
content_preview = (
148148
(chunk.get("original_content", "")[:150] + "...")
149149
if len(chunk.get("original_content", "")) > 150
150150
else chunk.get("original_content", "")
151151
)
152-
logger.info(
152+
logger.debug(
153153
f" Rank {i + 1}: BM25_score={chunk['score']:.4f}, id={chunk.get('chunk_id', 'unknown')}"
154154
)
155-
logger.info(f" content: '{content_preview}'")
156-
logger.info("=== END BM25 SEARCH RESULTS ===")
155+
logger.debug(f" content: '{content_preview}'")
156+
logger.debug("=== END BM25 SEARCH RESULTS ===")
157157

158158
return results
159159

src/contextual_retrieval/qdrant_search.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,19 +148,19 @@ async def search_contextual_embeddings_direct(
148148
f"Semantic search found {len(all_results)} chunks across {len(collections)} collections"
149149
)
150150

151-
# Debug logging for final sorted results
152-
logger.info("=== SEMANTIC SEARCH RESULTS BREAKDOWN ===")
151+
# Detailed results at DEBUG level (loguru filters based on log level config)
152+
logger.debug("=== SEMANTIC SEARCH RESULTS BREAKDOWN ===")
153153
for i, chunk in enumerate(all_results[:10]): # Show top 10 results
154154
content_preview = (
155155
(chunk.get("original_content", "")[:150] + "...")
156156
if len(chunk.get("original_content", "")) > 150
157157
else chunk.get("original_content", "")
158158
)
159-
logger.info(
159+
logger.debug(
160160
f" Rank {i + 1}: score={chunk['score']:.4f}, collection={chunk.get('source_collection', 'unknown')}, id={chunk['chunk_id']}"
161161
)
162-
logger.info(f" content: '{content_preview}'")
163-
logger.info("=== END SEMANTIC SEARCH RESULTS ===")
162+
logger.debug(f" content: '{content_preview}'")
163+
logger.debug("=== END SEMANTIC SEARCH RESULTS ===")
164164

165165
return all_results
166166

src/contextual_retrieval/rank_fusion.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ def fuse_results(
6565

6666
logger.info(f"Fusion completed: {len(final_results)} final results")
6767

68-
# Debug logging for final fused results
69-
logger.info("=== RANK FUSION FINAL RESULTS ===")
68+
# Detailed results at DEBUG level (loguru filters based on log level config)
69+
logger.debug("=== RANK FUSION FINAL RESULTS ===")
7070
for i, chunk in enumerate(final_results):
7171
content_preview_len = self._config.rank_fusion.content_preview_length
7272
content_preview = (
@@ -78,13 +78,13 @@ def fuse_results(
7878
bm25_score = chunk.get("bm25_score", 0)
7979
fused_score = chunk.get("fused_score", 0)
8080
search_type = chunk.get("search_type", QueryTypeConstants.UNKNOWN)
81-
logger.info(
81+
logger.debug(
8282
f" Final Rank {i + 1}: fused_score={fused_score:.4f}, semantic={sem_score:.4f}, bm25={bm25_score:.4f}, type={search_type}"
8383
)
84-
logger.info(
84+
logger.debug(
8585
f" id={chunk.get('chunk_id', QueryTypeConstants.UNKNOWN)}, content: '{content_preview}'"
8686
)
87-
logger.info("=== END RANK FUSION RESULTS ===")
87+
logger.debug("=== END RANK FUSION RESULTS ===")
8888

8989
return final_results
9090

0 commit comments

Comments
 (0)