Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
3020e31
Merge pull request #122 from rootcodelabs/wip
nuwangeek Feb 20, 2026
6e5c22c
remove unwanted file
nuwangeek Feb 20, 2026
38d0533
updated changes
nuwangeek Feb 20, 2026
72b8ae1
fixed requested changes
nuwangeek Feb 20, 2026
9b7bc7b
fixed issue
nuwangeek Feb 20, 2026
46dd6c4
Merge pull request #123 from rootcodelabs/llm-316
nuwangeek Feb 21, 2026
068f4e0
Merge pull request #124 from buerokratt/wip
Thirunayan22 Feb 21, 2026
a2084e5
service workflow implementation without calling service endpoints
nuwangeek Feb 24, 2026
5216c09
Merge pull request #126 from rootcodelabs/wip
nuwangeek Feb 24, 2026
864ad30
fixed requested changes
nuwangeek Feb 24, 2026
25f9614
fixed issues
nuwangeek Feb 24, 2026
69c1279
protocol related requested changes
nuwangeek Feb 24, 2026
07f2e0f
fixed requested changes
nuwangeek Feb 24, 2026
f63f777
update time tracking
nuwangeek Feb 25, 2026
5429bc0
added time tracking and relocated input guardrail before tool classifier
nuwangeek Feb 25, 2026
721263a
fixed issue
nuwangeek Feb 25, 2026
6ed02d1
Merge pull request #127 from buerokratt/wip
nuwangeek Feb 25, 2026
7238baa
Merge branch 'optimization/llm-304' into wip
nuwangeek Feb 25, 2026
ae7cfa0
Merge pull request #128 from rootcodelabs/wip
nuwangeek Feb 25, 2026
f8a82b6
fixed issue
nuwangeek Feb 25, 2026
3b89fba
added hybrid search for the service detection
nuwangeek Feb 26, 2026
789f062
update tool classifier
nuwangeek Mar 1, 2026
609e6d5
fixing merge conflicts
nuwangeek Mar 1, 2026
a30c52d
Merge pull request #129 from buerokratt/wip
nuwangeek Mar 1, 2026
8dfc155
Merge pull request #130 from rootcodelabs/wip
nuwangeek Mar 1, 2026
3d7fb85
updated intent data enrichment and service classification flow perfor…
nuwangeek Mar 2, 2026
bee9fbf
fixed issue
nuwangeek Mar 2, 2026
4888045
Merge pull request #131 from rootcodelabs/optimization/data-enrichment
nuwangeek Mar 3, 2026
0a0806f
optimize first user query response generation time
nuwangeek Mar 3, 2026
1eb8b47
fixed pr reviewed issues
nuwangeek Mar 3, 2026
94b4f39
Merge pull request #132 from buerokratt/wip
nuwangeek Mar 3, 2026
82b3fe5
Merge branch 'optimization/vector-indexer' into wip
nuwangeek Mar 3, 2026
1b4ada9
Merge pull request #134 from buerokratt/wip
nuwangeek Mar 3, 2026
bb1601f
service integration
nuwangeek Mar 8, 2026
9ce1da2
context based response generation flow
nuwangeek Mar 9, 2026
d647f86
fixed pr review suggested issues
nuwangeek Mar 9, 2026
d67214e
Merge pull request #135 from rootcodelabs/llm-309
nuwangeek Mar 9, 2026
b90ab52
Merge pull request #136 from rootcodelabs/llm-310
nuwangeek Mar 9, 2026
6c46d3c
removed service project layer
nuwangeek Mar 10, 2026
d3e1494
fixed issues
nuwangeek Mar 12, 2026
4add446
Merge pull request #137 from rootcodelabs/llm-310
nuwangeek Mar 12, 2026
c2ef115
delete unnecessary files
nuwangeek Mar 13, 2026
97f6f1a
added requested changes
nuwangeek Mar 13, 2026
0be284e
Merge pull request #138 from buerokratt/wip
nuwangeek Mar 17, 2026
a32ca6d
Merge branch 'llm/service-integration' into wip
nuwangeek Mar 17, 2026
4276e7d
Merge pull request #140 from buerokratt/wip
nuwangeek Mar 18, 2026
cb1bdc7
validate input sanitizer is compatible with mcq prefixes
nuwangeek Mar 18, 2026
72d2c1f
updated OrchestrationResponse to support buttons field
nuwangeek Mar 19, 2026
b2488ba
removed md file
nuwangeek Mar 19, 2026
6e49497
Enhance orchestration logging and update response models for choice b…
nuwangeek Mar 20, 2026
24259a9
Merge pull request #141 from buerokratt/wip
nuwangeek Mar 20, 2026
8ff7ecf
Merge pull request #142 from rootcodelabs/wip
nuwangeek Mar 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/llm_orchestration_service_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ async def orchestrate_llm_request(
# Process the request
response = await orchestration_service.process_orchestration_request(request)

buttons_present = bool(response.buttons)
buttons_count = len(response.buttons) if response.buttons else 0
logger.info(
f"[orchestrate] buttons in response for chatId {request.chatId}: "
f"present={buttons_present}, count={buttons_count}"
)
logger.info(f"Successfully processed request for chatId: {request.chatId}")
return response

Expand Down Expand Up @@ -364,6 +370,10 @@ async def test_orchestrate_llm_request(

# If response is already TestOrchestrationResponse (when environment is testing), return it directly
if isinstance(response, TestOrchestrationResponse):
buttons_count = len(response.buttons) if response.buttons else 0
logger.info(
f"[test_orchestrate] buttons present in response: {buttons_count}"
)
logger.info(
f"Successfully processed test request for environment: {request.environment}"
)
Expand All @@ -375,9 +385,9 @@ async def test_orchestrate_llm_request(
questionOutOfLLMScope=response.questionOutOfLLMScope,
inputGuardFailed=response.inputGuardFailed,
content=response.content,
buttons=response.buttons,
chunks=None, # OrchestrationResponse doesn't have chunks
)

logger.info(
f"Successfully processed test request for environment: {request.environment}"
)
Expand Down
18 changes: 18 additions & 0 deletions src/models/request_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,16 @@ class DocumentReference(BaseModel):
relevance_score: float = Field(..., description="Relevance score (0-1)")


class ChoiceButton(BaseModel):
    """One multiple-choice (MCQ) button carried in an orchestration response.

    Both fields are required strings.
    """

    # Label rendered on the button.
    title: str = Field(
        ...,
        description="Button label shown to the user",
    )
    # String sent back when the user clicks the button.
    payload: str = Field(
        ...,
        description="Routing string sent when the button is clicked (e.g. '#service, /POST/...')",
    )


class OrchestrationResponse(BaseModel):
"""Model for LLM orchestration response."""

Expand All @@ -150,6 +160,10 @@ class OrchestrationResponse(BaseModel):
..., description="Whether input guard validation failed"
)
content: str = Field(..., description="Response content with citations")
buttons: Optional[List[ChoiceButton]] = Field(
default=None,
description="Optional list of choice buttons for MCQ step responses",
)


# New models for embedding and context generation
Expand Down Expand Up @@ -261,6 +275,10 @@ class TestOrchestrationResponse(BaseModel):
..., description="Whether input guard validation failed"
)
content: str = Field(..., description="Response content with citations")
buttons: Optional[List[ChoiceButton]] = Field(
default=None,
description="Optional list of choice buttons for MCQ step responses",
)
chunks: Optional[List[ChunkInfo]] = Field(
default=None, description="Retrieved chunks with rank and content"
)
5 changes: 2 additions & 3 deletions src/utils/input_sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ def strip_html_tags(text: str) -> str:
if not text:
return text

text = html.unescape(text)

# First pass: Remove dangerous tags and their content
for tag in InputSanitizer.DANGEROUS_TAGS:
# Remove opening tag, content, and closing tag
Expand All @@ -74,9 +76,6 @@ def strip_html_tags(text: str) -> str:
# Third pass: Remove all remaining HTML tags
text = re.sub(r"<[^>]+>", "", text)

# Unescape HTML entities (e.g., &lt; -> <)
text = html.unescape(text)

return text

@staticmethod
Expand Down
125 changes: 125 additions & 0 deletions tests/test_input_sanitizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Unit tests for InputSanitizer — focused on #service prefix safety.

Validates that strip_html_tags() and sanitize_message() leave the
#service, /POST/... routing prefix characters (#, comma, /) untouched,
so that prefix detection logic in downstream handlers can always match.
"""

import pytest

from src.utils.input_sanitizer import InputSanitizer


class TestSanitizeMessageServicePrefix:
    """Passthrough checks for ``#service, /METHOD/...`` payloads.

    Every payload here is expected to come out of ``sanitize_message()``
    exactly as it went in.
    """

    def test_exact_service_prefix_passthrough(self) -> None:
        """The canonical ``#service`` payload is returned character-for-character."""
        payload = "#service, /POST/services/active/foo"
        sanitized = InputSanitizer.sanitize_message(payload)
        assert sanitized == payload

    @pytest.mark.parametrize(
        "msg",
        [
            # One entry per HTTP method ...
            "#service, /POST/services/active/foo",
            "#service, /GET/services/list",
            "#service, /DELETE/services/active/foo",
            "#service, /PUT/services/active/foo",
            "#service, /PATCH/services/active/foo",
            # ... plus query strings and a fragment on the path.
            "#service, /POST/services/active/foo?status=true",
            "#service, /POST/services/active/foo?a=1&b=2",
            "#service, /POST/services/active/foo#anchor",
        ],
    )
    def test_service_prefix_variants_passthrough(self, msg: str) -> None:
        """Each ``#service, /METHOD/...`` variant is returned unmodified."""
        sanitized = InputSanitizer.sanitize_message(msg)
        assert sanitized == msg


class TestSanitizeMessageHtmlStripping:
    """HTML is removed while the ``#service`` prefix characters are kept.

    These cases show the sanitizer actually does work (it is not a no-op)
    and that only HTML constructs are removed; ``#``, the comma and ``/``
    remain in the output.
    """

    def test_bold_tags_stripped_prefix_survives(self) -> None:
        """``<b>`` / ``</b>`` are dropped and the text they wrapped is kept."""
        raw = "#service, <b>/POST/</b>services/active/foo"
        expected = "#service, /POST/services/active/foo"
        assert InputSanitizer.sanitize_message(raw) == expected

    def test_script_tag_content_stripped_path_survives(self) -> None:
        """A ``<script>`` element is removed together with its content; the rest of the path stays."""
        raw = "#service, /POST/<script>alert(1)</script>foo"
        expected = "#service, /POST/foo"
        assert InputSanitizer.sanitize_message(raw) == expected

    def test_entity_encoded_script_tag_stripped_path_survives(self) -> None:
        """An entity-encoded ``<script>`` element is decoded and then stripped.

        Were ``html.unescape()`` applied only *after* tag stripping, the
        ``&lt;script&gt;...&lt;/script&gt;`` text would pass through every
        stripping pass untouched and then be decoded into a real ``<script>``
        tag in the result. Unescaping *before* stripping closes that bypass.
        """
        raw = "#service, /POST/&lt;script&gt;alert(1)&lt;/script&gt;foo"
        expected = "#service, /POST/foo"
        assert InputSanitizer.sanitize_message(raw) == expected

    def test_html_entities_unescaped_prefix_intact(self) -> None:
        """``&amp;`` is decoded to ``&`` while ``#``, the comma and ``/`` are unchanged."""
        cleaned = InputSanitizer.sanitize_message("#service, /POST/foo&amp;bar")
        assert cleaned == "#service, /POST/foo&bar"

    def test_hash_not_treated_as_html_tag(self) -> None:
        """``#`` cannot match the ``<[^>]+>`` tag pattern, so it is never stripped."""
        cleaned = InputSanitizer.sanitize_message("#service, /GET/list")
        assert cleaned.startswith("#service")

    def test_forward_slash_not_stripped(self) -> None:
        """``/`` characters are still present after ``strip_html_tags()`` has run."""
        cleaned = InputSanitizer.sanitize_message("#service, /POST/a/b/c")
        assert "/POST/a/b/c" in cleaned

    def test_comma_not_stripped(self) -> None:
        """The comma-plus-space separator between prefix and path is kept."""
        cleaned = InputSanitizer.sanitize_message("#service, /GET/list")
        assert ", " in cleaned


class TestSanitizeMessageWhitespace:
    """Documents whitespace normalisation rules that apply even to #service payloads.

    Callers constructing #service payloads must use exactly one space after
    the comma; this class documents what happens if they don't.
    """

    # Expected output shared by every normalisation case below.
    CANONICAL = "#service, /POST/services/active/foo"

    def test_single_space_after_comma_preserved(self) -> None:
        """A single space between the comma and the slash is NOT collapsed or removed."""
        msg = "#service, /POST/services/active/foo"
        assert InputSanitizer.sanitize_message(msg) == msg

    def test_double_space_after_comma_collapsed_to_single(self) -> None:
        """Two consecutive spaces are collapsed to one; callers must send exactly one space."""
        # Build the two-space input explicitly so that editors, formatters or
        # renderers cannot silently collapse it into the single-space case,
        # which would turn this test into a duplicate of the one above.
        double_spaced = "#service," + " " * 2 + "/POST/services/active/foo"
        assert double_spaced != self.CANONICAL  # guard: input really differs
        result = InputSanitizer.sanitize_message(double_spaced)
        assert result == self.CANONICAL

    def test_leading_whitespace_stripped(self) -> None:
        """Whitespace before the ``#service`` prefix is removed."""
        result = InputSanitizer.sanitize_message(
            " #service, /POST/services/active/foo"
        )
        assert result == self.CANONICAL

    def test_trailing_whitespace_stripped(self) -> None:
        """Whitespace after the end of the path is removed."""
        result = InputSanitizer.sanitize_message(
            "#service, /POST/services/active/foo "
        )
        assert result == self.CANONICAL

    def test_tab_in_prefix_converted_to_space_then_collapsed(self) -> None:
        """Tabs within the prefix are normalised along with spaces; a tab after the comma becomes a single space."""
        result = InputSanitizer.sanitize_message("#service,\t/POST/services/active/foo")
        assert result == self.CANONICAL
Loading