Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
743ed37
Lookups firstcut
gaya3-zipstack Jan 5, 2026
6881cb8
Lookups secondcut
gaya3-zipstack Jan 6, 2026
7558149
Working
gaya3-zipstack Jan 6, 2026
3db1cf0
Merge remote-tracking branch 'origin' into feature/look-ups
gaya3-zipstack Jan 8, 2026
a55f351
Save point
gaya3-zipstack Jan 8, 2026
bd4f07c
API returning lookedup data
gaya3-zipstack Jan 8, 2026
0400cb0
Merge remote-tracking branch 'origin' into feature/look-ups
gaya3-zipstack Jan 8, 2026
d502f76
Log integration
gaya3-zipstack Jan 12, 2026
d6ac066
Chunk size 0 fix
gaya3-zipstack Jan 13, 2026
0dfac91
Merge remote-tracking branch 'origin/main' into feature/look-ups
gaya3-zipstack Jan 13, 2026
65763a2
Fix RAG retrieval issues
gaya3-zipstack Jan 14, 2026
59f5c53
Re-indexing & disallow deletion of Lookup project on existing linked…
gaya3-zipstack Jan 19, 2026
204a821
Deleting vector DB nodes as appropriate
gaya3-zipstack Jan 20, 2026
d6491cf
Merge remote-tracking branch 'origin/main' into feature/look-ups
gaya3-zipstack Jan 21, 2026
dc866a1
Introducing prompt level lookups
gaya3-zipstack Jan 21, 2026
b22742e
Fix lookup for api deployments and combined output
gaya3-zipstack Jan 23, 2026
bcf0f67
conflict resolution
gaya3-zipstack Jan 27, 2026
8d23d96
Fix single pass mode
gaya3-zipstack Jan 28, 2026
37b9f76
Improve Debug
gaya3-zipstack Jan 28, 2026
9c0d68a
Improve helper prompts
gaya3-zipstack Jan 28, 2026
2033bcf
Lookup URL changes
gaya3-zipstack Feb 2, 2026
55ef35f
Merge with main
gaya3-zipstack Feb 4, 2026
1adbfc7
Update urls_v2.py
gaya3-zipstack Feb 4, 2026
c27a2a8
Add 'lookup' to SHARED_APPS in base.py
gaya3-zipstack Feb 4, 2026
f36b2a0
Update base.py
gaya3-zipstack Feb 4, 2026
29b9adb
Lookup architecture
gaya3-zipstack Feb 5, 2026
d8ea985
Lookup architecture
gaya3-zipstack Feb 5, 2026
7d33a25
Fix build errors
gaya3-zipstack Feb 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions backend/backend/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ def filter(self, record):
"prompt_studio.prompt_studio_index_manager_v2",
"tags",
"configuration",
"lookup",
)
TENANT_APPS = []

Expand Down Expand Up @@ -599,3 +600,11 @@ def filter(self, record):
raise ValueError(ERROR_MESSAGE)

# Environment variables are always strings, so the raw ``os.environ.get``
# result would be truthy for values such as "False" or "0". Parse it into a
# real boolean, the same way LOOKUP_AUTO_ENRICH_ENABLED is parsed in this file.
# NOTE(review): relies on CommonUtils.str_to_bool accepting the spellings
# used in deployed environments — confirm before merging.
ENABLE_HIGHLIGHT_API_DEPLOYMENT = CommonUtils.str_to_bool(
    os.environ.get("ENABLE_HIGHLIGHT_API_DEPLOYMENT", "False")
)

# Lookup Integration Settings
# Toggle for running Lookup enrichment automatically once a Prompt Studio
# extraction has finished; the variable defaults to "True" when unset.
LOOKUP_AUTO_ENRICH_ENABLED = CommonUtils.str_to_bool(
    os.getenv("LOOKUP_AUTO_ENRICH_ENABLED", "True")
)
# Upper bound (seconds) on how long to wait for Lookup enrichment before
# returning; a non-integer value raises ValueError at settings import time.
LOOKUP_ENRICHMENT_TIMEOUT = int(os.getenv("LOOKUP_ENRICHMENT_TIMEOUT", "30"))
1 change: 1 addition & 0 deletions backend/backend/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
path("platform/", include("platform_settings.urls")),
path("api/", include("api.urls")),
path("usage/", include("usage.urls")),
path("lookup/", include("lookup.urls")),
path(
UrlPathConstants.PROMPT_STUDIO,
include("prompt_studio.prompt_profile_manager.urls"),
Expand Down
1 change: 1 addition & 0 deletions backend/backend/urls_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
path("usage/", include("usage_v2.urls")),
path("notifications/", include("notification_v2.urls")),
path("logs/", include("logs_helper.urls")),
path("lookup/", include("lookup.urls")),
path(
UrlPathConstants.PROMPT_STUDIO,
include("prompt_studio.prompt_profile_manager_v2.urls"),
Expand Down
Empty file added backend/lookup/__init__.py
Empty file.
11 changes: 11 additions & 0 deletions backend/lookup/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Lookup app configuration."""

from django.apps import AppConfig


class LookupConfig(AppConfig):
    """Django app configuration that registers the ``lookup`` application."""

    name = "lookup"
    verbose_name = "Look-Up System"
    # Field type Django uses for implicitly created primary keys in this app.
    default_auto_field = "django.db.models.BigAutoField"
28 changes: 28 additions & 0 deletions backend/lookup/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Constants for the Look-Up system."""


class LookupProfileManagerKeys:
    """Field-name strings used when serializing a LookupProfileManager."""

    # Ownership / identity
    CREATED_BY = "created_by"
    MODIFIED_BY = "modified_by"
    LOOKUP_PROJECT = "lookup_project"
    PROFILE_NAME = "profile_name"

    # Component selection
    LLM = "llm"
    VECTOR_STORE = "vector_store"
    EMBEDDING_MODEL = "embedding_model"
    X2TEXT = "x2text"

    # Chunking and retrieval parameters
    CHUNK_SIZE = "chunk_size"
    CHUNK_OVERLAP = "chunk_overlap"
    SIMILARITY_TOP_K = "similarity_top_k"

    # Flags
    IS_DEFAULT = "is_default"
    REINDEX = "reindex"


class LookupProfileManagerErrors:
    """Human-readable error texts for LookupProfileManager operations."""

    # Generic serializer failure.
    SERIALIZATION_FAILED = "Data serialization failed."
    # Profile-name clash within one project.
    PROFILE_NAME_EXISTS = "A profile with this name already exists for this project."
    # Repeated / duplicate request.
    DUPLICATE_API = "It appears that a duplicate call may have been made."
    # Project has no profile marked as default.
    NO_DEFAULT_PROFILE = "No default profile found for this project."
97 changes: 97 additions & 0 deletions backend/lookup/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Custom exceptions for the Look-Up system.

This module defines custom exceptions specific to the Look-Up functionality.
"""


class LookupError(Exception):
    """Root of the Look-Up exception hierarchy.

    Every other exception in this module derives from this class, so callers
    can catch all Look-Up failures with a single ``except`` clause.

    NOTE(review): the name is identical to Python's built-in ``LookupError``
    (the parent of ``KeyError`` and ``IndexError``). In any module that
    imports this class, ``except LookupError`` refers to this class and will
    not catch ``KeyError`` / ``IndexError``.
    """


class ExtractionNotCompleteError(LookupError):
    """Signals that reference data was requested before extraction finished.

    Raised when loading reference data for a project in which at least one
    data source has not completed extraction processing.
    """

    def __init__(self, failed_files=None):
        """Build the error message and remember the offending files.

        Args:
            failed_files: List of file names that failed or are pending
                extraction. A falsy value is stored as an empty list.
        """
        base_text = "Reference data extraction not complete"
        self.failed_files = failed_files or []
        if failed_files:
            # Only mention files when there is something to list.
            text = base_text + f" for files: {', '.join(failed_files)}"
        else:
            text = base_text
        super().__init__(text)


class TemplateNotFoundError(LookupError):
    """Signals that a Look-Up project has no template attached.

    Raised on an attempt to execute a Look-Up for which no prompt template
    has been configured.
    """


class ParseError(LookupError):
    """Signals that an LLM response could not be parsed.

    Raised when the LLM output is not valid JSON or does not match the
    expected format.
    """


class DefaultProfileError(LookupError):
    """Signals that a Look-Up project has no default profile.

    Raised when the default profile is requested for a Look-Up project that
    does not have one configured.
    """


class ContextWindowExceededError(LookupError):
    """Signals that the assembled prompt does not fit the LLM context window.

    Raised when the prompt template, reference data and extracted data
    together need more tokens than the configured LLM allows.
    """

    def __init__(self, token_count: int, context_limit: int, model: str):
        """Record the sizes involved and compose the user-facing message.

        Args:
            token_count: Number of tokens in the prompt
            context_limit: Maximum tokens allowed by the model
            model: Name of the LLM model
        """
        self.token_count, self.context_limit, self.model = (
            token_count,
            context_limit,
            model,
        )
        # Both counts are rendered with thousands separators.
        needed = f"{token_count:,}"
        allowed = f"{context_limit:,}"
        super().__init__(
            f"Context window exceeded: prompt requires {needed} tokens "
            f"but {model} has a limit of {allowed} tokens. "
            "Reduce reference data size or use a model with larger context window."
        )


class RetrievalError(LookupError):
    """Signals that RAG retrieval failed.

    Raised when the vector similarity search cannot retrieve context from
    the indexed reference data.
    """
8 changes: 8 additions & 0 deletions backend/lookup/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Integration modules for Look-Up functionality.

This package contains integrations with external services:
- Object Storage (S3-compatible)
- LLM Providers (OpenAI, Anthropic, etc.)
- LLMWhisperer (Document extraction)
- Redis Cache
"""
86 changes: 86 additions & 0 deletions backend/lookup/integrations/file_storage_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""File Storage Client for Look-Up reference data.

This module provides integration with Unstract's file storage
for loading extracted reference data content.
"""

import logging

from utils.file_storage.constants import FileStorageKeys

from unstract.sdk1.file_storage.constants import StorageType
from unstract.sdk1.file_storage.env_helper import EnvHelper

logger = logging.getLogger(__name__)


class FileStorageClient:
"""Storage client implementation using Unstract's file storage.

This client uses the actual platform file storage to read
extracted reference data content.
"""

def __init__(self):
"""Initialize the file storage client."""
self.fs_instance = EnvHelper.get_storage(
storage_type=StorageType.PERMANENT,
env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE,
)

def get(self, path: str) -> str:
"""Retrieve file content from storage.

Args:
path: Storage path to the file

Returns:
File content as string

Raises:
FileNotFoundError: If file doesn't exist
Exception: If reading fails
"""
try:
if not self.fs_instance.exists(path):
logger.error(f"File not found: {path}")
raise FileNotFoundError(f"File not found: {path}")

# Use read() method with text mode
content = self.fs_instance.read(path, mode="r", encoding="utf-8")
logger.debug(f"Read {len(content)} chars from {path}")
return content

except FileNotFoundError:
raise
except Exception as e:
logger.error(f"Failed to read file {path}: {e}")
raise Exception(f"Failed to read file: {str(e)}")

def exists(self, path: str) -> bool:
"""Check if path exists in storage.

Args:
path: Storage path

Returns:
True if exists
"""
return self.fs_instance.exists(path)

def get_text_content(self, path: str) -> str | None:
"""Get text content from storage (alias for get).

Args:
path: Storage path

Returns:
Text content or None if not found
"""
try:
return self.get(path)
except FileNotFoundError:
return None
except Exception as e:
logger.warning(f"Error reading {path}: {e}")
return None
Loading