Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 6 additions & 13 deletions backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from serialize_value import serialize_value

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -54,17 +55,8 @@ def get_lance_connection():

def serialize_arrow_value(value):
try:
if pa.types.is_null(value.type):
return None
elif pa.types.is_boolean(value.type):
return value.as_py()
elif pa.types.is_integer(value.type) or pa.types.is_floating(value.type):
return value.as_py()
elif pa.types.is_string(value.type) or pa.types.is_large_string(value.type):
return value.as_py()
elif pa.types.is_timestamp(value.type):
return value.as_py().isoformat() if value.as_py() else None
elif pa.types.is_list(value.type) and pa.types.is_floating(value.value_type):
# Handle vector columns with special processing
if pa.types.is_list(value.type) and pa.types.is_floating(value.value_type):
try:
vec = value.as_py()
if vec is None:
Expand Down Expand Up @@ -118,8 +110,9 @@ def serialize_arrow_value(value):
except Exception as vec_error:
logger.warning(f"Error processing vector data: {vec_error}")
return {"type": "vector", "error": f"Vector processing failed: {str(vec_error)}"}
else:
return str(value.as_py())

# Use the general serialize_value utility for all other types
return serialize_value(value)
except Exception as e:
logger.warning(f"Error serializing value: {e}")
return {"error": f"Serialization failed: {str(e)}"}
Expand Down
86 changes: 86 additions & 0 deletions backend/serialize_value.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import base64
from datetime import date, datetime, time, timedelta

import numpy as np
import pyarrow as pa


def _serialize_temporal(obj):
"""Convert temporal types to string representation."""
if isinstance(obj, (datetime, date, time)):
return obj.isoformat()
if isinstance(obj, timedelta):
return obj.total_seconds()
return str(obj)


def _serialize_pyarrow_scalar(obj):
    """Convert a PyArrow scalar to a JSON-serializable Python value.

    Args:
        obj: A ``pa.Scalar`` instance.

    Returns:
        ``None`` for a null scalar; a Base64 string for binary types
        (variable-length, large and fixed-size); the result of
        ``_serialize_temporal`` for temporal types; a recursively serialized
        list for list-like and map types; a recursively serialized dict for
        struct types; a ``float`` for floating-point types; and the plain
        ``as_py()`` value for every other type.
    """
    value = obj.as_py()
    # A null scalar converts to None whatever its type. Bail out before the
    # type-specific branches, which would otherwise fail on None
    # (b64encode(None), iterating None, float(None)).
    if value is None:
        return None

    arrow_type = obj.type
    types = pa.types

    if (
        types.is_binary(arrow_type)
        or types.is_large_binary(arrow_type)
        or types.is_fixed_size_binary(arrow_type)
    ):
        return base64.b64encode(value).decode("utf-8")

    if types.is_temporal(arrow_type):
        return _serialize_temporal(value)

    if (
        types.is_list(arrow_type)
        or types.is_large_list(arrow_type)
        or types.is_fixed_size_list(arrow_type)
        or types.is_map(arrow_type)
    ):
        return [serialize_value(item) for item in value]

    if types.is_struct(arrow_type):
        # StructScalar is a mapping and offers no field() accessor (that
        # lives on StructArray). as_py() already produced a dict keyed by
        # field name, so recurse over its plain-Python values.
        return {name: serialize_value(item) for name, item in value.items()}

    if types.is_floating(arrow_type):
        return float(value)

    return value


def _serialize_container(obj):
"""Convert container types (dict, list, tuple) recursively."""
if isinstance(obj, dict):
return {key: serialize_value(value) for key, value in obj.items()}
if isinstance(obj, (list, tuple)):
return [serialize_value(item) for item in obj]
return obj


def _serialize_basic_types(obj):
"""Convert basic Python types to JSON-serializable format."""
if isinstance(obj, (bytes, pa.BinaryScalar)):
return base64.b64encode(obj).decode("utf-8")
if isinstance(obj, (datetime, date, time)):
return obj.isoformat()
if isinstance(obj, timedelta):
return obj.total_seconds()
if isinstance(obj, np.number):
return obj.item()
return obj


def serialize_value(obj):
    """
    Turn *obj* into something JSON can represent, recursing where needed.

    Conversion is attempted in this order:
    - basic types (bytes / PyArrow binary -> Base64 string, date and time
      values -> ISO format string, timedelta -> seconds, NumPy numbers ->
      native Python numbers)
    - containers (dict, list, tuple), converted element by element
    - PyArrow scalars, converted to native Python values

    A value that none of the steps recognises is returned as-is.
    """
    # Each pure-Python converter hands back the very same object when it has
    # nothing to do, so an identity check tells us whether it handled obj.
    for convert in (_serialize_basic_types, _serialize_container):
        converted = convert(obj)
        if converted is not obj:
            return converted

    # Last resort: PyArrow scalars get type-driven handling.
    if isinstance(obj, pa.Scalar):
        return _serialize_pyarrow_scalar(obj)

    return obj
4 changes: 2 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ COPY --from=builder /root/.local /home/appuser/.local

WORKDIR /app

COPY backend/app.py .
COPY backend/*.py .
COPY web/vanilla/ /web/

RUN chown -R appuser:appuser /app /web
Expand All @@ -47,4 +47,4 @@ LABEL org.opencontainers.image.version="0.1.0"
LABEL org.opencontainers.image.licenses="MIT"
LABEL com.github.lancedb.version="${LANCEDB_VERSION}"

CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]
CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]