Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.9.17
3.11
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ghcr.io/astral-sh/uv:python3.9-bookworm AS base
FROM ghcr.io/astral-sh/uv:python3.11-bookworm AS base


# shared between builder and runtime image
Expand All @@ -22,7 +22,7 @@ RUN apt-get update \

# set and check UNO_PATH, UNO_PYTHON_PATH and UNO_OFFICE_BINARY_PATH
ENV UNO_PATH=/usr/lib/python3/dist-packages
ENV UNO_PYTHON_PATH=/usr/local/bin/python3.9
ENV UNO_PYTHON_PATH=/usr/local/bin/python3.11
ENV UNO_OFFICE_BINARY_PATH=/usr/lib/libreoffice/program/soffice.bin
RUN \
echo "UNO_PATH: ${UNO_PATH}" \
Expand Down Expand Up @@ -105,8 +105,8 @@ ENV LC_ALL=C
FROM dev AS python-dist-builder

ARG python_package_version
RUN echo "Setting version to: $version" && \
uv version "$python_package_version"
RUN echo "Setting version to: $python_package_version" && \
if [ -n "$python_package_version" ]; then uv version "$python_package_version"; fi
RUN python scripts/dev/update_readme.py \
--source=./doc/python_library.md \
--target=./doc/generated_python_library.md \
Expand Down
10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "sciencebeam-parser"
version = "0.0.0"
description = "ScienceBeam Parser, parse scientific documents."
readme = "doc/generated_python_library.md"
requires-python = ">=3.9,<3.10"
requires-python = ">=3.10"
dependencies = [
"fastapi[standard]>=0.124.0",
"fsspec>=2022.1.0",
Expand All @@ -15,11 +15,11 @@ dependencies = [

[project.optional-dependencies]
cpu = [
"torch==1.10.1+cpu",
"torchvision==0.11.2+cpu"
"torch>=2.5.1",
"torchvision>=0.20.1"
]
delft = [
"sciencebeam-trainer-delft[delft]>=0.0.34",
"sciencebeam-trainer-delft[delft]>=0.0.36",
]
cv = [
"layoutparser==0.3.2",
Expand All @@ -39,7 +39,7 @@ ocr = [
"tesserocr==2.5.2",
]
tf = [
"tensorflow==2.9.3",
"tensorflow>=2.17.1",
]

[tool.uv.sources]
Expand Down
3 changes: 1 addition & 2 deletions sciencebeam_parser/document/semantic_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from typing_extensions import Protocol

from sciencebeam_parser.document.layout_document import (
EMPTY_BLOCK,
LayoutBlock,
LayoutGraphic,
LayoutToken
Expand Down Expand Up @@ -59,7 +58,7 @@ def merged_block(self) -> LayoutBlock:

@dataclass
class SemanticSimpleContentWrapper(SemanticContentWrapper):
content: LayoutBlock = EMPTY_BLOCK
content: LayoutBlock = field(default_factory=lambda: LayoutBlock(lines=[]))

layout_block: dataclasses.InitVar[LayoutBlock] = None

Expand Down
11 changes: 5 additions & 6 deletions sciencebeam_parser/models/delft_model_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
from typing import Optional, List, Tuple

from sciencebeam_trainer_delft.embedding.manager import EmbeddingManager
from sciencebeam_trainer_delft.sequence_labelling.wrapper import (
DEFAULT_EMBEDDINGS_PATH,
Sequence
)
from sciencebeam_trainer_delft.sequence_labelling.wrapper import Sequence

from sciencebeam_parser.app.context import AppContext
from sciencebeam_parser.models.model_impl import ModelImpl
Expand All @@ -15,6 +12,9 @@
LOGGER = logging.getLogger(__name__)


REGISTRY_REGISTRY_PATH = 'delft/resources-registry.json'


class DelftModelImpl(ModelImpl):
def __init__(self, model_url: str, app_context: AppContext):
self.model_url = model_url
Expand All @@ -27,9 +27,8 @@ def __repr__(self) -> str:
)

def _load_model(self) -> Sequence:
embedding_registry_path = DEFAULT_EMBEDDINGS_PATH
embedding_manager = EmbeddingManager(
path=embedding_registry_path,
path=REGISTRY_REGISTRY_PATH,
download_manager=self.app_context.download_manager
)
model = Sequence(
Expand Down
Loading
Loading