Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions nemo_retriever/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,13 +265,44 @@ ingestor = ingestor.files(documents).extract(method="nemotron_parse")

## Run with remote inference, no local GPU required:

For build.nvidia.com hosted inference, make sure you have `NVIDIA_API_KEY` set as an environment variable:

```bash
export NVIDIA_API_KEY=nvapi-...
```

Pass `inference="build.nvidia.com"` to `create_ingestor()` and the correct NIM endpoint URLs are applied automatically:

```python
ingestor = (
    create_ingestor(run_mode="inprocess", inference="build.nvidia.com")
    .files(documents)
.extract()
.embed()
.vdb_upload()
)
```

You can still override individual fields — any keyword argument you pass to `.extract()` or `.embed()` takes precedence over the preset:

```python
# Use the preset but swap in your own embedder endpoint
ingestor = (
create_ingestor(run_mode="inprocess", inference="build.nvidia.com")
.files(documents)
.extract()
.embed(embed_invoke_url="http://my-embedder:8000/v1")
.vdb_upload()
)
```

For self-hosted NIMs, pass the endpoint URLs directly — your URLs will depend on your NIM container DNS settings:

```python
ingestor = (
create_ingestor(run_mode="inprocess")
.files(documents)
.extract(
# for self hosted NIMs, your URLs will depend on your NIM container DNS settings
page_elements_invoke_url="https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3",
graphic_elements_invoke_url="https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1",
ocr_invoke_url="https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1",
Expand Down
11 changes: 9 additions & 2 deletions nemo_retriever/src/nemo_retriever/application/modes/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,15 @@ def create_runmode_ingestor(*, run_mode: RunMode = "inprocess", params: Ingestor
p = params or IngestorCreateParams()
if run_mode == "inprocess":
from nemo_retriever.ingest_modes.inprocess import InProcessIngestor

return InProcessIngestor(documents=p.documents)
from nemo_retriever.inference_presets import resolve_inference_preset

extract_defaults, embed_defaults = resolve_inference_preset(p.inference)
init_kwargs: dict = {"documents": p.documents}
if extract_defaults:
init_kwargs["default_extract_kwargs"] = extract_defaults
if embed_defaults:
init_kwargs["default_embed_kwargs"] = embed_defaults
return InProcessIngestor(**init_kwargs)
if run_mode == "batch":
from nemo_retriever.ingest_modes.batch import BatchIngestor

Expand Down
79 changes: 79 additions & 0 deletions nemo_retriever/src/nemo_retriever/inference_presets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Named inference presets for hosted NIM endpoints.

Each preset maps a short name to default ``extract`` and ``embed`` kwargs that
are injected into the ingestor pipeline. Users can still override individual
fields by passing their own keyword arguments to ``.extract()`` or ``.embed()``.

Currently supported presets
----------------------------
``"build.nvidia.com"``
Uses the publicly hosted NIMs on `build.nvidia.com
<https://build.nvidia.com>`_ / ``ai.api.nvidia.com``. Requires
``NVIDIA_API_KEY`` to be set in the environment (or passed explicitly as
``api_key``).
"""

from __future__ import annotations

from typing import Any, Dict, Optional, Tuple

# ---------------------------------------------------------------------------
# build.nvidia.com preset
# ---------------------------------------------------------------------------

# Default ``.extract()`` kwargs for the build.nvidia.com preset: the four
# hosted CV NIM endpoints used by the extraction pipeline.
_BUILD_NVIDIA_EXTRACT_DEFAULTS: Dict[str, Any] = {
    "page_elements_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3",
    "graphic_elements_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1",
    "ocr_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1",
    "table_structure_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1",
}

# Default ``.embed()`` kwargs for the build.nvidia.com preset: hosted
# embedding endpoint, model, and modality.
_BUILD_NVIDIA_EMBED_DEFAULTS: Dict[str, Any] = {
    "embed_invoke_url": "https://integrate.api.nvidia.com/v1/embeddings",
    "model_name": "nvidia/llama-nemotron-embed-1b-v2",
    "embed_modality": "text",
}

# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------

# Maps a normalized (lowercase, stripped) preset name to its
# ``(extract_defaults, embed_defaults)`` pair.
_PRESETS: Dict[str, Tuple[Dict[str, Any], Dict[str, Any]]] = {
    "build.nvidia.com": (
        _BUILD_NVIDIA_EXTRACT_DEFAULTS,
        _BUILD_NVIDIA_EMBED_DEFAULTS,
    ),
}


def resolve_inference_preset(
    inference: Optional[str],
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Look up the ``(extract_defaults, embed_defaults)`` pair for a preset.

    Parameters
    ----------
    inference:
        Preset name (e.g. ``"build.nvidia.com"``). ``None`` or an empty
        string opts out of presets entirely and yields two empty dicts.
        Matching is case-insensitive and ignores surrounding whitespace.

    Returns
    -------
    Shallow copies of the registered default dicts, so callers may mutate
    the result without corrupting the shared registry.

    Raises
    ------
    ValueError
        If *inference* is neither falsy nor a recognised preset name.
    """
    if not inference:
        return {}, {}
    normalized = inference.strip().lower()
    preset = _PRESETS.get(normalized)
    if preset is None:
        known = ", ".join(sorted(_PRESETS))
        raise ValueError(
            f"Unknown inference preset {inference!r}. "
            f"Supported values: {known}"
        )
    extract_defaults, embed_defaults = preset
    return dict(extract_defaults), dict(embed_defaults)
33 changes: 26 additions & 7 deletions nemo_retriever/src/nemo_retriever/ingest_modes/inprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,7 +944,13 @@ def _print_ingest_summary(results: list, elapsed_s: float) -> None:
class InProcessIngestor(Ingestor):
RUN_MODE = "inprocess"

def __init__(self, documents: Optional[List[str]] = None) -> None:
def __init__(
self,
documents: Optional[List[str]] = None,
*,
default_extract_kwargs: Optional[Dict[str, Any]] = None,
default_embed_kwargs: Optional[Dict[str, Any]] = None,
) -> None:
super().__init__(documents=documents)

# Keep backwards-compatibility with code that inspects `Ingestor._documents`
Expand All @@ -959,6 +965,11 @@ def __init__(self, documents: Optional[List[str]] = None) -> None:
self._extract_txt_kwargs: Dict[str, Any] = {}
self._extract_html_kwargs: Dict[str, Any] = {}

# Inference preset defaults (e.g. from ``inference="build.nvidia.com"``).
# User-supplied kwargs in .extract()/.embed() always override these.
self._default_extract_kwargs: Dict[str, Any] = dict(default_extract_kwargs or {})
self._default_embed_kwargs: Dict[str, Any] = dict(default_embed_kwargs or {})

def files(self, documents: Union[str, List[str]]) -> "InProcessIngestor":
"""
Add local files for in-process execution.
Expand Down Expand Up @@ -1012,7 +1023,7 @@ def extract(self, params: ExtractParams | None = None, **kwargs: Any) -> "InProc
os.path.splitext(f)[1].lower() in SUPPORTED_IMAGE_EXTENSIONS for f in self._input_documents
):
return self.extract_image_files(params=params, **kwargs)
resolved = _coerce_params(params, ExtractParams, kwargs)
resolved = _coerce_params(params, ExtractParams, {**self._default_extract_kwargs, **kwargs})
if (
any(
(
Expand Down Expand Up @@ -1066,13 +1077,21 @@ def _append_detection_tasks(
def _stage_remote_kwargs(stage_name: str) -> dict[str, Any]:
stage_prefix = f"{stage_name}_"
out: dict[str, Any] = {}
invoke_url = kwargs.get(f"{stage_prefix}invoke_url", kwargs.get("invoke_url"))

def _stage_value(name: str) -> Any:
    """Resolve *name* from the closed-over ``kwargs``, preferring the
    stage-prefixed key (e.g. ``"ocr_invoke_url"``) over the generic
    key (e.g. ``"invoke_url"``).

    Returns ``None`` when neither key is present. NOTE(review): an
    explicit ``stage_key=None`` falls through to the generic key rather
    than suppressing it — confirm that is the intended override rule.
    """
    stage_key = f"{stage_prefix}{name}"
    stage_value = kwargs.get(stage_key)
    if stage_value is not None:
        return stage_value
    # Fall back to the unprefixed key shared by all stages.
    return kwargs.get(name)

invoke_url = _stage_value("invoke_url")
if invoke_url:
out["invoke_url"] = invoke_url
api_key = kwargs.get(f"{stage_prefix}api_key", kwargs.get("api_key"))
if api_key:
api_key = _stage_value("api_key")
if api_key is not None:
out["api_key"] = api_key
timeout = kwargs.get(f"{stage_prefix}request_timeout_s", kwargs.get("request_timeout_s"))
timeout = _stage_value("request_timeout_s")
if timeout is not None:
out["request_timeout_s"] = timeout
for k in ("remote_max_pool_workers", "remote_max_retries", "remote_max_429_retries"):
Expand Down Expand Up @@ -1343,7 +1362,7 @@ def embed(self, params: EmbedParams | None = None, **kwargs: Any) -> "InProcessI
``"http://embedding:8000/v1"``), a remote NIM endpoint is used for
embedding instead of the local HF model.
"""
resolved = _coerce_params(params, EmbedParams, kwargs)
resolved = _coerce_params(params, EmbedParams, {**self._default_embed_kwargs, **kwargs})
if any((resolved.embedding_endpoint, resolved.embed_invoke_url)) and not resolved.api_key:
resolved = resolved.model_copy(update={"api_key": resolve_remote_api_key()})
embed_modality = resolved.embed_modality
Expand Down
1 change: 1 addition & 0 deletions nemo_retriever/src/nemo_retriever/params/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class IngestorCreateParams(_ParamsModel):
ray_log_to_driver: bool = True
debug: bool = False
base_url: str = "http://localhost:7670"
inference: Optional[str] = None


class IngestExecuteParams(_ParamsModel):
Expand Down
Loading
Loading