diff --git a/nemo_retriever/README.md b/nemo_retriever/README.md index 6a0ac50db..9424b3c4f 100644 --- a/nemo_retriever/README.md +++ b/nemo_retriever/README.md @@ -265,13 +265,44 @@ ingestor = ingestor.files(documents).extract(method="nemotron_parse") ## Run with remote inference, no local GPU required: -For build.nvidia.com hosted inference, make sure you have NVIDIA_API_KEY set as an environment variable. +For build.nvidia.com hosted inference, make sure you have `NVIDIA_API_KEY` set as an environment variable: + +```bash +export NVIDIA_API_KEY=nvapi-... +``` + +Pass `inference="build.nvidia.com"` to `create_ingestor()` and the correct NIM endpoint URLs are applied automatically: ```python ingestor = ( - ingestor.files(documents) + create_ingestor(run_mode="inprocess", inference="build.nvidia.com") + .files(documents) + .extract() + .embed() + .vdb_upload() +) +``` + +You can still override individual fields — any keyword argument you pass to `.extract()` or `.embed()` takes precedence over the preset: + +```python +# Use the preset but swap in your own embedder endpoint +ingestor = ( + create_ingestor(run_mode="inprocess", inference="build.nvidia.com") + .files(documents) + .extract() + .embed(embed_invoke_url="http://my-embedder:8000/v1") + .vdb_upload() +) +``` + +For self-hosted NIMs, pass the endpoint URLs directly — your URLs will depend on your NIM container DNS settings: + +```python +ingestor = ( + create_ingestor(run_mode="inprocess") + .files(documents) .extract( - # for self hosted NIMs, your URLs will depend on your NIM container DNS settings page_elements_invoke_url="https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3", graphic_elements_invoke_url="https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1", ocr_invoke_url="https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1", diff --git a/nemo_retriever/src/nemo_retriever/application/modes/factory.py b/nemo_retriever/src/nemo_retriever/application/modes/factory.py 
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Named inference presets for hosted NIM endpoints.

Each preset maps a short name to default ``extract`` and ``embed`` kwargs that
are injected into the ingestor pipeline.  Users can still override individual
fields by passing their own keyword arguments to ``.extract()`` or ``.embed()``.

Currently supported presets
----------------------------
``"build.nvidia.com"``
    Uses the publicly hosted NIMs on `build.nvidia.com
    <https://build.nvidia.com>`_ / ``ai.api.nvidia.com``.  Requires
    ``NVIDIA_API_KEY`` to be set in the environment (or passed explicitly as
    ``api_key``).
"""

from typing import Any, Dict, Optional, Tuple

# ---------------------------------------------------------------------------
# build.nvidia.com preset
# ---------------------------------------------------------------------------

# Keyword defaults for the extract stage: one invoke URL per detection/OCR stage.
_BUILD_NVIDIA_EXTRACT_DEFAULTS: Dict[str, Any] = {
    "page_elements_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3",
    "graphic_elements_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1",
    "ocr_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1",
    "table_structure_invoke_url": "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1",
}

# Keyword defaults for the embed stage.
_BUILD_NVIDIA_EMBED_DEFAULTS: Dict[str, Any] = {
    "embed_invoke_url": "https://integrate.api.nvidia.com/v1/embeddings",
    "model_name": "nvidia/llama-nemotron-embed-1b-v2",
    "embed_modality": "text",
}

# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------

# Maps the normalised (stripped, lower-cased) preset name to
# ``(extract_defaults, embed_defaults)``.
_PRESETS: Dict[str, Tuple[Dict[str, Any], Dict[str, Any]]] = {
    "build.nvidia.com": (
        _BUILD_NVIDIA_EXTRACT_DEFAULTS,
        _BUILD_NVIDIA_EMBED_DEFAULTS,
    ),
}


def resolve_inference_preset(
    inference: Optional[str],
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Return ``(extract_defaults, embed_defaults)`` for the given preset name.

    Parameters
    ----------
    inference:
        Preset name (e.g. ``"build.nvidia.com"``).  The name is matched
        case-insensitively after surrounding whitespace is stripped.
        ``None``, an empty string, or a whitespace-only string opts out of
        any preset (returns empty dicts).

    Returns
    -------
    tuple of (dict, dict)
        Fresh copies of the preset's extract and embed keyword defaults, so
        callers may mutate them without touching the module-level tables.

    Raises
    ------
    ValueError
        If *inference* names something (i.e. is non-empty after stripping)
        that is not a recognised preset.
    """
    if not inference:
        return {}, {}

    key = inference.strip().lower()
    if not key:
        # Whitespace-only input normalises to the empty string, which is the
        # documented opt-out; do not report it as an unknown preset.
        return {}, {}

    if key not in _PRESETS:
        known = ", ".join(sorted(_PRESETS))
        raise ValueError(
            f"Unknown inference preset {inference!r}. "
            f"Supported values: {known}"
        )

    extract_defaults, embed_defaults = _PRESETS[key]
    # The preset values are flat strings, so a shallow copy is enough to
    # isolate callers from the shared tables.
    return dict(extract_defaults), dict(embed_defaults)
@@ -1012,7 +1023,7 @@ def extract(self, params: ExtractParams | None = None, **kwargs: Any) -> "InProc os.path.splitext(f)[1].lower() in SUPPORTED_IMAGE_EXTENSIONS for f in self._input_documents ): return self.extract_image_files(params=params, **kwargs) - resolved = _coerce_params(params, ExtractParams, kwargs) + resolved = _coerce_params(params, ExtractParams, {**self._default_extract_kwargs, **kwargs}) if ( any( ( @@ -1066,13 +1077,21 @@ def _append_detection_tasks( def _stage_remote_kwargs(stage_name: str) -> dict[str, Any]: stage_prefix = f"{stage_name}_" out: dict[str, Any] = {} - invoke_url = kwargs.get(f"{stage_prefix}invoke_url", kwargs.get("invoke_url")) + + def _stage_value(name: str) -> Any: + stage_key = f"{stage_prefix}{name}" + stage_value = kwargs.get(stage_key) + if stage_value is not None: + return stage_value + return kwargs.get(name) + + invoke_url = _stage_value("invoke_url") if invoke_url: out["invoke_url"] = invoke_url - api_key = kwargs.get(f"{stage_prefix}api_key", kwargs.get("api_key")) - if api_key: + api_key = _stage_value("api_key") + if api_key is not None: out["api_key"] = api_key - timeout = kwargs.get(f"{stage_prefix}request_timeout_s", kwargs.get("request_timeout_s")) + timeout = _stage_value("request_timeout_s") if timeout is not None: out["request_timeout_s"] = timeout for k in ("remote_max_pool_workers", "remote_max_retries", "remote_max_429_retries"): @@ -1343,7 +1362,7 @@ def embed(self, params: EmbedParams | None = None, **kwargs: Any) -> "InProcessI ``"http://embedding:8000/v1"``), a remote NIM endpoint is used for embedding instead of the local HF model. 
""" - resolved = _coerce_params(params, EmbedParams, kwargs) + resolved = _coerce_params(params, EmbedParams, {**self._default_embed_kwargs, **kwargs}) if any((resolved.embedding_endpoint, resolved.embed_invoke_url)) and not resolved.api_key: resolved = resolved.model_copy(update={"api_key": resolve_remote_api_key()}) embed_modality = resolved.embed_modality diff --git a/nemo_retriever/src/nemo_retriever/params/models.py b/nemo_retriever/src/nemo_retriever/params/models.py index f08548cbe..53ad0752c 100644 --- a/nemo_retriever/src/nemo_retriever/params/models.py +++ b/nemo_retriever/src/nemo_retriever/params/models.py @@ -44,6 +44,7 @@ class IngestorCreateParams(_ParamsModel): ray_log_to_driver: bool = True debug: bool = False base_url: str = "http://localhost:7670" + inference: Optional[str] = None class IngestExecuteParams(_ParamsModel): diff --git a/nemo_retriever/tests/test_inference_presets.py b/nemo_retriever/tests/test_inference_presets.py new file mode 100644 index 000000000..c76880353 --- /dev/null +++ b/nemo_retriever/tests/test_inference_presets.py @@ -0,0 +1,179 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. 
# SPDX-License-Identifier: Apache-2.0

"""Unit tests for the inference preset feature and related create_ingestor() behaviour."""

import sys
import types

import pytest

from nemo_retriever.inference_presets import (
    _BUILD_NVIDIA_EMBED_DEFAULTS,
    _BUILD_NVIDIA_EXTRACT_DEFAULTS,
    resolve_inference_preset,
)
from nemo_retriever.ingestor import create_ingestor
from nemo_retriever.params import IngestorCreateParams


# ---------------------------------------------------------------------------
# resolve_inference_preset()
# ---------------------------------------------------------------------------


class TestResolveInferencePreset:
    """Direct tests of the preset lookup function."""

    def test_none_returns_empty_dicts(self):
        extract, embed = resolve_inference_preset(None)
        assert extract == {}
        assert embed == {}

    def test_empty_string_returns_empty_dicts(self):
        extract, embed = resolve_inference_preset("")
        assert extract == {}
        assert embed == {}

    def test_build_nvidia_returns_expected_extract_defaults(self):
        extract, _ = resolve_inference_preset("build.nvidia.com")
        assert extract["page_elements_invoke_url"] == (
            "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3"
        )
        assert extract["graphic_elements_invoke_url"] == (
            "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1"
        )
        assert extract["ocr_invoke_url"] == (
            "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
        )
        assert extract["table_structure_invoke_url"] == (
            "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1"
        )

    def test_build_nvidia_returns_expected_embed_defaults(self):
        _, embed = resolve_inference_preset("build.nvidia.com")
        assert embed["embed_invoke_url"] == "https://integrate.api.nvidia.com/v1/embeddings"
        assert embed["model_name"] == "nvidia/llama-nemotron-embed-1b-v2"
        assert embed["embed_modality"] == "text"

    def test_preset_name_is_case_and_whitespace_insensitive(self):
        """The lookup strips and lower-cases the name before matching."""
        assert resolve_inference_preset("  Build.NVIDIA.com ") == resolve_inference_preset(
            "build.nvidia.com"
        )

    def test_returns_copies_not_originals(self):
        extract1, embed1 = resolve_inference_preset("build.nvidia.com")
        extract2, embed2 = resolve_inference_preset("build.nvidia.com")
        # Mutating one copy must not affect the other or the module-level constants.
        extract1["new_key"] = "new_value"
        assert "new_key" not in extract2
        assert "new_key" not in _BUILD_NVIDIA_EXTRACT_DEFAULTS
        # The embed half of the tuple must be isolated in the same way.
        embed1["new_key"] = "new_value"
        assert "new_key" not in embed2
        assert "new_key" not in _BUILD_NVIDIA_EMBED_DEFAULTS

    def test_unknown_preset_raises_value_error(self):
        with pytest.raises(ValueError, match="Unknown inference preset"):
            resolve_inference_preset("unknown-preset")


# ---------------------------------------------------------------------------
# IngestorCreateParams accepts inference field
# ---------------------------------------------------------------------------


class TestIngestorCreateParamsInference:
    """The params model stores ``inference`` verbatim; it does not validate preset names."""

    def test_default_is_none(self):
        p = IngestorCreateParams()
        assert p.inference is None

    def test_accepts_build_nvidia(self):
        p = IngestorCreateParams(inference="build.nvidia.com")
        assert p.inference == "build.nvidia.com"

    def test_rejects_unknown_value_at_ingestor_level(self):
        """Unknown preset names are caught when the ingestor is built, not at param construction.

        This test pins only the param-construction half: an unknown name is
        accepted and stored unchanged.  The rejection itself is covered by
        ``TestCreateIngestorWithPreset.test_unknown_preset_raises``.
        """
        p = IngestorCreateParams(inference="totally-unknown")
        assert p.inference == "totally-unknown"


# ---------------------------------------------------------------------------
# Helpers shared by factory tests below
# ---------------------------------------------------------------------------


def _make_capturing_inprocess_module(monkeypatch: pytest.MonkeyPatch) -> type:
    """Register a capturing DummyIngestor for nemo_retriever.ingest_modes.inprocess.

    A stand-in module is placed in ``sys.modules`` under the real module's
    name, exposing a stub class as ``InProcessIngestor``.  The stub records
    the keyword arguments it was constructed with so tests can inspect what
    the factory passed.  ``monkeypatch`` restores ``sys.modules`` afterwards.

    Returns the stub class so callers can ``isinstance``-check the result.
    """
    module = types.ModuleType("nemo_retriever.ingest_modes.inprocess")

    class CapturingIngestor:
        def __init__(self, **kwargs):
            self.kwargs = kwargs
            # Mirror the attribute names of the real ingestor; a missing
            # kwarg shows up as an empty dict.
            self._default_extract_kwargs = kwargs.get("default_extract_kwargs", {})
            self._default_embed_kwargs = kwargs.get("default_embed_kwargs", {})

    CapturingIngestor.__name__ = "InProcessIngestor"
    setattr(module, "InProcessIngestor", CapturingIngestor)
    monkeypatch.setitem(sys.modules, "nemo_retriever.ingest_modes.inprocess", module)
    return CapturingIngestor


# ---------------------------------------------------------------------------
# create_ingestor() with inference preset
# ---------------------------------------------------------------------------


class TestCreateIngestorWithPreset:
    """create_ingestor() resolves the preset and forwards its defaults to the ingestor."""

    def test_build_nvidia_preset_passes_extract_defaults(self, monkeypatch):
        """Factory passes preset extract defaults to InProcessIngestor."""
        dummy_cls = _make_capturing_inprocess_module(monkeypatch)

        ingestor = create_ingestor(run_mode="inprocess", inference="build.nvidia.com")

        assert isinstance(ingestor, dummy_cls)
        assert ingestor._default_extract_kwargs == _BUILD_NVIDIA_EXTRACT_DEFAULTS

    def test_build_nvidia_preset_passes_embed_defaults(self, monkeypatch):
        """Factory passes preset embed defaults to InProcessIngestor."""
        dummy_cls = _make_capturing_inprocess_module(monkeypatch)

        ingestor = create_ingestor(run_mode="inprocess", inference="build.nvidia.com")

        assert isinstance(ingestor, dummy_cls)
        assert ingestor._default_embed_kwargs == _BUILD_NVIDIA_EMBED_DEFAULTS

    def test_no_preset_leaves_defaults_empty(self, monkeypatch):
        """When inference is not provided, no defaults are passed to InProcessIngestor."""
        dummy_cls = _make_capturing_inprocess_module(monkeypatch)

        ingestor = create_ingestor(run_mode="inprocess")

        assert isinstance(ingestor, dummy_cls)
        assert ingestor._default_extract_kwargs == {}
        assert ingestor._default_embed_kwargs == {}

    def test_unknown_preset_raises(self, monkeypatch):
        """Unknown preset name raises ValueError before the ingestor is constructed."""
        _make_capturing_inprocess_module(monkeypatch)
        with pytest.raises(ValueError, match="Unknown inference preset"):
            create_ingestor(run_mode="inprocess", inference="bad-preset")


# ---------------------------------------------------------------------------
# InProcessIngestor default kwargs merging (tested via factory layer)
# ---------------------------------------------------------------------------


class TestInProcessIngestorPresetMerging:
    """Verify that preset defaults are wired through correctly.

    These tests use a capturing DummyIngestor (same pattern as test_factory.py)
    to avoid pulling in the full batch/ray dependency stack.

    Only construction-time wiring is checked here.  The stub ingestor defines
    nothing beyond ``__init__``, so the precedence of user kwargs over preset
    defaults inside ``.extract()`` / ``.embed()`` is not exercised by this class.
    """

    def test_preset_defaults_stored_on_construction(self, monkeypatch):
        """_default_extract_kwargs and _default_embed_kwargs are stored via factory."""
        dummy_cls = _make_capturing_inprocess_module(monkeypatch)
        ingestor = create_ingestor(run_mode="inprocess", inference="build.nvidia.com")
        assert isinstance(ingestor, dummy_cls)
        assert ingestor._default_extract_kwargs == _BUILD_NVIDIA_EXTRACT_DEFAULTS
        # The docstring promises both halves of the preset; check embed too.
        assert ingestor._default_embed_kwargs == _BUILD_NVIDIA_EMBED_DEFAULTS

    def test_empty_defaults_on_construction_without_preset(self, monkeypatch):
        """No defaults set when inference preset is absent."""
        dummy_cls = _make_capturing_inprocess_module(monkeypatch)
        ingestor = create_ingestor(run_mode="inprocess")
        assert isinstance(ingestor, dummy_cls)
        assert ingestor._default_extract_kwargs == {}
        assert ingestor._default_embed_kwargs == {}