From c05094518445513d7e3feb45313f68847aba31d1 Mon Sep 17 00:00:00 2001 From: abhay1999 Date: Wed, 18 Mar 2026 07:23:22 +0530 Subject: [PATCH 1/2] helm: add nodeSelector and tolerations to NIMCache templates NIMCache resources were missing nodeSelector and tolerations fields across all Operator-mode templates. Without these, NIMCache pods cannot be scheduled on GPU nodes that have taints or require specific node selection (e.g. cloud provider GPU node pools). The NIMService sections of the same templates already expose tolerations, and all but two of them also expose nodeSelector. This commit adds the equivalent fields to the NIMCache sections of all eight affected templates, and also adds the missing nodeSelector to the NIMService sections of nemotron-nano-12b-v2-vl and nemotron-parse, which previously only had tolerations. It also removes the hard-coded empty `resources: {}` entry from the NIMCache section of nemotron-page-elements-v3. Fixes #1636 Signed-off-by: abhay1999 --- helm/templates/llama-nemotron-embed-1b-v2.yaml | 4 ++++ helm/templates/llama-nemotron-rerank-1b-v2.yaml | 4 ++++ helm/templates/nemotron-graphic-elements-v1.yaml | 4 ++++ helm/templates/nemotron-nano-12b-v2-vl.yaml | 6 ++++++ helm/templates/nemotron-ocr-v1.yaml | 4 ++++ helm/templates/nemotron-page-elements-v3.yaml | 5 ++++- helm/templates/nemotron-parse.yaml | 6 ++++++ helm/templates/nemotron-table-structure-v1.yaml | 4 ++++ 8 files changed, 36 insertions(+), 1 deletion(-) diff --git a/helm/templates/llama-nemotron-embed-1b-v2.yaml b/helm/templates/llama-nemotron-embed-1b-v2.yaml index 199bcdc9c..de0b429b7 100644 --- a/helm/templates/llama-nemotron-embed-1b-v2.yaml +++ b/helm/templates/llama-nemotron-embed-1b-v2.yaml @@ -17,6 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.embedqa.storage.pvc.storageClass }} size: {{ .Values.nimOperator.embedqa.storage.pvc.size }} volumeAccessMode: {{ .Values.nimOperator.embedqa.storage.pvc.volumeAccessMode }} + nodeSelector: +{{ toYaml .Values.nimOperator.embedqa.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.embedqa.tolerations | nindent 4 }} --- apiVersion: apps.nvidia.com/v1alpha1 
kind: NIMService diff --git a/helm/templates/llama-nemotron-rerank-1b-v2.yaml b/helm/templates/llama-nemotron-rerank-1b-v2.yaml index 6cfc2fcfc..e85490a30 100644 --- a/helm/templates/llama-nemotron-rerank-1b-v2.yaml +++ b/helm/templates/llama-nemotron-rerank-1b-v2.yaml @@ -17,6 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.rerankqa.storage.pvc.storageClass }} size: {{ .Values.nimOperator.rerankqa.storage.pvc.size }} volumeAccessMode: {{ .Values.nimOperator.rerankqa.storage.pvc.volumeAccessMode }} + nodeSelector: +{{ toYaml .Values.nimOperator.rerankqa.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.rerankqa.tolerations | nindent 4 }} --- apiVersion: apps.nvidia.com/v1alpha1 kind: NIMService diff --git a/helm/templates/nemotron-graphic-elements-v1.yaml b/helm/templates/nemotron-graphic-elements-v1.yaml index 1fbdcbaf7..1549991e5 100644 --- a/helm/templates/nemotron-graphic-elements-v1.yaml +++ b/helm/templates/nemotron-graphic-elements-v1.yaml @@ -17,6 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.graphic_elements.storage.pvc.storageClass }} size: {{ .Values.nimOperator.graphic_elements.storage.pvc.size }} volumeAccessMode: {{ .Values.nimOperator.graphic_elements.storage.pvc.volumeAccessMode }} + nodeSelector: +{{ toYaml .Values.nimOperator.graphic_elements.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.graphic_elements.tolerations | nindent 4 }} --- apiVersion: apps.nvidia.com/v1alpha1 kind: NIMService diff --git a/helm/templates/nemotron-nano-12b-v2-vl.yaml b/helm/templates/nemotron-nano-12b-v2-vl.yaml index 1b5c1ca9d..a8ffaf8e3 100644 --- a/helm/templates/nemotron-nano-12b-v2-vl.yaml +++ b/helm/templates/nemotron-nano-12b-v2-vl.yaml @@ -17,6 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.storageClass }} size: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.size }} volumeAccessMode: {{ 
.Values.nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.volumeAccessMode }} + nodeSelector: +{{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.tolerations | nindent 4 }} --- apiVersion: apps.nvidia.com/v1alpha1 kind: NIMService @@ -34,6 +38,8 @@ spec: nimCache: name: nemotron-nano-12b-v2-vl replicas: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.replicas }} + nodeSelector: +{{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.nodeSelector | nindent 4 }} resources: {{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.resources | nindent 4 }} tolerations: diff --git a/helm/templates/nemotron-ocr-v1.yaml b/helm/templates/nemotron-ocr-v1.yaml index 7ae0f2dea..60ab1a0ce 100644 --- a/helm/templates/nemotron-ocr-v1.yaml +++ b/helm/templates/nemotron-ocr-v1.yaml @@ -17,6 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.ocr.storage.pvc.storageClass }} size: {{ .Values.nimOperator.ocr.storage.pvc.size }} volumeAccessMode: {{ .Values.nimOperator.ocr.storage.pvc.volumeAccessMode }} + nodeSelector: +{{ toYaml .Values.nimOperator.ocr.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.ocr.tolerations | nindent 4 }} --- apiVersion: apps.nvidia.com/v1alpha1 kind: NIMService diff --git a/helm/templates/nemotron-page-elements-v3.yaml b/helm/templates/nemotron-page-elements-v3.yaml index 223c7db24..2f8aa3ee4 100644 --- a/helm/templates/nemotron-page-elements-v3.yaml +++ b/helm/templates/nemotron-page-elements-v3.yaml @@ -17,7 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.page_elements.storage.pvc.storageClass }} size: {{ .Values.nimOperator.page_elements.storage.pvc.size }} volumeAccessMode: {{ .Values.nimOperator.page_elements.storage.pvc.volumeAccessMode }} - resources: {} + nodeSelector: +{{ toYaml .Values.nimOperator.page_elements.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.page_elements.tolerations | nindent 4 }} 
--- apiVersion: apps.nvidia.com/v1alpha1 kind: NIMService diff --git a/helm/templates/nemotron-parse.yaml b/helm/templates/nemotron-parse.yaml index 25f3e07ac..114aa1c39 100644 --- a/helm/templates/nemotron-parse.yaml +++ b/helm/templates/nemotron-parse.yaml @@ -17,6 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.nemotron_parse.storage.pvc.storageClass }} size: {{ .Values.nimOperator.nemotron_parse.storage.pvc.size }} volumeAccessMode: {{ .Values.nimOperator.nemotron_parse.storage.pvc.volumeAccessMode }} + nodeSelector: +{{ toYaml .Values.nimOperator.nemotron_parse.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.nemotron_parse.tolerations | nindent 4 }} --- apiVersion: apps.nvidia.com/v1alpha1 kind: NIMService @@ -34,6 +38,8 @@ spec: nimCache: name: nemotron-parse replicas: {{ .Values.nimOperator.nemotron_parse.replicas }} + nodeSelector: +{{ toYaml .Values.nimOperator.nemotron_parse.nodeSelector | nindent 4 }} resources: {{ toYaml .Values.nimOperator.nemotron_parse.resources | nindent 4 }} tolerations: diff --git a/helm/templates/nemotron-table-structure-v1.yaml b/helm/templates/nemotron-table-structure-v1.yaml index b305a1e09..0320e9246 100644 --- a/helm/templates/nemotron-table-structure-v1.yaml +++ b/helm/templates/nemotron-table-structure-v1.yaml @@ -17,6 +17,10 @@ spec: storageClass: {{ .Values.nimOperator.table_structure.storage.pvc.storageClass }} size: {{ .Values.nimOperator.table_structure.storage.pvc.size }} volumeAccessMode: {{ .Values.nimOperator.table_structure.storage.pvc.volumeAccessMode }} + nodeSelector: +{{ toYaml .Values.nimOperator.table_structure.nodeSelector | nindent 4 }} + tolerations: +{{ toYaml .Values.nimOperator.table_structure.tolerations | nindent 4 }} --- apiVersion: apps.nvidia.com/v1alpha1 kind: NIMService From d3d6c057d4fb3b8a5b0c98f7d7e835f08efc3c82 Mon Sep 17 00:00:00 2001 From: abhay1999 Date: Wed, 18 Mar 2026 07:27:33 +0530 Subject: [PATCH 2/2] docs: fix trailing blank line in 
quickstart-library-mode.md Remove extra trailing blank line to satisfy pre-commit end-of-file-fixer hook. Signed-off-by: abhay1999 --- docs/docs/extraction/quickstart-library-mode.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/docs/extraction/quickstart-library-mode.md b/docs/docs/extraction/quickstart-library-mode.md index e27b7e413..9122c4e79 100644 --- a/docs/docs/extraction/quickstart-library-mode.md +++ b/docs/docs/extraction/quickstart-library-mode.md @@ -5,4 +5,3 @@ NVIDIA Ingest (nv-ingest) has been renamed to the NeMo Retriever Library. Use the [Quick Start for NeMo Retriever Library](https://github.com/NVIDIA/NeMo-Retriever/blob/26.03/nemo_retriever/README.md) to set up and run the NeMo Retriever Library locally, so you can build a GPU‑accelerated, multimodal RAG ingestion pipeline that parses PDFs, HTML, text, audio, and video into LanceDB vector embeddings, integrates with Nemotron RAG models (locally or via NIM endpoints), which includes Ray‑based scaling plus built‑in recall evaluation. -