diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml index 12ca74f18..fbb3fc85c 100644 --- a/.github/workflows/image.yaml +++ b/.github/workflows/image.yaml @@ -61,6 +61,7 @@ jobs: env: IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/k8s-device-plugin VERSION: ${COMMIT_SHORT_SHA} + CVE_UPDATES: "libarchive" run: | echo "${VERSION}" make -f deployments/container/Makefile build diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b2987764..16dd492d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ ## Changelog +### Version v0.17.4 +- Bump github.com/NVIDIA/go-nvlib from 0.7.2 to 0.7.4 +- Bump golang version to 1.23.12 +- Ensure that directory volumes have Directory type +- Ignore errors getting device memory using NVML + +### Version v0.17.3 +- Bump nvidia-container-toolkit to 1.17.8 +- Bump github.com/NVIDIA/go-nvml from 0.12.4-1 to 0.12.9-0 +- Bump golang version to 1.23.11 + ### Version v0.17.2 - Update nvidia.com/gpu.product label to include blackwell architectures - Update documentation to indicate that nvidia.com/gpu.memory label is in MiB instead of MB diff --git a/README.md b/README.md index ae106e034..0d8008e5c 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ Once you have configured the options above on all the GPU nodes in your cluster, you can enable GPU support by deploying the following Daemonset: ```shell -kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.3/deployments/static/nvidia-device-plugin.yml +kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.4/deployments/static/nvidia-device-plugin.yml ``` **Note:** This is a simple static daemonset meant to demonstrate the basic @@ -639,12 +639,12 @@ helm repo add nvdp https://nvidia.github.io/k8s-device-plugin helm repo update ``` -Then verify that the latest release (`v0.17.3`) of the plugin is available: +Then verify that the latest release (`v0.17.4`) of the plugin is available: ```shell $ helm search repo 
nvdp --devel NAME CHART VERSION APP VERSION DESCRIPTION -nvdp/nvidia-device-plugin 0.17.3 0.17.3 A Helm chart for ... +nvdp/nvidia-device-plugin 0.17.4 0.17.4 A Helm chart for ... ``` Once this repo is updated, you can begin installing packages from it to deploy @@ -656,7 +656,7 @@ The most basic installation command without any options is then: helm upgrade -i nvdp nvdp/nvidia-device-plugin \ --namespace nvidia-device-plugin \ --create-namespace \ - --version 0.17.3 + --version 0.17.4 ``` **Note:** You only need the to pass the `--devel` flag to `helm search repo` @@ -665,7 +665,7 @@ version (e.g. `-rc.1`). Full releases will be listed without this. ### Configuring the device plugin's `helm` chart -The `helm` chart for the latest release of the plugin (`v0.17.3`) includes +The `helm` chart for the latest release of the plugin (`v0.17.4`) includes a number of customizable values. Prior to `v0.12.0` the most commonly used values were those that had direct @@ -675,7 +675,7 @@ case of the original values is then to override an option from the `ConfigMap` if desired. Both methods are discussed in more detail below. The full set of values that can be set are found here: -[here](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.17.3/deployments/helm/nvidia-device-plugin/values.yaml). +[here](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.17.4/deployments/helm/nvidia-device-plugin/values.yaml). 
#### Passing configuration to the plugin via a `ConfigMap` @@ -718,7 +718,7 @@ And deploy the device plugin via helm (pointing it at this config file and givin ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --namespace nvidia-device-plugin \ --create-namespace \ --set-file config.map.config=/tmp/dp-example-config0.yaml @@ -743,7 +743,7 @@ kubectl create cm -n nvidia-device-plugin nvidia-plugin-configs \ ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.name=nvidia-plugin-configs @@ -773,7 +773,7 @@ And redeploy the device plugin via helm (pointing it at both configs with a spec ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.default=config0 \ @@ -795,7 +795,7 @@ kubectl create cm -n nvidia-device-plugin nvidia-plugin-configs \ ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --namespace nvidia-device-plugin \ --create-namespace \ --set config.default=config0 \ @@ -881,7 +881,7 @@ runtimeClassName: ``` Please take a look in the -[`values.yaml`](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.17.3/deployments/helm/nvidia-device-plugin/values.yaml) +[`values.yaml`](https://github.com/NVIDIA/k8s-device-plugin/blob/v0.17.4/deployments/helm/nvidia-device-plugin/values.yaml) file to see the full set of overridable parameters for the device plugin. 
Examples of setting these options include: @@ -891,7 +891,7 @@ Enabling compatibility with the `CPUManager` and running with a request for ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --namespace nvidia-device-plugin \ --create-namespace \ --set compatWithCPUManager=true \ @@ -903,7 +903,7 @@ Enabling compatibility with the `CPUManager` and the `mixed` `migStrategy`. ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --namespace nvidia-device-plugin \ --create-namespace \ --set compatWithCPUManager=true \ @@ -922,7 +922,7 @@ To enable it, simply set `gfd.enabled=true` during helm install. ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --namespace nvidia-device-plugin \ --create-namespace \ --set gfd.enabled=true @@ -980,13 +980,13 @@ helm repo add nvdp https://nvidia.github.io/k8s-device-plugin helm repo update ``` -Then verify that the latest release (`v0.17.3`) of the plugin is available +Then verify that the latest release (`v0.17.4`) of the plugin is available (Note that this includes the GFD chart): ```shell helm search repo nvdp --devel NAME CHART VERSION APP VERSION DESCRIPTION -nvdp/nvidia-device-plugin 0.17.3 0.17.3 A Helm chart for ... +nvdp/nvidia-device-plugin 0.17.4 0.17.4 A Helm chart for ... ``` Once this repo is updated, you can begin installing packages from it to deploy @@ -996,7 +996,7 @@ The most basic installation command without any options is then: ```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version 0.17.3 \ + --version 0.17.4 \ --namespace gpu-feature-discovery \ --create-namespace \ --set devicePlugin.enabled=false @@ -1007,7 +1007,7 @@ the default namespace. 
```shell helm upgrade -i nvdp nvdp/nvidia-device-plugin \ - --version=0.17.3 \ + --version=0.17.4 \ --set allowDefaultNamespace=true \ --set nfd.enabled=false \ --set migStrategy=mixed \ @@ -1031,14 +1031,14 @@ Using the default values for the flags: helm upgrade -i nvdp \ --namespace nvidia-device-plugin \ --create-namespace \ - https://nvidia.github.io/k8s-device-plugin/stable/nvidia-device-plugin-0.17.3.tgz + https://nvidia.github.io/k8s-device-plugin/stable/nvidia-device-plugin-0.17.4.tgz ``` ## Building and Running Locally The next sections are focused on building the device plugin locally and running it. It is intended purely for development and testing, and not required by most users. -It assumes you are pinning to the latest release tag (i.e. `v0.17.3`), but can +It assumes you are pinning to the latest release tag (i.e. `v0.17.4`), but can easily be modified to work with any available tag or branch. ### With Docker @@ -1048,8 +1048,8 @@ easily be modified to work with any available tag or branch. 
Option 1, pull the prebuilt image from [Docker Hub](https://hub.docker.com/r/nvidia/k8s-device-plugin): ```shell -docker pull nvcr.io/nvidia/k8s-device-plugin:v0.17.3 -docker tag nvcr.io/nvidia/k8s-device-plugin:v0.17.3 nvcr.io/nvidia/k8s-device-plugin:devel +docker pull nvcr.io/nvidia/k8s-device-plugin:v0.17.4 +docker tag nvcr.io/nvidia/k8s-device-plugin:v0.17.4 nvcr.io/nvidia/k8s-device-plugin:devel ``` Option 2, build without cloning the repository: @@ -1058,7 +1058,7 @@ Option 2, build without cloning the repository: docker build \ -t nvcr.io/nvidia/k8s-device-plugin:devel \ -f deployments/container/Dockerfile.ubuntu \ - https://github.com/NVIDIA/k8s-device-plugin.git#v0.17.3 + https://github.com/NVIDIA/k8s-device-plugin.git#v0.17.4 ``` Option 3, if you want to modify the code: diff --git a/deployments/container/Dockerfile b/deployments/container/Dockerfile index fe624e206..a04a99ed2 100644 --- a/deployments/container/Dockerfile +++ b/deployments/container/Dockerfile @@ -53,6 +53,13 @@ RUN rpm -qa | sort -u > /tmp/package-list.minimal # We define the following image as a base image and remove unneeded packages. 
FROM nvcr.io/nvidia/cuda:13.0.0-base-ubi9 AS base +# Upgrade packages here that are required to resolve CVEs +ARG CVE_UPDATES +RUN if [ -n "${CVE_UPDATES}" ]; then \ + dnf update -y ${CVE_UPDATES} && \ + dnf clean all && rm -rf /var/cache/dnf /var/cache/yum; \ + fi + WORKDIR /cleanup COPY --from=minimal /tmp/package-names.minimal package-names.minimal diff --git a/deployments/container/Makefile b/deployments/container/Makefile index 04a4b8cdf..dbc493a16 100644 --- a/deployments/container/Makefile +++ b/deployments/container/Makefile @@ -82,6 +82,7 @@ $(IMAGE_TARGETS): image-%: --build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \ --build-arg VERSION="$(VERSION)" \ --build-arg GIT_COMMIT="$(GIT_COMMIT)" \ + --build-arg CVE_UPDATES="$(CVE_UPDATES)" \ $(if $(LABEL_IMAGE_SOURCE),--label "org.opencontainers.image.source=$(LABEL_IMAGE_SOURCE)",) \ -f $(DOCKERFILE) \ $(CURDIR) diff --git a/deployments/helm/nvidia-device-plugin/Chart.yaml b/deployments/helm/nvidia-device-plugin/Chart.yaml index 25ef1fc1d..d3ae29b86 100644 --- a/deployments/helm/nvidia-device-plugin/Chart.yaml +++ b/deployments/helm/nvidia-device-plugin/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: nvidia-device-plugin type: application description: A Helm chart for the nvidia-device-plugin on Kubernetes -version: "0.17.3" -appVersion: "0.17.3" +version: "0.17.4" +appVersion: "0.17.4" kubeVersion: ">= 1.10.0-0" home: https://github.com/NVIDIA/k8s-device-plugin diff --git a/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml b/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml index c8c9fb828..faa49924a 100644 --- a/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset-with-mig-mixed.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 
+15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.4 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml b/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml index 75fed3970..0532464e2 100644 --- a/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset-with-mig-single.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.4 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-daemonset.yaml b/deployments/static/gpu-feature-discovery-daemonset.yaml index f88bcdcb8..e85fea942 100644 --- a/deployments/static/gpu-feature-discovery-daemonset.yaml +++ b/deployments/static/gpu-feature-discovery-daemonset.yaml @@ -4,7 +4,7 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: selector: @@ -15,11 +15,11 @@ spec: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - 
app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.4 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] volumeMounts: diff --git a/deployments/static/gpu-feature-discovery-job.yaml.template b/deployments/static/gpu-feature-discovery-job.yaml.template index 74b34f5dc..08e0bd04a 100644 --- a/deployments/static/gpu-feature-discovery-job.yaml.template +++ b/deployments/static/gpu-feature-discovery-job.yaml.template @@ -4,19 +4,19 @@ metadata: name: gpu-feature-discovery labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: template: metadata: labels: app.kubernetes.io/name: gpu-feature-discovery - app.kubernetes.io/version: 0.17.3 + app.kubernetes.io/version: 0.17.4 app.kubernetes.io/part-of: nvidia-gpu spec: nodeName: NODE_NAME containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.4 name: gpu-feature-discovery command: ["/usr/bin/gpu-feature-discovery"] args: diff --git a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml index 4a54165f2..9121b7e5f 100644 --- a/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml +++ b/deployments/static/nvidia-device-plugin-compat-with-cpumanager.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.4 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR diff --git 
a/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml b/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml index 49758d0fa..8ec4258bb 100644 --- a/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml +++ b/deployments/static/nvidia-device-plugin-privileged-with-service-account.yml @@ -124,7 +124,7 @@ spec: - env: - name: PASS_DEVICE_SPECS value: "true" - image: nvcr.io/nvidia/k8s-device-plugin:v0.16.1 + image: nvcr.io/nvidia/k8s-device-plugin:v0.17.4 name: nvidia-device-plugin-ctr securityContext: privileged: true diff --git a/deployments/static/nvidia-device-plugin.yml b/deployments/static/nvidia-device-plugin.yml index b96264dc0..8a4560022 100644 --- a/deployments/static/nvidia-device-plugin.yml +++ b/deployments/static/nvidia-device-plugin.yml @@ -38,7 +38,7 @@ spec: # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ priorityClassName: "system-node-critical" containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3 + - image: nvcr.io/nvidia/k8s-device-plugin:v0.17.4 name: nvidia-device-plugin-ctr env: - name: FAIL_ON_INIT_ERROR diff --git a/versions.mk b/versions.mk index 982bade8b..a044af959 100644 --- a/versions.mk +++ b/versions.mk @@ -17,7 +17,7 @@ MODULE := github.com/NVIDIA/$(DRIVER_NAME) REGISTRY ?= nvcr.io/nvidia -VERSION ?= v0.17.3 +VERSION ?= v0.17.4 # vVERSION represents the version with a guaranteed v-prefix vVERSION := v$(VERSION:v%=%)