From 07e0a24c02b5b9abe04daeaeb10e18e761cb30e1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 14 Jun 2026 09:12:24 +0000 Subject: [PATCH] Bump github.com/NVIDIA/go-nvml from 0.13.0-1 to 0.13.1-0 Bumps [github.com/NVIDIA/go-nvml](https://github.com/NVIDIA/go-nvml) from 0.13.0-1 to 0.13.1-0. - [Release notes](https://github.com/NVIDIA/go-nvml/releases) - [Commits](https://github.com/NVIDIA/go-nvml/compare/v0.13.0-1...v0.13.1-0) --- updated-dependencies: - dependency-name: github.com/NVIDIA/go-nvml dependency-version: 0.13.1-0 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 +- vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl.go | 28 +- .../NVIDIA/go-nvml/pkg/dl/dl_linux.go | 40 + .../NVIDIA/go-nvml/pkg/dl/dl_other.go | 33 + .../github.com/NVIDIA/go-nvml/pkg/nvml/api.go | 2 +- .../go-nvml/pkg/nvml/cgo_helpers_static.go | 48 + .../NVIDIA/go-nvml/pkg/nvml/const.go | 197 ++- .../NVIDIA/go-nvml/pkg/nvml/device.go | 146 +- .../github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go | 9 +- .../NVIDIA/go-nvml/pkg/nvml/mock/device.go | 262 ++++ .../NVIDIA/go-nvml/pkg/nvml/mock/interface.go | 300 ++++ .../NVIDIA/go-nvml/pkg/nvml/nvml.go | 54 + .../github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h | 1299 +++++++++++------ .../NVIDIA/go-nvml/pkg/nvml/types_gen.go | 146 +- .../go-nvml/pkg/nvml/zz_generated.api.go | 18 + vendor/modules.txt | 2 +- 17 files changed, 2049 insertions(+), 541 deletions(-) create mode 100644 vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_other.go diff --git a/go.mod b/go.mod index 7e23ed3..e80362d 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.20 require ( github.com/NVIDIA/go-nvlib v0.10.0 - github.com/NVIDIA/go-nvml v0.13.0-1 + github.com/NVIDIA/go-nvml v0.13.1-0 github.com/stretchr/testify v1.11.1 ) diff --git a/go.sum b/go.sum index dfa7bc0..70238e0 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ github.com/NVIDIA/go-nvlib v0.10.0 h1:2jbAFmvLBntIc/4iUChI9DzxyYNI92pohXU4kFuNrg0= github.com/NVIDIA/go-nvlib v0.10.0/go.mod h1:7mzx9FSdO9fXWP9NKuZmWkCwhkEcSWQFe2tmFwtLb9c= -github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw= -github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= +github.com/NVIDIA/go-nvml v0.13.1-0 h1:iazm3YDW1mYzvuY0uGyFsDvU87Ahr8P9AATN/VD4PZg= +github.com/NVIDIA/go-nvml v0.13.1-0/go.mod h1:ahi2psRYoa+wYUBIrZPRO+wJs9lcvMhxSSkjjvsJJNQ= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl.go b/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl.go index 34948a7..494ca82 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "runtime" + "strings" "unsafe" ) @@ -39,14 +40,29 @@ type DynamicLibrary struct { Name string Flags int handle unsafe.Pointer + path string } func New(name string, flags int) *DynamicLibrary { - return &DynamicLibrary{ - Name: name, - Flags: flags, - handle: nil, - } + return (&DynamicLibrary{ + Name: name, + Flags: flags, + }).init() +} + +func (dl *DynamicLibrary) reset() { + _ = dl.init() +} + +func (dl *DynamicLibrary) init() *DynamicLibrary { + dl.handle = nil + dl.path = func() string { + if strings.Contains(dl.Name, "/") { + return dl.Name + } + return "" + }() + return dl } func withOSLock(action func() error) error { @@ -89,7 +105,7 @@ func (dl *DynamicLibrary) Close() error { if C.dlclose(dl.handle) != 0 { return dlError() } - dl.handle = nil + dl.reset() return nil }); err != nil { return err diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_linux.go b/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_linux.go index ae3acd0..6941d08 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_linux.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_linux.go @@ -17,10 +17,50 @@ package dl // #cgo LDFLAGS: -ldl +// #define _GNU_SOURCE // #include // #include +// #include import "C" +import ( + "fmt" + "path/filepath" + "unsafe" +) const ( RTLD_DEEPBIND = C.RTLD_DEEPBIND ) + +// Path returns the path to the loaded library. +// See https://man7.org/linux/man-pages/man3/dlinfo.3.html +func (dl *DynamicLibrary) Path() (string, error) { + if dl.handle == nil { + return "", fmt.Errorf("%v not opened", dl.Name) + } + + libParentPathBuffer := C.CBytes(make([]byte, 0, C.PATH_MAX)) + defer C.free(unsafe.Pointer(libParentPathBuffer)) + + var libPath string + if err := withOSLock(func() error { + if dl.path != "" { + libPath = dl.path + return nil + } + // Call dlError() to clear out any previous errors. + _ = dlError() + ret := C.dlinfo(dl.handle, C.RTLD_DI_ORIGIN, libParentPathBuffer) + if ret == -1 { + return fmt.Errorf("dlinfo call failed: %w", dlError()) + } + + libPath = filepath.Join(C.GoString((*C.char)(libParentPathBuffer)), dl.Name) + dl.path = libPath + + return nil + }); err != nil { + return "", err + } + return libPath, nil +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_other.go b/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_other.go new file mode 100644 index 0000000..c483437 --- /dev/null +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/dl/dl_other.go @@ -0,0 +1,33 @@ +//go:build !linux + +/** +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package dl + +import ( + "fmt" +) + +// Path is NOT supported on non-Linux platforms. +// For example, on freebsd (darwin) systems, dladdr should be used instead of +// dlinfo which is used on linux. +// See for example: https://github.com/Manu343726/siplasplas/issues/82 +// For now we return an error. +func (dl *DynamicLibrary) Path() (string, error) { + return "", fmt.Errorf("not implemented") +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/api.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/api.go index fdf27bd..0cdd274 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/api.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/api.go @@ -27,7 +27,7 @@ type ExtendedInterface interface { LookupSymbol(string) error } -// libraryOptions hold the paramaters than can be set by a LibraryOption +// libraryOptions hold the parameters that can be set by a LibraryOption type libraryOptions struct { path string flags int diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers_static.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers_static.go index 1f30eaa..fc67554 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers_static.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/cgo_helpers_static.go @@ -18,6 +18,9 @@ import ( "unsafe" ) +/* +#include +*/ import "C" var cgoAllocsUnknown = new(struct{}) @@ -55,6 +58,17 @@ func convertSlice[T any, I any](input []T) []I { return output } +func int32SliceToMask255(s []int32) Mask255 { + var m Mask255 + for _, p := range s { + if p < 0 || p >= 255 { + continue + } + m.Mask[p/32] |= 1 << (uint32(p) % 32) + } + return m +} + // packPCharString creates a Go string backed by *C.char and avoids copying. func packPCharString(p *C.char) (raw string) { if p != nil && *p != 0 { @@ -73,3 +87,37 @@ func unpackPCharString(str string) (*C.char, *struct{}) { h := (*stringHeader)(unsafe.Pointer(&str)) return (*C.char)(h.Data), cgoAllocsUnknown } + +func malloc(size uintptr) unsafe.Pointer { + return C.malloc(C.size_t(size)) +} + +func free(ptr unsafe.Pointer) { + C.free(ptr) +} + +// int8SliceToString converts a NUL-terminated C char array (typed as []int8) +// into a Go string, stopping at the first NUL. +func int8SliceToString(s []int8) string { + buf := make([]byte, len(s)) + for i, c := range s { + buf[i] = byte(c) + } + return string(buf[:clen(buf)]) +} + +// stringToInt8Slice copies s into out as a NUL-terminated C string. At most +// len(out)-1 bytes are written so the final byte is always a NUL terminator; +// remaining bytes in out are zeroed. +func stringToInt8Slice(s string, out []int8) { + n := len(s) + if n > len(out)-1 { + n = len(out) - 1 + } + for i := 0; i < n; i++ { + out[i] = int8(s[i]) + } + for i := n; i < len(out); i++ { + out[i] = 0 + } +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go index 8a6a93c..4228c4d 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/const.go @@ -76,6 +76,28 @@ const ( POWER_MIZER_MODE_AUTO = 2 // POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE as defined in nvml/nvml.h POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE = 3 + // DEVICE_HOSTNAME_BUFFER_SIZE as defined in nvml/nvml.h + DEVICE_HOSTNAME_BUFFER_SIZE = 64 + // RUSD_POLL_NONE as defined in nvml/nvml.h + RUSD_POLL_NONE = 0 + // RUSD_POLL_CLOCK as defined in nvml/nvml.h + RUSD_POLL_CLOCK = 1 + // RUSD_POLL_PERF as defined in nvml/nvml.h + RUSD_POLL_PERF = 2 + // RUSD_POLL_MEMORY as defined in nvml/nvml.h + RUSD_POLL_MEMORY = 4 + // RUSD_POLL_POWER as defined in nvml/nvml.h + RUSD_POLL_POWER = 8 + // RUSD_POLL_THERMAL as defined in nvml/nvml.h + RUSD_POLL_THERMAL = 16 + // RUSD_POLL_PCI as defined in nvml/nvml.h + RUSD_POLL_PCI = 32 + // RUSD_POLL_FAN as defined in nvml/nvml.h + RUSD_POLL_FAN = 64 + // RUSD_POLL_PROC_UTIL as defined in nvml/nvml.h + RUSD_POLL_PROC_UTIL = 128 + // RUSD_POLL_ALL as defined in nvml/nvml.h + RUSD_POLL_ALL = 18446744073709551615 // GSP_FIRMWARE_VERSION_BUF_SIZE as defined in nvml/nvml.h GSP_FIRMWARE_VERSION_BUF_SIZE = 64 // DEVICE_ARCH_KEPLER as defined in nvml/nvml.h @@ -714,58 +736,88 @@ const ( FI_DEV_NVLINK_COUNT_FEC_HISTORY_14 = 249 // FI_DEV_NVLINK_COUNT_FEC_HISTORY_15 as defined in nvml/nvml.h FI_DEV_NVLINK_COUNT_FEC_HISTORY_15 = 250 - // FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP as defined in nvml/nvml.h - FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP = 74 - // FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST as defined in nvml/nvml.h - FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST = 76 - // FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN as defined in nvml/nvml.h - FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN = 251 - // FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN as defined in nvml/nvml.h - FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN = 252 - // FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN as defined in nvml/nvml.h - FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN = 253 - // FI_DEV_POWER_SYNC_BALANCING_FREQ as defined in nvml/nvml.h - FI_DEV_POWER_SYNC_BALANCING_FREQ = 254 - // FI_DEV_POWER_SYNC_BALANCING_AF as defined in nvml/nvml.h - FI_DEV_POWER_SYNC_BALANCING_AF = 255 // FI_PWR_SMOOTHING_ENABLED as defined in nvml/nvml.h - FI_PWR_SMOOTHING_ENABLED = 256 + FI_PWR_SMOOTHING_ENABLED = 251 // FI_PWR_SMOOTHING_PRIV_LVL as defined in nvml/nvml.h - FI_PWR_SMOOTHING_PRIV_LVL = 257 + FI_PWR_SMOOTHING_PRIV_LVL = 252 // FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED as defined in nvml/nvml.h - FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED = 258 + FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED = 253 // FI_PWR_SMOOTHING_APPLIED_TMP_CEIL as defined in nvml/nvml.h - FI_PWR_SMOOTHING_APPLIED_TMP_CEIL = 259 + FI_PWR_SMOOTHING_APPLIED_TMP_CEIL = 254 // FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR as defined in nvml/nvml.h - FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR = 260 + FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR = 255 // FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING as defined in nvml/nvml.h - FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING = 261 + FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING = 256 // FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING as defined in nvml/nvml.h - FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING = 262 + FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING = 257 // FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING as defined in nvml/nvml.h - FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING = 263 + FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING = 258 // FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES as defined in nvml/nvml.h - FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES = 264 + FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES = 259 // FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR as defined in nvml/nvml.h - FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR = 265 + FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR = 260 // FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE as defined in nvml/nvml.h - FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE = 266 + FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE = 261 // FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE as defined in nvml/nvml.h - FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE = 267 + FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE = 262 // FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL as defined in nvml/nvml.h - FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL = 268 + FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL = 263 // FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE as defined in nvml/nvml.h - FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE = 269 + FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE = 264 // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR as defined in nvml/nvml.h - FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR = 270 + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR = 265 // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE as defined in nvml/nvml.h - FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE = 271 + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE = 266 // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE as defined in nvml/nvml.h - FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE = 272 + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE = 267 // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL as defined in nvml/nvml.h - FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL = 273 + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL = 268 + // FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP = 74 + // FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST = 76 + // FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN = 269 + // FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN = 270 + // FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN as defined in nvml/nvml.h + FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN = 271 + // FI_DEV_POWER_SYNC_BALANCING_FREQ as defined in nvml/nvml.h + FI_DEV_POWER_SYNC_BALANCING_FREQ = 272 + // FI_DEV_POWER_SYNC_BALANCING_AF as defined in nvml/nvml.h + FI_DEV_POWER_SYNC_BALANCING_AF = 273 + // FI_DEV_EDPP_MULTIPLIER as defined in nvml/nvml.h + FI_DEV_EDPP_MULTIPLIER = 274 + // FI_PWR_SMOOTHING_PRIMARY_POWER_FLOOR as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PRIMARY_POWER_FLOOR = 275 + // FI_PWR_SMOOTHING_SECONDARY_POWER_FLOOR as defined in nvml/nvml.h + FI_PWR_SMOOTHING_SECONDARY_POWER_FLOOR = 276 + // FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_OFFSET as defined in nvml/nvml.h + FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_OFFSET = 277 + // FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_POINT as defined in nvml/nvml.h + FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_POINT = 278 + // FI_PWR_SMOOTHING_WINDOW_MULTIPLIER as defined in nvml/nvml.h + FI_PWR_SMOOTHING_WINDOW_MULTIPLIER = 279 + // FI_PWR_SMOOTHING_DELAYED_PWR_SMOOTHING_SUPPORTED as defined in nvml/nvml.h + FI_PWR_SMOOTHING_DELAYED_PWR_SMOOTHING_SUPPORTED = 280 + // FI_PWR_SMOOTHING_PROFILE_SECONDARY_POWER_FLOOR as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_SECONDARY_POWER_FLOOR = 281 + // FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_WIN_MULT as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_WIN_MULT = 282 + // FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_TAR_WIN_MULT as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_TAR_WIN_MULT = 283 + // FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_OFFSET as defined in nvml/nvml.h + FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_OFFSET = 284 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_SECONDARY_POWER_FLOOR as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_SECONDARY_POWER_FLOOR = 285 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_WIN_MULT as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_WIN_MULT = 286 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_TAR_WIN_MULT as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_TAR_WIN_MULT = 287 + // FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_OFFSET as defined in nvml/nvml.h + FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_OFFSET = 288 // FI_MAX as defined in nvml/nvml.h - FI_MAX = 274 + FI_MAX = 289 // NVLINK_LOW_POWER_THRESHOLD_UNIT_100US as defined in nvml/nvml.h NVLINK_LOW_POWER_THRESHOLD_UNIT_100US = 0 // NVLINK_LOW_POWER_THRESHOLD_UNIT_50US as defined in nvml/nvml.h @@ -992,6 +1044,10 @@ const ( GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NO_PARTITION = 4 // GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INSUFFICIENT_NVLINKS as defined in nvml/nvml.h GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INSUFFICIENT_NVLINKS = 5 + // GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCOMPATIBLE_GPU_FW as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCOMPATIBLE_GPU_FW = 6 + // GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INVALID_LOCATION as defined in nvml/nvml.h + GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INVALID_LOCATION = 7 // GPU_FABRIC_HEALTH_MASK_SHIFT_INCORRECT_CONFIGURATION as defined in nvml/nvml.h GPU_FABRIC_HEALTH_MASK_SHIFT_INCORRECT_CONFIGURATION = 8 // GPU_FABRIC_HEALTH_MASK_WIDTH_INCORRECT_CONFIGURATION as defined in nvml/nvml.h @@ -1008,6 +1064,8 @@ const ( INIT_FLAG_NO_GPUS = 1 // INIT_FLAG_NO_ATTACH as defined in nvml/nvml.h INIT_FLAG_NO_ATTACH = 2 + // INIT_FLAG_FORCE_INIT as defined in nvml/nvml.h + INIT_FLAG_FORCE_INIT = 4 // DEVICE_INFOROM_VERSION_BUFFER_SIZE as defined in nvml/nvml.h DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 // DEVICE_UUID_BUFFER_SIZE as defined in nvml/nvml.h @@ -1143,7 +1201,7 @@ const ( // POWER_SMOOTHING_MAX_NUM_PROFILES as defined in nvml/nvml.h POWER_SMOOTHING_MAX_NUM_PROFILES = 5 // POWER_SMOOTHING_NUM_PROFILE_PARAMS as defined in nvml/nvml.h - POWER_SMOOTHING_NUM_PROFILE_PARAMS = 4 + POWER_SMOOTHING_NUM_PROFILE_PARAMS = 8 // POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET as defined in nvml/nvml.h POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET = 4294967295 // POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR as defined in nvml/nvml.h @@ -1154,6 +1212,14 @@ const ( POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE = 2 // POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS as defined in nvml/nvml.h POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS = 3 + // POWER_SMOOTHING_PROFILE_PARAM_SECONDARY_POWER_FLOOR as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_SECONDARY_POWER_FLOOR = 4 + // POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_WIN_MULT as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_WIN_MULT = 5 + // POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_TAR_WIN_MULT as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_TAR_WIN_MULT = 6 + // POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_OFFSET as defined in nvml/nvml.h + POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_OFFSET = 7 ) // BridgeChipType as declared in nvml/nvml.h @@ -1437,21 +1503,6 @@ const ( MEMORY_ERROR_TYPE_COUNT MemoryErrorType = 2 ) -// NvlinkVersion as declared in nvml/nvml.h -type NvlinkVersion int32 - -// NvlinkVersion enumeration from nvml/nvml.h -const ( - NVLINK_VERSION_INVALID NvlinkVersion = iota - NVLINK_VERSION_1_0 NvlinkVersion = 1 - NVLINK_VERSION_2_0 NvlinkVersion = 2 - NVLINK_VERSION_2_2 NvlinkVersion = 3 - NVLINK_VERSION_3_0 NvlinkVersion = 4 - NVLINK_VERSION_3_1 NvlinkVersion = 5 - NVLINK_VERSION_4_0 NvlinkVersion = 6 - NVLINK_VERSION_5_0 NvlinkVersion = 7 -) - // EccCounterType as declared in nvml/nvml.h type EccCounterType int32 @@ -1792,6 +1843,21 @@ const ( CLOCK_LIMIT_ID_UNLIMITED ClockLimitId = -254 ) +// NvlinkVersion as declared in nvml/nvml.h +type NvlinkVersion int32 + +// NvlinkVersion enumeration from nvml/nvml.h +const ( + NVLINK_VERSION_INVALID NvlinkVersion = iota + NVLINK_VERSION_1_0 NvlinkVersion = 1 + NVLINK_VERSION_2_0 NvlinkVersion = 2 + NVLINK_VERSION_2_2 NvlinkVersion = 3 + NVLINK_VERSION_3_0 NvlinkVersion = 4 + NVLINK_VERSION_3_1 NvlinkVersion = 5 + NVLINK_VERSION_4_0 NvlinkVersion = 6 + NVLINK_VERSION_5_0 NvlinkVersion = 7 +) + // VgpuVmCompatibility as declared in nvml/nvml.h type VgpuVmCompatibility int32 @@ -1892,6 +1958,28 @@ const ( GRID_LICENSE_FEATURE_CODE_COMPUTE GridLicenseFeatureCode = 4 ) +// PRMCounterId as declared in nvml/nvml.h +type PRMCounterId int32 + +// PRMCounterId enumeration from nvml/nvml.h +const ( + PRM_COUNTER_ID_NONE PRMCounterId = iota + PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_LINK_DOWN_EVENTS PRMCounterId = 1 + PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_SUCCESSFUL_RECOVERY_EVENTS PRMCounterId = 2 + PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TOTAL_SUCCESSFUL_RECOVERY_EVENTS PRMCounterId = 101 + PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_SINCE_LAST_RECOVERY PRMCounterId = 102 + PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_BETWEEN_LAST_TWO_RECOVERIES PRMCounterId = 103 + PRM_COUNTER_ID_PPCNT_PORTCOUNTERS_PORT_XMIT_WAIT PRMCounterId = 201 + PRM_COUNTER_ID_PPCNT_PLR_RCV_CODES PRMCounterId = 301 + PRM_COUNTER_ID_PPCNT_PLR_RCV_CODE_ERR PRMCounterId = 302 + PRM_COUNTER_ID_PPCNT_PLR_RCV_UNCORRECTABLE_CODE PRMCounterId = 303 + PRM_COUNTER_ID_PPCNT_PLR_XMIT_CODES PRMCounterId = 304 + PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_CODES PRMCounterId = 305 + PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_EVENTS PRMCounterId = 306 + PRM_COUNTER_ID_PPCNT_PLR_SYNC_EVENTS PRMCounterId = 307 + PRM_COUNTER_ID_PPRM_OPER_RECOVERY PRMCounterId = 1001 +) + // GpmMetricId as declared in nvml/nvml.h type GpmMetricId int32 @@ -2102,3 +2190,14 @@ const ( POWER_PROFILE_MIG PowerProfileType = 14 POWER_PROFILE_MAX PowerProfileType = 15 ) + +// PowerProfileOperation as declared in nvml/nvml.h +type PowerProfileOperation int32 + +// PowerProfileOperation enumeration from nvml/nvml.h +const ( + POWER_PROFILE_OPERATION_CLEAR PowerProfileOperation = iota + POWER_PROFILE_OPERATION_SET PowerProfileOperation = 1 + POWER_PROFILE_OPERATION_SET_AND_OVERWRITE PowerProfileOperation = 2 + POWER_PROFILE_OPERATION_MAX PowerProfileOperation = 3 +) diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go index d341e15..e93b5aa 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/device.go @@ -17,6 +17,7 @@ package nvml import ( "fmt" "reflect" + "runtime" "unsafe" ) @@ -31,7 +32,7 @@ func nvmlDeviceHandle(d Device) nvmlDevice { val = val.Elem() } - if val.Kind() == reflect.Ptr { + if val.Kind() == reflect.Pointer { val = val.Elem() } @@ -1397,6 +1398,26 @@ func (device nvmlDevice) GetPdi() (Pdi, Return) { return pdi, ret } +func (l *library) DeviceSetHostname_v1(device Device, hostName string) Return { + return device.SetHostname_v1(hostName) +} + +func (device nvmlDevice) SetHostname_v1(hostName string) Return { + var hostNameReq Hostname_v1 + stringToInt8Slice(hostName, hostNameReq.Value[:]) + return nvmlDeviceSetHostname_v1(device, &hostNameReq) +} + +func (l *library) DeviceGetHostname_v1(device Device) (string, Return) { + return device.GetHostname_v1() +} + +func (device nvmlDevice) GetHostname_v1() (string, Return) { + var hostName Hostname_v1 + ret := nvmlDeviceGetHostname_v1(device, &hostName) + return int8SliceToString(hostName.Value[:]), ret +} + // nvml.DeviceGetAccountingStats() func (l *library) DeviceGetAccountingStats(device Device, pid uint32) (AccountingStats, Return) { return device.GetAccountingStats(pid) @@ -2125,6 +2146,41 @@ func (device nvmlDevice) ReadWritePRM_v1(buffer *PRMTLV_v1) Return { return nvmlDeviceReadWritePRM_v1(device, buffer) } +func (l *library) DeviceReadPRMCounters_v1(device Device, prmCounters []PRMCounterId, localPort int) ([]PRMCounter_v1, Return) { + return device.ReadPRMCounters_v1(prmCounters, localPort) +} + +func (device nvmlDevice) ReadPRMCounters_v1(prmCounters []PRMCounterId, localPort int) ([]PRMCounter_v1, Return) { + if len(prmCounters) == 0 { + return nil, ERROR_INVALID_ARGUMENT + } + + inData := PRMCounterInput_v1{ + LocalPort: uint32(localPort), + } + + counters := make([]PRMCounter_v1, 0, len(prmCounters)) + for _, counterId := range prmCounters { + counters = append(counters, PRMCounter_v1{ + CounterId: uint32(counterId), + InData: inData, + }) + } + + var pinner runtime.Pinner + prmCounterList := PRMCounterList_v1{ + NumCounters: uint32(len(counters)), + Counters: &counters[0], + } + pinner.Pin(&counters[0]) + defer pinner.Unpin() + ret := nvmlDeviceReadPRMCounters_v1(device, &prmCounterList) + if ret != SUCCESS { + return nil, ret + } + return counters, ret +} + // nvml.DeviceSetMigMode() func (l *library) DeviceSetMigMode(device Device, mode int) (Return, Return) { return device.SetMigMode(mode) @@ -2931,10 +2987,56 @@ func (l *library) DeviceGetRunningProcessDetailList(device Device) (ProcessDetai } func (device nvmlDevice) GetRunningProcessDetailList() (ProcessDetailList, Return) { + return deviceGetRunningProcessDetailList(device) +} + +func deviceGetRunningProcessDetailList(device nvmlDevice) (ProcessDetailList, Return) { var plist ProcessDetailList plist.Version = STRUCT_VERSION(plist, 1) - ret := nvmlDeviceGetRunningProcessDetailList(device, &plist) - return plist, ret + plist.NumProcArrayEntries = 1 + + for { + // Allocate memory in cgo for ProcessDetailList::ProcArray + // We can't simply use a unsafe.Pointer of Go slice here + // otherwise it will trigger the following error: + // runtime error: cgo argument has Go pointer to unpinned Go pointer + count := plist.NumProcArrayEntries + cptr := malloc(uintptr(count) * unsafe.Sizeof(ProcessDetail_v1{})) + if cptr == nil { + return plist, ERROR_MEMORY + } + + plist.ProcArray = (*ProcessDetail_v1)(cptr) + ret := nvmlDeviceGetRunningProcessDetailList(device, &plist) + if ret == SUCCESS { + out := make([]ProcessDetail_v1, plist.NumProcArrayEntries) + src := unsafe.Slice((*ProcessDetail_v1)(cptr), plist.NumProcArrayEntries) + copy(out, src) + + if plist.NumProcArrayEntries > 0 { + plist.ProcArray = &out[0] + } else { + plist.ProcArray = nil + } + + // Clean up C memory before return + free(cptr) + + return plist, ret + } + + // Clean up C memory before retry/return + if cptr != nil { + free(cptr) + } + + if ret != ERROR_INSUFFICIENT_SIZE { + return plist, ret + } + + // Increase capacity and retry + plist.NumProcArrayEntries *= 2 + } } // nvml.DeviceGetConfComputeMemSizeInfo() @@ -3052,6 +3154,16 @@ func (device nvmlDevice) GetRepairStatus() (RepairStatus, Return) { return repairStatus, ret } +func (l *library) DeviceGetUnrepairableMemoryFlag_v1(device Device) (UnrepairableMemoryStatus_v1, Return) { + return device.GetUnrepairableMemoryFlag_v1() +} + +func (device nvmlDevice) GetUnrepairableMemoryFlag_v1() (UnrepairableMemoryStatus_v1, Return) { + var status UnrepairableMemoryStatus_v1 + ret := nvmlDeviceGetUnrepairableMemoryFlag_v1(device, &status) + return status, ret +} + // nvml.DeviceGetPciInfoExt() func (l *library) DeviceGetPciInfoExt(device Device) (PciInfoExt, Return) { return device.GetPciInfoExt() @@ -3466,6 +3578,25 @@ func (device nvmlDevice) WorkloadPowerProfileClearRequestedProfiles(requestedPro return nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(device, requestedProfiles) } +// nvml.DeviceWorkloadPowerProfileUpdateProfiles_v1 +func (l *library) DeviceWorkloadPowerProfileUpdateProfiles_v1(device Device, operation PowerProfileOperation, profileTypes []PowerProfileType) Return { + return device.WorkloadPowerProfileUpdateProfiles_v1(operation, profileTypes) +} + +func (device nvmlDevice) WorkloadPowerProfileUpdateProfiles_v1(operation PowerProfileOperation, profileTypes []PowerProfileType) Return { + var profileTypesInt32 []int32 + for _, profileType := range profileTypes { + profileTypesInt32 = append(profileTypesInt32, int32(profileType)) + } + updateProfileMask := int32SliceToMask255(profileTypesInt32) + updateProfilesRequest := WorkloadPowerProfileUpdateProfiles_v1{ + Operation: uint32(operation), + UpdateProfilesMask: updateProfileMask, + } + + return nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1(device, &updateProfilesRequest) +} + // nvml.DevicePowerSmoothingActivatePresetProfile() func (l *library) DevicePowerSmoothingActivatePresetProfile(device Device, profile *PowerSmoothingProfile) Return { return device.PowerSmoothingActivatePresetProfile(profile) @@ -3501,6 +3632,15 @@ func (device nvmlDevice) GetSramUniqueUncorrectedEccErrorCounts(errorCounts *Ecc return nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(device, errorCounts) } +// nvml.DeviceSetRusdSettings_v1() +func (l *library) DeviceSetRusdSettings_v1(device Device, settings RusdSettings_v1) Return { + return device.SetRusdSettings_v1(settings) +} +func (device nvmlDevice) SetRusdSettings_v1(settings RusdSettings_v1) Return { + settings.Version = STRUCT_VERSION(settings, 1) + return nvmlDeviceSetRusdSettings_v1(device, &settings) +} + // nvml.GpuInstanceGetCreatableVgpus() func (l *library) GpuInstanceGetCreatableVgpus(gpuInstance GpuInstance) (VgpuTypeIdInfo, Return) { return gpuInstance.GetCreatableVgpus() diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go index 563bc59..3452c0b 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/gpm.go @@ -58,6 +58,7 @@ func (l *library) GpmMetricsGetV(metricsGet *GpmMetricsGetType) GpmMetricsGetVTy // nvmlGpmMetricsGetStub is a stub function that can be overridden for testing. var nvmlGpmMetricsGetStub = nvmlGpmMetricsGet +var nvmlGpmQueryDeviceSupportStub = nvmlGpmQueryDeviceSupport func (metricsGetV GpmMetricsGetVType) V1() Return { metricsGetV.metricsGet.Version = 1 @@ -120,8 +121,8 @@ func (device nvmlDevice) GpmQueryDeviceSupportV() GpmSupportV { func (gpmSupportV GpmSupportV) V1() (GpmSupport, Return) { var gpmSupport GpmSupport - gpmSupport.Version = STRUCT_VERSION(gpmSupport, 1) - ret := nvmlGpmQueryDeviceSupport(gpmSupportV.device, &gpmSupport) + gpmSupport.Version = GPM_SUPPORT_VERSION + ret := nvmlGpmQueryDeviceSupportStub(gpmSupportV.device, &gpmSupport) return gpmSupport, ret } @@ -131,8 +132,8 @@ func (l *library) GpmQueryDeviceSupport(device Device) (GpmSupport, Return) { func (device nvmlDevice) GpmQueryDeviceSupport() (GpmSupport, Return) { var gpmSupport GpmSupport - gpmSupport.Version = STRUCT_VERSION(gpmSupport, GPM_SUPPORT_VERSION) - ret := nvmlGpmQueryDeviceSupport(device, &gpmSupport) + gpmSupport.Version = GPM_SUPPORT_VERSION + ret := nvmlGpmQueryDeviceSupportStub(device, &gpmSupport) return gpmSupport, ret } diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/device.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/device.go index 2639728..aaded22 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/device.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/device.go @@ -291,6 +291,9 @@ var _ nvml.Device = &Device{} // GetHostVgpuModeFunc: func() (nvml.HostVgpuMode, nvml.Return) { // panic("mock out the GetHostVgpuMode method") // }, +// GetHostname_v1Func: func() (string, nvml.Return) { +// panic("mock out the GetHostname_v1 method") +// }, // GetIndexFunc: func() (int, nvml.Return) { // panic("mock out the GetIndex method") // }, @@ -576,6 +579,9 @@ var _ nvml.Device = &Device{} // GetUUIDFunc: func() (string, nvml.Return) { // panic("mock out the GetUUID method") // }, +// GetUnrepairableMemoryFlag_v1Func: func() (nvml.UnrepairableMemoryStatus_v1, nvml.Return) { +// panic("mock out the GetUnrepairableMemoryFlag_v1 method") +// }, // GetUtilizationRatesFunc: func() (nvml.Utilization, nvml.Return) { // panic("mock out the GetUtilizationRates method") // }, @@ -657,6 +663,9 @@ var _ nvml.Device = &Device{} // PowerSmoothingUpdatePresetProfileParamFunc: func(powerSmoothingProfile *nvml.PowerSmoothingProfile) nvml.Return { // panic("mock out the PowerSmoothingUpdatePresetProfileParam method") // }, +// ReadPRMCounters_v1Func: func(pRMCounterIds []nvml.PRMCounterId, n int) ([]nvml.PRMCounter_v1, nvml.Return) { +// panic("mock out the ReadPRMCounters_v1 method") +// }, // ReadWritePRM_v1Func: func(pRMTLV_v1 *nvml.PRMTLV_v1) nvml.Return { // panic("mock out the ReadWritePRM_v1 method") // }, @@ -732,6 +741,9 @@ var _ nvml.Device = &Device{} // SetGpuOperationModeFunc: func(gpuOperationMode nvml.GpuOperationMode) nvml.Return { // panic("mock out the SetGpuOperationMode method") // }, +// SetHostname_v1Func: func(s string) nvml.Return { +// panic("mock out the SetHostname_v1 method") +// }, // SetMemClkVfOffsetFunc: func(n int) nvml.Return { // panic("mock out the SetMemClkVfOffset method") // }, @@ -759,6 +771,9 @@ var _ nvml.Device = &Device{} // SetPowerManagementLimit_v2Func: func(powerValue_v2 *nvml.PowerValue_v2) nvml.Return { // panic("mock out the SetPowerManagementLimit_v2 method") // }, +// SetRusdSettings_v1Func: func(rusdSettings_v1 nvml.RusdSettings_v1) nvml.Return { +// panic("mock out the SetRusdSettings_v1 method") +// }, // SetTemperatureThresholdFunc: func(temperatureThresholds nvml.TemperatureThresholds, n int) nvml.Return { // panic("mock out the SetTemperatureThreshold method") // }, @@ -792,6 +807,9 @@ var _ nvml.Device = &Device{} // WorkloadPowerProfileSetRequestedProfilesFunc: func(workloadPowerProfileRequestedProfiles *nvml.WorkloadPowerProfileRequestedProfiles) nvml.Return { // panic("mock out the WorkloadPowerProfileSetRequestedProfiles method") // }, +// WorkloadPowerProfileUpdateProfiles_v1Func: func(powerProfileOperation nvml.PowerProfileOperation, powerProfileTypes []nvml.PowerProfileType) nvml.Return { +// panic("mock out the WorkloadPowerProfileUpdateProfiles_v1 method") +// }, // } // // // use mockedDevice in code that requires nvml.Device @@ -1072,6 +1090,9 @@ type Device struct { // GetHostVgpuModeFunc mocks the GetHostVgpuMode method. GetHostVgpuModeFunc func() (nvml.HostVgpuMode, nvml.Return) + // GetHostname_v1Func mocks the GetHostname_v1 method. + GetHostname_v1Func func() (string, nvml.Return) + // GetIndexFunc mocks the GetIndex method. GetIndexFunc func() (int, nvml.Return) @@ -1357,6 +1378,9 @@ type Device struct { // GetUUIDFunc mocks the GetUUID method. GetUUIDFunc func() (string, nvml.Return) + // GetUnrepairableMemoryFlag_v1Func mocks the GetUnrepairableMemoryFlag_v1 method. + GetUnrepairableMemoryFlag_v1Func func() (nvml.UnrepairableMemoryStatus_v1, nvml.Return) + // GetUtilizationRatesFunc mocks the GetUtilizationRates method. GetUtilizationRatesFunc func() (nvml.Utilization, nvml.Return) @@ -1438,6 +1462,9 @@ type Device struct { // PowerSmoothingUpdatePresetProfileParamFunc mocks the PowerSmoothingUpdatePresetProfileParam method. PowerSmoothingUpdatePresetProfileParamFunc func(powerSmoothingProfile *nvml.PowerSmoothingProfile) nvml.Return + // ReadPRMCounters_v1Func mocks the ReadPRMCounters_v1 method. + ReadPRMCounters_v1Func func(pRMCounterIds []nvml.PRMCounterId, n int) ([]nvml.PRMCounter_v1, nvml.Return) + // ReadWritePRM_v1Func mocks the ReadWritePRM_v1 method. ReadWritePRM_v1Func func(pRMTLV_v1 *nvml.PRMTLV_v1) nvml.Return @@ -1513,6 +1540,9 @@ type Device struct { // SetGpuOperationModeFunc mocks the SetGpuOperationMode method. SetGpuOperationModeFunc func(gpuOperationMode nvml.GpuOperationMode) nvml.Return + // SetHostname_v1Func mocks the SetHostname_v1 method. + SetHostname_v1Func func(s string) nvml.Return + // SetMemClkVfOffsetFunc mocks the SetMemClkVfOffset method. SetMemClkVfOffsetFunc func(n int) nvml.Return @@ -1540,6 +1570,9 @@ type Device struct { // SetPowerManagementLimit_v2Func mocks the SetPowerManagementLimit_v2 method. SetPowerManagementLimit_v2Func func(powerValue_v2 *nvml.PowerValue_v2) nvml.Return + // SetRusdSettings_v1Func mocks the SetRusdSettings_v1 method. + SetRusdSettings_v1Func func(rusdSettings_v1 nvml.RusdSettings_v1) nvml.Return + // SetTemperatureThresholdFunc mocks the SetTemperatureThreshold method. SetTemperatureThresholdFunc func(temperatureThresholds nvml.TemperatureThresholds, n int) nvml.Return @@ -1573,6 +1606,9 @@ type Device struct { // WorkloadPowerProfileSetRequestedProfilesFunc mocks the WorkloadPowerProfileSetRequestedProfiles method. WorkloadPowerProfileSetRequestedProfilesFunc func(workloadPowerProfileRequestedProfiles *nvml.WorkloadPowerProfileRequestedProfiles) nvml.Return + // WorkloadPowerProfileUpdateProfiles_v1Func mocks the WorkloadPowerProfileUpdateProfiles_v1 method. + WorkloadPowerProfileUpdateProfiles_v1Func func(powerProfileOperation nvml.PowerProfileOperation, powerProfileTypes []nvml.PowerProfileType) nvml.Return + // calls tracks calls to the methods. calls struct { // ClearAccountingPids holds details about calls to the ClearAccountingPids method. @@ -1912,6 +1948,9 @@ type Device struct { // GetHostVgpuMode holds details about calls to the GetHostVgpuMode method. GetHostVgpuMode []struct { } + // GetHostname_v1 holds details about calls to the GetHostname_v1 method. + GetHostname_v1 []struct { + } // GetIndex holds details about calls to the GetIndex method. GetIndex []struct { } @@ -2279,6 +2318,9 @@ type Device struct { // GetUUID holds details about calls to the GetUUID method. GetUUID []struct { } + // GetUnrepairableMemoryFlag_v1 holds details about calls to the GetUnrepairableMemoryFlag_v1 method. + GetUnrepairableMemoryFlag_v1 []struct { + } // GetUtilizationRates holds details about calls to the GetUtilizationRates method. GetUtilizationRates []struct { } @@ -2388,6 +2430,13 @@ type Device struct { // PowerSmoothingProfile is the powerSmoothingProfile argument value. PowerSmoothingProfile *nvml.PowerSmoothingProfile } + // ReadPRMCounters_v1 holds details about calls to the ReadPRMCounters_v1 method. + ReadPRMCounters_v1 []struct { + // PRMCounterIds is the pRMCounterIds argument value. + PRMCounterIds []nvml.PRMCounterId + // N is the n argument value. + N int + } // ReadWritePRM_v1 holds details about calls to the ReadWritePRM_v1 method. ReadWritePRM_v1 []struct { // PRMTLV_v1 is the pRMTLV_v1 argument value. @@ -2523,6 +2572,11 @@ type Device struct { // GpuOperationMode is the gpuOperationMode argument value. GpuOperationMode nvml.GpuOperationMode } + // SetHostname_v1 holds details about calls to the SetHostname_v1 method. + SetHostname_v1 []struct { + // S is the s argument value. + S string + } // SetMemClkVfOffset holds details about calls to the SetMemClkVfOffset method. SetMemClkVfOffset []struct { // N is the n argument value. @@ -2576,6 +2630,11 @@ type Device struct { // PowerValue_v2 is the powerValue_v2 argument value. PowerValue_v2 *nvml.PowerValue_v2 } + // SetRusdSettings_v1 holds details about calls to the SetRusdSettings_v1 method. + SetRusdSettings_v1 []struct { + // RusdSettings_v1 is the rusdSettings_v1 argument value. + RusdSettings_v1 nvml.RusdSettings_v1 + } // SetTemperatureThreshold holds details about calls to the SetTemperatureThreshold method. SetTemperatureThreshold []struct { // TemperatureThresholds is the temperatureThresholds argument value. @@ -2629,6 +2688,13 @@ type Device struct { // WorkloadPowerProfileRequestedProfiles is the workloadPowerProfileRequestedProfiles argument value. WorkloadPowerProfileRequestedProfiles *nvml.WorkloadPowerProfileRequestedProfiles } + // WorkloadPowerProfileUpdateProfiles_v1 holds details about calls to the WorkloadPowerProfileUpdateProfiles_v1 method. + WorkloadPowerProfileUpdateProfiles_v1 []struct { + // PowerProfileOperation is the powerProfileOperation argument value. + PowerProfileOperation nvml.PowerProfileOperation + // PowerProfileTypes is the powerProfileTypes argument value. + PowerProfileTypes []nvml.PowerProfileType + } } lockClearAccountingPids sync.RWMutex lockClearCpuAffinity sync.RWMutex @@ -2721,6 +2787,7 @@ type Device struct { lockGetGspFirmwareMode sync.RWMutex lockGetGspFirmwareVersion sync.RWMutex lockGetHostVgpuMode sync.RWMutex + lockGetHostname_v1 sync.RWMutex lockGetIndex sync.RWMutex lockGetInforomConfigurationChecksum sync.RWMutex lockGetInforomImageVersion sync.RWMutex @@ -2816,6 +2883,7 @@ type Device struct { lockGetTotalEccErrors sync.RWMutex lockGetTotalEnergyConsumption sync.RWMutex lockGetUUID sync.RWMutex + lockGetUnrepairableMemoryFlag_v1 sync.RWMutex lockGetUtilizationRates sync.RWMutex lockGetVbiosVersion sync.RWMutex lockGetVgpuCapabilities sync.RWMutex @@ -2843,6 +2911,7 @@ type Device struct { lockPowerSmoothingActivatePresetProfile sync.RWMutex lockPowerSmoothingSetState sync.RWMutex lockPowerSmoothingUpdatePresetProfileParam sync.RWMutex + lockReadPRMCounters_v1 sync.RWMutex lockReadWritePRM_v1 sync.RWMutex lockRegisterEvents sync.RWMutex lockResetApplicationsClocks sync.RWMutex @@ -2868,6 +2937,7 @@ type Device struct { lockSetGpcClkVfOffset sync.RWMutex lockSetGpuLockedClocks sync.RWMutex lockSetGpuOperationMode sync.RWMutex + lockSetHostname_v1 sync.RWMutex lockSetMemClkVfOffset sync.RWMutex lockSetMemoryLockedClocks sync.RWMutex lockSetMigMode sync.RWMutex @@ -2877,6 +2947,7 @@ type Device struct { lockSetPersistenceMode sync.RWMutex lockSetPowerManagementLimit sync.RWMutex lockSetPowerManagementLimit_v2 sync.RWMutex + lockSetRusdSettings_v1 sync.RWMutex lockSetTemperatureThreshold sync.RWMutex lockSetVgpuCapabilities sync.RWMutex lockSetVgpuHeterogeneousMode sync.RWMutex @@ -2888,6 +2959,7 @@ type Device struct { lockWorkloadPowerProfileGetCurrentProfiles sync.RWMutex lockWorkloadPowerProfileGetProfilesInfo sync.RWMutex lockWorkloadPowerProfileSetRequestedProfiles sync.RWMutex + lockWorkloadPowerProfileUpdateProfiles_v1 sync.RWMutex } // ClearAccountingPids calls ClearAccountingPidsFunc. @@ -5501,6 +5573,33 @@ func (mock *Device) GetHostVgpuModeCalls() []struct { return calls } +// GetHostname_v1 calls GetHostname_v1Func. +func (mock *Device) GetHostname_v1() (string, nvml.Return) { + if mock.GetHostname_v1Func == nil { + panic("Device.GetHostname_v1Func: method is nil but Device.GetHostname_v1 was just called") + } + callInfo := struct { + }{} + mock.lockGetHostname_v1.Lock() + mock.calls.GetHostname_v1 = append(mock.calls.GetHostname_v1, callInfo) + mock.lockGetHostname_v1.Unlock() + return mock.GetHostname_v1Func() +} + +// GetHostname_v1Calls gets all the calls that were made to GetHostname_v1. +// Check the length with: +// +// len(mockedDevice.GetHostname_v1Calls()) +func (mock *Device) GetHostname_v1Calls() []struct { +} { + var calls []struct { + } + mock.lockGetHostname_v1.RLock() + calls = mock.calls.GetHostname_v1 + mock.lockGetHostname_v1.RUnlock() + return calls +} + // GetIndex calls GetIndexFunc. func (mock *Device) GetIndex() (int, nvml.Return) { if mock.GetIndexFunc == nil { @@ -8260,6 +8359,33 @@ func (mock *Device) GetUUIDCalls() []struct { return calls } +// GetUnrepairableMemoryFlag_v1 calls GetUnrepairableMemoryFlag_v1Func. +func (mock *Device) GetUnrepairableMemoryFlag_v1() (nvml.UnrepairableMemoryStatus_v1, nvml.Return) { + if mock.GetUnrepairableMemoryFlag_v1Func == nil { + panic("Device.GetUnrepairableMemoryFlag_v1Func: method is nil but Device.GetUnrepairableMemoryFlag_v1 was just called") + } + callInfo := struct { + }{} + mock.lockGetUnrepairableMemoryFlag_v1.Lock() + mock.calls.GetUnrepairableMemoryFlag_v1 = append(mock.calls.GetUnrepairableMemoryFlag_v1, callInfo) + mock.lockGetUnrepairableMemoryFlag_v1.Unlock() + return mock.GetUnrepairableMemoryFlag_v1Func() +} + +// GetUnrepairableMemoryFlag_v1Calls gets all the calls that were made to GetUnrepairableMemoryFlag_v1. +// Check the length with: +// +// len(mockedDevice.GetUnrepairableMemoryFlag_v1Calls()) +func (mock *Device) GetUnrepairableMemoryFlag_v1Calls() []struct { +} { + var calls []struct { + } + mock.lockGetUnrepairableMemoryFlag_v1.RLock() + calls = mock.calls.GetUnrepairableMemoryFlag_v1 + mock.lockGetUnrepairableMemoryFlag_v1.RUnlock() + return calls +} + // GetUtilizationRates calls GetUtilizationRatesFunc. func (mock *Device) GetUtilizationRates() (nvml.Utilization, nvml.Return) { if mock.GetUtilizationRatesFunc == nil { @@ -9058,6 +9184,42 @@ func (mock *Device) PowerSmoothingUpdatePresetProfileParamCalls() []struct { return calls } +// ReadPRMCounters_v1 calls ReadPRMCounters_v1Func. +func (mock *Device) ReadPRMCounters_v1(pRMCounterIds []nvml.PRMCounterId, n int) ([]nvml.PRMCounter_v1, nvml.Return) { + if mock.ReadPRMCounters_v1Func == nil { + panic("Device.ReadPRMCounters_v1Func: method is nil but Device.ReadPRMCounters_v1 was just called") + } + callInfo := struct { + PRMCounterIds []nvml.PRMCounterId + N int + }{ + PRMCounterIds: pRMCounterIds, + N: n, + } + mock.lockReadPRMCounters_v1.Lock() + mock.calls.ReadPRMCounters_v1 = append(mock.calls.ReadPRMCounters_v1, callInfo) + mock.lockReadPRMCounters_v1.Unlock() + return mock.ReadPRMCounters_v1Func(pRMCounterIds, n) +} + +// ReadPRMCounters_v1Calls gets all the calls that were made to ReadPRMCounters_v1. +// Check the length with: +// +// len(mockedDevice.ReadPRMCounters_v1Calls()) +func (mock *Device) ReadPRMCounters_v1Calls() []struct { + PRMCounterIds []nvml.PRMCounterId + N int +} { + var calls []struct { + PRMCounterIds []nvml.PRMCounterId + N int + } + mock.lockReadPRMCounters_v1.RLock() + calls = mock.calls.ReadPRMCounters_v1 + mock.lockReadPRMCounters_v1.RUnlock() + return calls +} + // ReadWritePRM_v1 calls ReadWritePRM_v1Func. func (mock *Device) ReadWritePRM_v1(pRMTLV_v1 *nvml.PRMTLV_v1) nvml.Return { if mock.ReadWritePRM_v1Func == nil { @@ -9874,6 +10036,38 @@ func (mock *Device) SetGpuOperationModeCalls() []struct { return calls } +// SetHostname_v1 calls SetHostname_v1Func. +func (mock *Device) SetHostname_v1(s string) nvml.Return { + if mock.SetHostname_v1Func == nil { + panic("Device.SetHostname_v1Func: method is nil but Device.SetHostname_v1 was just called") + } + callInfo := struct { + S string + }{ + S: s, + } + mock.lockSetHostname_v1.Lock() + mock.calls.SetHostname_v1 = append(mock.calls.SetHostname_v1, callInfo) + mock.lockSetHostname_v1.Unlock() + return mock.SetHostname_v1Func(s) +} + +// SetHostname_v1Calls gets all the calls that were made to SetHostname_v1. +// Check the length with: +// +// len(mockedDevice.SetHostname_v1Calls()) +func (mock *Device) SetHostname_v1Calls() []struct { + S string +} { + var calls []struct { + S string + } + mock.lockSetHostname_v1.RLock() + calls = mock.calls.SetHostname_v1 + mock.lockSetHostname_v1.RUnlock() + return calls +} + // SetMemClkVfOffset calls SetMemClkVfOffsetFunc. func (mock *Device) SetMemClkVfOffset(n int) nvml.Return { if mock.SetMemClkVfOffsetFunc == nil { @@ -10178,6 +10372,38 @@ func (mock *Device) SetPowerManagementLimit_v2Calls() []struct { return calls } +// SetRusdSettings_v1 calls SetRusdSettings_v1Func. +func (mock *Device) SetRusdSettings_v1(rusdSettings_v1 nvml.RusdSettings_v1) nvml.Return { + if mock.SetRusdSettings_v1Func == nil { + panic("Device.SetRusdSettings_v1Func: method is nil but Device.SetRusdSettings_v1 was just called") + } + callInfo := struct { + RusdSettings_v1 nvml.RusdSettings_v1 + }{ + RusdSettings_v1: rusdSettings_v1, + } + mock.lockSetRusdSettings_v1.Lock() + mock.calls.SetRusdSettings_v1 = append(mock.calls.SetRusdSettings_v1, callInfo) + mock.lockSetRusdSettings_v1.Unlock() + return mock.SetRusdSettings_v1Func(rusdSettings_v1) +} + +// SetRusdSettings_v1Calls gets all the calls that were made to SetRusdSettings_v1. +// Check the length with: +// +// len(mockedDevice.SetRusdSettings_v1Calls()) +func (mock *Device) SetRusdSettings_v1Calls() []struct { + RusdSettings_v1 nvml.RusdSettings_v1 +} { + var calls []struct { + RusdSettings_v1 nvml.RusdSettings_v1 + } + mock.lockSetRusdSettings_v1.RLock() + calls = mock.calls.SetRusdSettings_v1 + mock.lockSetRusdSettings_v1.RUnlock() + return calls +} + // SetTemperatureThreshold calls SetTemperatureThresholdFunc. func (mock *Device) SetTemperatureThreshold(temperatureThresholds nvml.TemperatureThresholds, n int) nvml.Return { if mock.SetTemperatureThresholdFunc == nil { @@ -10522,3 +10748,39 @@ func (mock *Device) WorkloadPowerProfileSetRequestedProfilesCalls() []struct { mock.lockWorkloadPowerProfileSetRequestedProfiles.RUnlock() return calls } + +// WorkloadPowerProfileUpdateProfiles_v1 calls WorkloadPowerProfileUpdateProfiles_v1Func. +func (mock *Device) WorkloadPowerProfileUpdateProfiles_v1(powerProfileOperation nvml.PowerProfileOperation, powerProfileTypes []nvml.PowerProfileType) nvml.Return { + if mock.WorkloadPowerProfileUpdateProfiles_v1Func == nil { + panic("Device.WorkloadPowerProfileUpdateProfiles_v1Func: method is nil but Device.WorkloadPowerProfileUpdateProfiles_v1 was just called") + } + callInfo := struct { + PowerProfileOperation nvml.PowerProfileOperation + PowerProfileTypes []nvml.PowerProfileType + }{ + PowerProfileOperation: powerProfileOperation, + PowerProfileTypes: powerProfileTypes, + } + mock.lockWorkloadPowerProfileUpdateProfiles_v1.Lock() + mock.calls.WorkloadPowerProfileUpdateProfiles_v1 = append(mock.calls.WorkloadPowerProfileUpdateProfiles_v1, callInfo) + mock.lockWorkloadPowerProfileUpdateProfiles_v1.Unlock() + return mock.WorkloadPowerProfileUpdateProfiles_v1Func(powerProfileOperation, powerProfileTypes) +} + +// WorkloadPowerProfileUpdateProfiles_v1Calls gets all the calls that were made to WorkloadPowerProfileUpdateProfiles_v1. +// Check the length with: +// +// len(mockedDevice.WorkloadPowerProfileUpdateProfiles_v1Calls()) +func (mock *Device) WorkloadPowerProfileUpdateProfiles_v1Calls() []struct { + PowerProfileOperation nvml.PowerProfileOperation + PowerProfileTypes []nvml.PowerProfileType +} { + var calls []struct { + PowerProfileOperation nvml.PowerProfileOperation + PowerProfileTypes []nvml.PowerProfileType + } + mock.lockWorkloadPowerProfileUpdateProfiles_v1.RLock() + calls = mock.calls.WorkloadPowerProfileUpdateProfiles_v1 + mock.lockWorkloadPowerProfileUpdateProfiles_v1.RUnlock() + return calls +} diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/interface.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/interface.go index dc25ce2..fa4dee0 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/interface.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/mock/interface.go @@ -318,6 +318,9 @@ var _ nvml.Interface = &Interface{} // DeviceGetHostVgpuModeFunc: func(device nvml.Device) (nvml.HostVgpuMode, nvml.Return) { // panic("mock out the DeviceGetHostVgpuMode method") // }, +// DeviceGetHostname_v1Func: func(device nvml.Device) (string, nvml.Return) { +// panic("mock out the DeviceGetHostname_v1 method") +// }, // DeviceGetIndexFunc: func(device nvml.Device) (int, nvml.Return) { // panic("mock out the DeviceGetIndex method") // }, @@ -603,6 +606,9 @@ var _ nvml.Interface = &Interface{} // DeviceGetUUIDFunc: func(device nvml.Device) (string, nvml.Return) { // panic("mock out the DeviceGetUUID method") // }, +// DeviceGetUnrepairableMemoryFlag_v1Func: func(device nvml.Device) (nvml.UnrepairableMemoryStatus_v1, nvml.Return) { +// panic("mock out the DeviceGetUnrepairableMemoryFlag_v1 method") +// }, // DeviceGetUtilizationRatesFunc: func(device nvml.Device) (nvml.Utilization, nvml.Return) { // panic("mock out the DeviceGetUtilizationRates method") // }, @@ -672,6 +678,9 @@ var _ nvml.Interface = &Interface{} // DeviceQueryDrainStateFunc: func(pciInfo *nvml.PciInfo) (nvml.EnableState, nvml.Return) { // panic("mock out the DeviceQueryDrainState method") // }, +// DeviceReadPRMCounters_v1Func: func(device nvml.Device, pRMCounterIds []nvml.PRMCounterId, n int) ([]nvml.PRMCounter_v1, nvml.Return) { +// panic("mock out the DeviceReadPRMCounters_v1 method") +// }, // DeviceReadWritePRM_v1Func: func(device nvml.Device, pRMTLV_v1 *nvml.PRMTLV_v1) nvml.Return { // panic("mock out the DeviceReadWritePRM_v1 method") // }, @@ -753,6 +762,9 @@ var _ nvml.Interface = &Interface{} // DeviceSetGpuOperationModeFunc: func(device nvml.Device, gpuOperationMode nvml.GpuOperationMode) nvml.Return { // panic("mock out the DeviceSetGpuOperationMode method") // }, +// DeviceSetHostname_v1Func: func(device nvml.Device, s string) nvml.Return { +// panic("mock out the DeviceSetHostname_v1 method") +// }, // DeviceSetMemClkVfOffsetFunc: func(device nvml.Device, n int) nvml.Return { // panic("mock out the DeviceSetMemClkVfOffset method") // }, @@ -780,6 +792,9 @@ var _ nvml.Interface = &Interface{} // DeviceSetPowerManagementLimit_v2Func: func(device nvml.Device, powerValue_v2 *nvml.PowerValue_v2) nvml.Return { // panic("mock out the DeviceSetPowerManagementLimit_v2 method") // }, +// DeviceSetRusdSettings_v1Func: func(device nvml.Device, rusdSettings_v1 nvml.RusdSettings_v1) nvml.Return { +// panic("mock out the DeviceSetRusdSettings_v1 method") +// }, // DeviceSetTemperatureThresholdFunc: func(device nvml.Device, temperatureThresholds nvml.TemperatureThresholds, n int) nvml.Return { // panic("mock out the DeviceSetTemperatureThreshold method") // }, @@ -810,6 +825,9 @@ var _ nvml.Interface = &Interface{} // DeviceWorkloadPowerProfileSetRequestedProfilesFunc: func(device nvml.Device, workloadPowerProfileRequestedProfiles *nvml.WorkloadPowerProfileRequestedProfiles) nvml.Return { // panic("mock out the DeviceWorkloadPowerProfileSetRequestedProfiles method") // }, +// DeviceWorkloadPowerProfileUpdateProfiles_v1Func: func(device nvml.Device, powerProfileOperation nvml.PowerProfileOperation, powerProfileTypes []nvml.PowerProfileType) nvml.Return { +// panic("mock out the DeviceWorkloadPowerProfileUpdateProfiles_v1 method") +// }, // ErrorStringFunc: func(returnMoqParam nvml.Return) string { // panic("mock out the ErrorString method") // }, @@ -1447,6 +1465,9 @@ type Interface struct { // DeviceGetHostVgpuModeFunc mocks the DeviceGetHostVgpuMode method. DeviceGetHostVgpuModeFunc func(device nvml.Device) (nvml.HostVgpuMode, nvml.Return) + // DeviceGetHostname_v1Func mocks the DeviceGetHostname_v1 method. + DeviceGetHostname_v1Func func(device nvml.Device) (string, nvml.Return) + // DeviceGetIndexFunc mocks the DeviceGetIndex method. DeviceGetIndexFunc func(device nvml.Device) (int, nvml.Return) @@ -1732,6 +1753,9 @@ type Interface struct { // DeviceGetUUIDFunc mocks the DeviceGetUUID method. DeviceGetUUIDFunc func(device nvml.Device) (string, nvml.Return) + // DeviceGetUnrepairableMemoryFlag_v1Func mocks the DeviceGetUnrepairableMemoryFlag_v1 method. + DeviceGetUnrepairableMemoryFlag_v1Func func(device nvml.Device) (nvml.UnrepairableMemoryStatus_v1, nvml.Return) + // DeviceGetUtilizationRatesFunc mocks the DeviceGetUtilizationRates method. DeviceGetUtilizationRatesFunc func(device nvml.Device) (nvml.Utilization, nvml.Return) @@ -1801,6 +1825,9 @@ type Interface struct { // DeviceQueryDrainStateFunc mocks the DeviceQueryDrainState method. DeviceQueryDrainStateFunc func(pciInfo *nvml.PciInfo) (nvml.EnableState, nvml.Return) + // DeviceReadPRMCounters_v1Func mocks the DeviceReadPRMCounters_v1 method. + DeviceReadPRMCounters_v1Func func(device nvml.Device, pRMCounterIds []nvml.PRMCounterId, n int) ([]nvml.PRMCounter_v1, nvml.Return) + // DeviceReadWritePRM_v1Func mocks the DeviceReadWritePRM_v1 method. DeviceReadWritePRM_v1Func func(device nvml.Device, pRMTLV_v1 *nvml.PRMTLV_v1) nvml.Return @@ -1882,6 +1909,9 @@ type Interface struct { // DeviceSetGpuOperationModeFunc mocks the DeviceSetGpuOperationMode method. DeviceSetGpuOperationModeFunc func(device nvml.Device, gpuOperationMode nvml.GpuOperationMode) nvml.Return + // DeviceSetHostname_v1Func mocks the DeviceSetHostname_v1 method. + DeviceSetHostname_v1Func func(device nvml.Device, s string) nvml.Return + // DeviceSetMemClkVfOffsetFunc mocks the DeviceSetMemClkVfOffset method. DeviceSetMemClkVfOffsetFunc func(device nvml.Device, n int) nvml.Return @@ -1909,6 +1939,9 @@ type Interface struct { // DeviceSetPowerManagementLimit_v2Func mocks the DeviceSetPowerManagementLimit_v2 method. DeviceSetPowerManagementLimit_v2Func func(device nvml.Device, powerValue_v2 *nvml.PowerValue_v2) nvml.Return + // DeviceSetRusdSettings_v1Func mocks the DeviceSetRusdSettings_v1 method. + DeviceSetRusdSettings_v1Func func(device nvml.Device, rusdSettings_v1 nvml.RusdSettings_v1) nvml.Return + // DeviceSetTemperatureThresholdFunc mocks the DeviceSetTemperatureThreshold method. DeviceSetTemperatureThresholdFunc func(device nvml.Device, temperatureThresholds nvml.TemperatureThresholds, n int) nvml.Return @@ -1939,6 +1972,9 @@ type Interface struct { // DeviceWorkloadPowerProfileSetRequestedProfilesFunc mocks the DeviceWorkloadPowerProfileSetRequestedProfiles method. DeviceWorkloadPowerProfileSetRequestedProfilesFunc func(device nvml.Device, workloadPowerProfileRequestedProfiles *nvml.WorkloadPowerProfileRequestedProfiles) nvml.Return + // DeviceWorkloadPowerProfileUpdateProfiles_v1Func mocks the DeviceWorkloadPowerProfileUpdateProfiles_v1 method. + DeviceWorkloadPowerProfileUpdateProfiles_v1Func func(device nvml.Device, powerProfileOperation nvml.PowerProfileOperation, powerProfileTypes []nvml.PowerProfileType) nvml.Return + // ErrorStringFunc mocks the ErrorString method. ErrorStringFunc func(returnMoqParam nvml.Return) string @@ -2831,6 +2867,11 @@ type Interface struct { // Device is the device argument value. Device nvml.Device } + // DeviceGetHostname_v1 holds details about calls to the DeviceGetHostname_v1 method. + DeviceGetHostname_v1 []struct { + // Device is the device argument value. + Device nvml.Device + } // DeviceGetIndex holds details about calls to the DeviceGetIndex method. DeviceGetIndex []struct { // Device is the device argument value. @@ -3388,6 +3429,11 @@ type Interface struct { // Device is the device argument value. Device nvml.Device } + // DeviceGetUnrepairableMemoryFlag_v1 holds details about calls to the DeviceGetUnrepairableMemoryFlag_v1 method. + DeviceGetUnrepairableMemoryFlag_v1 []struct { + // Device is the device argument value. + Device nvml.Device + } // DeviceGetUtilizationRates holds details about calls to the DeviceGetUtilizationRates method. DeviceGetUtilizationRates []struct { // Device is the device argument value. @@ -3525,6 +3571,15 @@ type Interface struct { // PciInfo is the pciInfo argument value. PciInfo *nvml.PciInfo } + // DeviceReadPRMCounters_v1 holds details about calls to the DeviceReadPRMCounters_v1 method. + DeviceReadPRMCounters_v1 []struct { + // Device is the device argument value. + Device nvml.Device + // PRMCounterIds is the pRMCounterIds argument value. + PRMCounterIds []nvml.PRMCounterId + // N is the n argument value. + N int + } // DeviceReadWritePRM_v1 holds details about calls to the DeviceReadWritePRM_v1 method. DeviceReadWritePRM_v1 []struct { // Device is the device argument value. @@ -3724,6 +3779,13 @@ type Interface struct { // GpuOperationMode is the gpuOperationMode argument value. GpuOperationMode nvml.GpuOperationMode } + // DeviceSetHostname_v1 holds details about calls to the DeviceSetHostname_v1 method. + DeviceSetHostname_v1 []struct { + // Device is the device argument value. + Device nvml.Device + // S is the s argument value. + S string + } // DeviceSetMemClkVfOffset holds details about calls to the DeviceSetMemClkVfOffset method. DeviceSetMemClkVfOffset []struct { // Device is the device argument value. @@ -3795,6 +3857,13 @@ type Interface struct { // PowerValue_v2 is the powerValue_v2 argument value. PowerValue_v2 *nvml.PowerValue_v2 } + // DeviceSetRusdSettings_v1 holds details about calls to the DeviceSetRusdSettings_v1 method. + DeviceSetRusdSettings_v1 []struct { + // Device is the device argument value. + Device nvml.Device + // RusdSettings_v1 is the rusdSettings_v1 argument value. + RusdSettings_v1 nvml.RusdSettings_v1 + } // DeviceSetTemperatureThreshold holds details about calls to the DeviceSetTemperatureThreshold method. DeviceSetTemperatureThreshold []struct { // Device is the device argument value. @@ -3863,6 +3932,15 @@ type Interface struct { // WorkloadPowerProfileRequestedProfiles is the workloadPowerProfileRequestedProfiles argument value. WorkloadPowerProfileRequestedProfiles *nvml.WorkloadPowerProfileRequestedProfiles } + // DeviceWorkloadPowerProfileUpdateProfiles_v1 holds details about calls to the DeviceWorkloadPowerProfileUpdateProfiles_v1 method. + DeviceWorkloadPowerProfileUpdateProfiles_v1 []struct { + // Device is the device argument value. + Device nvml.Device + // PowerProfileOperation is the powerProfileOperation argument value. + PowerProfileOperation nvml.PowerProfileOperation + // PowerProfileTypes is the powerProfileTypes argument value. + PowerProfileTypes []nvml.PowerProfileType + } // ErrorString holds details about calls to the ErrorString method. ErrorString []struct { // ReturnMoqParam is the returnMoqParam argument value. @@ -4526,6 +4604,7 @@ type Interface struct { lockDeviceGetHandleByUUID sync.RWMutex lockDeviceGetHandleByUUIDV sync.RWMutex lockDeviceGetHostVgpuMode sync.RWMutex + lockDeviceGetHostname_v1 sync.RWMutex lockDeviceGetIndex sync.RWMutex lockDeviceGetInforomConfigurationChecksum sync.RWMutex lockDeviceGetInforomImageVersion sync.RWMutex @@ -4621,6 +4700,7 @@ type Interface struct { lockDeviceGetTotalEccErrors sync.RWMutex lockDeviceGetTotalEnergyConsumption sync.RWMutex lockDeviceGetUUID sync.RWMutex + lockDeviceGetUnrepairableMemoryFlag_v1 sync.RWMutex lockDeviceGetUtilizationRates sync.RWMutex lockDeviceGetVbiosVersion sync.RWMutex lockDeviceGetVgpuCapabilities sync.RWMutex @@ -4644,6 +4724,7 @@ type Interface struct { lockDevicePowerSmoothingSetState sync.RWMutex lockDevicePowerSmoothingUpdatePresetProfileParam sync.RWMutex lockDeviceQueryDrainState sync.RWMutex + lockDeviceReadPRMCounters_v1 sync.RWMutex lockDeviceReadWritePRM_v1 sync.RWMutex lockDeviceRegisterEvents sync.RWMutex lockDeviceRemoveGpu sync.RWMutex @@ -4671,6 +4752,7 @@ type Interface struct { lockDeviceSetGpcClkVfOffset sync.RWMutex lockDeviceSetGpuLockedClocks sync.RWMutex lockDeviceSetGpuOperationMode sync.RWMutex + lockDeviceSetHostname_v1 sync.RWMutex lockDeviceSetMemClkVfOffset sync.RWMutex lockDeviceSetMemoryLockedClocks sync.RWMutex lockDeviceSetMigMode sync.RWMutex @@ -4680,6 +4762,7 @@ type Interface struct { lockDeviceSetPersistenceMode sync.RWMutex lockDeviceSetPowerManagementLimit sync.RWMutex lockDeviceSetPowerManagementLimit_v2 sync.RWMutex + lockDeviceSetRusdSettings_v1 sync.RWMutex lockDeviceSetTemperatureThreshold sync.RWMutex lockDeviceSetVgpuCapabilities sync.RWMutex lockDeviceSetVgpuHeterogeneousMode sync.RWMutex @@ -4690,6 +4773,7 @@ type Interface struct { lockDeviceWorkloadPowerProfileGetCurrentProfiles sync.RWMutex lockDeviceWorkloadPowerProfileGetProfilesInfo sync.RWMutex lockDeviceWorkloadPowerProfileSetRequestedProfiles sync.RWMutex + lockDeviceWorkloadPowerProfileUpdateProfiles_v1 sync.RWMutex lockErrorString sync.RWMutex lockEventSetCreate sync.RWMutex lockEventSetFree sync.RWMutex @@ -8120,6 +8204,38 @@ func (mock *Interface) DeviceGetHostVgpuModeCalls() []struct { return calls } +// DeviceGetHostname_v1 calls DeviceGetHostname_v1Func. +func (mock *Interface) DeviceGetHostname_v1(device nvml.Device) (string, nvml.Return) { + if mock.DeviceGetHostname_v1Func == nil { + panic("Interface.DeviceGetHostname_v1Func: method is nil but Interface.DeviceGetHostname_v1 was just called") + } + callInfo := struct { + Device nvml.Device + }{ + Device: device, + } + mock.lockDeviceGetHostname_v1.Lock() + mock.calls.DeviceGetHostname_v1 = append(mock.calls.DeviceGetHostname_v1, callInfo) + mock.lockDeviceGetHostname_v1.Unlock() + return mock.DeviceGetHostname_v1Func(device) +} + +// DeviceGetHostname_v1Calls gets all the calls that were made to DeviceGetHostname_v1. +// Check the length with: +// +// len(mockedInterface.DeviceGetHostname_v1Calls()) +func (mock *Interface) DeviceGetHostname_v1Calls() []struct { + Device nvml.Device +} { + var calls []struct { + Device nvml.Device + } + mock.lockDeviceGetHostname_v1.RLock() + calls = mock.calls.DeviceGetHostname_v1 + mock.lockDeviceGetHostname_v1.RUnlock() + return calls +} + // DeviceGetIndex calls DeviceGetIndexFunc. func (mock *Interface) DeviceGetIndex(device nvml.Device) (int, nvml.Return) { if mock.DeviceGetIndexFunc == nil { @@ -11324,6 +11440,38 @@ func (mock *Interface) DeviceGetUUIDCalls() []struct { return calls } +// DeviceGetUnrepairableMemoryFlag_v1 calls DeviceGetUnrepairableMemoryFlag_v1Func. +func (mock *Interface) DeviceGetUnrepairableMemoryFlag_v1(device nvml.Device) (nvml.UnrepairableMemoryStatus_v1, nvml.Return) { + if mock.DeviceGetUnrepairableMemoryFlag_v1Func == nil { + panic("Interface.DeviceGetUnrepairableMemoryFlag_v1Func: method is nil but Interface.DeviceGetUnrepairableMemoryFlag_v1 was just called") + } + callInfo := struct { + Device nvml.Device + }{ + Device: device, + } + mock.lockDeviceGetUnrepairableMemoryFlag_v1.Lock() + mock.calls.DeviceGetUnrepairableMemoryFlag_v1 = append(mock.calls.DeviceGetUnrepairableMemoryFlag_v1, callInfo) + mock.lockDeviceGetUnrepairableMemoryFlag_v1.Unlock() + return mock.DeviceGetUnrepairableMemoryFlag_v1Func(device) +} + +// DeviceGetUnrepairableMemoryFlag_v1Calls gets all the calls that were made to DeviceGetUnrepairableMemoryFlag_v1. +// Check the length with: +// +// len(mockedInterface.DeviceGetUnrepairableMemoryFlag_v1Calls()) +func (mock *Interface) DeviceGetUnrepairableMemoryFlag_v1Calls() []struct { + Device nvml.Device +} { + var calls []struct { + Device nvml.Device + } + mock.lockDeviceGetUnrepairableMemoryFlag_v1.RLock() + calls = mock.calls.DeviceGetUnrepairableMemoryFlag_v1 + mock.lockDeviceGetUnrepairableMemoryFlag_v1.RUnlock() + return calls +} + // DeviceGetUtilizationRates calls DeviceGetUtilizationRatesFunc. func (mock *Interface) DeviceGetUtilizationRates(device nvml.Device) (nvml.Utilization, nvml.Return) { if mock.DeviceGetUtilizationRatesFunc == nil { @@ -12104,6 +12252,46 @@ func (mock *Interface) DeviceQueryDrainStateCalls() []struct { return calls } +// DeviceReadPRMCounters_v1 calls DeviceReadPRMCounters_v1Func. +func (mock *Interface) DeviceReadPRMCounters_v1(device nvml.Device, pRMCounterIds []nvml.PRMCounterId, n int) ([]nvml.PRMCounter_v1, nvml.Return) { + if mock.DeviceReadPRMCounters_v1Func == nil { + panic("Interface.DeviceReadPRMCounters_v1Func: method is nil but Interface.DeviceReadPRMCounters_v1 was just called") + } + callInfo := struct { + Device nvml.Device + PRMCounterIds []nvml.PRMCounterId + N int + }{ + Device: device, + PRMCounterIds: pRMCounterIds, + N: n, + } + mock.lockDeviceReadPRMCounters_v1.Lock() + mock.calls.DeviceReadPRMCounters_v1 = append(mock.calls.DeviceReadPRMCounters_v1, callInfo) + mock.lockDeviceReadPRMCounters_v1.Unlock() + return mock.DeviceReadPRMCounters_v1Func(device, pRMCounterIds, n) +} + +// DeviceReadPRMCounters_v1Calls gets all the calls that were made to DeviceReadPRMCounters_v1. +// Check the length with: +// +// len(mockedInterface.DeviceReadPRMCounters_v1Calls()) +func (mock *Interface) DeviceReadPRMCounters_v1Calls() []struct { + Device nvml.Device + PRMCounterIds []nvml.PRMCounterId + N int +} { + var calls []struct { + Device nvml.Device + PRMCounterIds []nvml.PRMCounterId + N int + } + mock.lockDeviceReadPRMCounters_v1.RLock() + calls = mock.calls.DeviceReadPRMCounters_v1 + mock.lockDeviceReadPRMCounters_v1.RUnlock() + return calls +} + // DeviceReadWritePRM_v1 calls DeviceReadWritePRM_v1Func. func (mock *Interface) DeviceReadWritePRM_v1(device nvml.Device, pRMTLV_v1 *nvml.PRMTLV_v1) nvml.Return { if mock.DeviceReadWritePRM_v1Func == nil { @@ -13096,6 +13284,42 @@ func (mock *Interface) DeviceSetGpuOperationModeCalls() []struct { return calls } +// DeviceSetHostname_v1 calls DeviceSetHostname_v1Func. +func (mock *Interface) DeviceSetHostname_v1(device nvml.Device, s string) nvml.Return { + if mock.DeviceSetHostname_v1Func == nil { + panic("Interface.DeviceSetHostname_v1Func: method is nil but Interface.DeviceSetHostname_v1 was just called") + } + callInfo := struct { + Device nvml.Device + S string + }{ + Device: device, + S: s, + } + mock.lockDeviceSetHostname_v1.Lock() + mock.calls.DeviceSetHostname_v1 = append(mock.calls.DeviceSetHostname_v1, callInfo) + mock.lockDeviceSetHostname_v1.Unlock() + return mock.DeviceSetHostname_v1Func(device, s) +} + +// DeviceSetHostname_v1Calls gets all the calls that were made to DeviceSetHostname_v1. +// Check the length with: +// +// len(mockedInterface.DeviceSetHostname_v1Calls()) +func (mock *Interface) DeviceSetHostname_v1Calls() []struct { + Device nvml.Device + S string +} { + var calls []struct { + Device nvml.Device + S string + } + mock.lockDeviceSetHostname_v1.RLock() + calls = mock.calls.DeviceSetHostname_v1 + mock.lockDeviceSetHostname_v1.RUnlock() + return calls +} + // DeviceSetMemClkVfOffset calls DeviceSetMemClkVfOffsetFunc. func (mock *Interface) DeviceSetMemClkVfOffset(device nvml.Device, n int) nvml.Return { if mock.DeviceSetMemClkVfOffsetFunc == nil { @@ -13436,6 +13660,42 @@ func (mock *Interface) DeviceSetPowerManagementLimit_v2Calls() []struct { return calls } +// DeviceSetRusdSettings_v1 calls DeviceSetRusdSettings_v1Func. +func (mock *Interface) DeviceSetRusdSettings_v1(device nvml.Device, rusdSettings_v1 nvml.RusdSettings_v1) nvml.Return { + if mock.DeviceSetRusdSettings_v1Func == nil { + panic("Interface.DeviceSetRusdSettings_v1Func: method is nil but Interface.DeviceSetRusdSettings_v1 was just called") + } + callInfo := struct { + Device nvml.Device + RusdSettings_v1 nvml.RusdSettings_v1 + }{ + Device: device, + RusdSettings_v1: rusdSettings_v1, + } + mock.lockDeviceSetRusdSettings_v1.Lock() + mock.calls.DeviceSetRusdSettings_v1 = append(mock.calls.DeviceSetRusdSettings_v1, callInfo) + mock.lockDeviceSetRusdSettings_v1.Unlock() + return mock.DeviceSetRusdSettings_v1Func(device, rusdSettings_v1) +} + +// DeviceSetRusdSettings_v1Calls gets all the calls that were made to DeviceSetRusdSettings_v1. +// Check the length with: +// +// len(mockedInterface.DeviceSetRusdSettings_v1Calls()) +func (mock *Interface) DeviceSetRusdSettings_v1Calls() []struct { + Device nvml.Device + RusdSettings_v1 nvml.RusdSettings_v1 +} { + var calls []struct { + Device nvml.Device + RusdSettings_v1 nvml.RusdSettings_v1 + } + mock.lockDeviceSetRusdSettings_v1.RLock() + calls = mock.calls.DeviceSetRusdSettings_v1 + mock.lockDeviceSetRusdSettings_v1.RUnlock() + return calls +} + // DeviceSetTemperatureThreshold calls DeviceSetTemperatureThresholdFunc. func (mock *Interface) DeviceSetTemperatureThreshold(device nvml.Device, temperatureThresholds nvml.TemperatureThresholds, n int) nvml.Return { if mock.DeviceSetTemperatureThresholdFunc == nil { @@ -13792,6 +14052,46 @@ func (mock *Interface) DeviceWorkloadPowerProfileSetRequestedProfilesCalls() []s return calls } +// DeviceWorkloadPowerProfileUpdateProfiles_v1 calls DeviceWorkloadPowerProfileUpdateProfiles_v1Func. +func (mock *Interface) DeviceWorkloadPowerProfileUpdateProfiles_v1(device nvml.Device, powerProfileOperation nvml.PowerProfileOperation, powerProfileTypes []nvml.PowerProfileType) nvml.Return { + if mock.DeviceWorkloadPowerProfileUpdateProfiles_v1Func == nil { + panic("Interface.DeviceWorkloadPowerProfileUpdateProfiles_v1Func: method is nil but Interface.DeviceWorkloadPowerProfileUpdateProfiles_v1 was just called") + } + callInfo := struct { + Device nvml.Device + PowerProfileOperation nvml.PowerProfileOperation + PowerProfileTypes []nvml.PowerProfileType + }{ + Device: device, + PowerProfileOperation: powerProfileOperation, + PowerProfileTypes: powerProfileTypes, + } + mock.lockDeviceWorkloadPowerProfileUpdateProfiles_v1.Lock() + mock.calls.DeviceWorkloadPowerProfileUpdateProfiles_v1 = append(mock.calls.DeviceWorkloadPowerProfileUpdateProfiles_v1, callInfo) + mock.lockDeviceWorkloadPowerProfileUpdateProfiles_v1.Unlock() + return mock.DeviceWorkloadPowerProfileUpdateProfiles_v1Func(device, powerProfileOperation, powerProfileTypes) +} + +// DeviceWorkloadPowerProfileUpdateProfiles_v1Calls gets all the calls that were made to DeviceWorkloadPowerProfileUpdateProfiles_v1. +// Check the length with: +// +// len(mockedInterface.DeviceWorkloadPowerProfileUpdateProfiles_v1Calls()) +func (mock *Interface) DeviceWorkloadPowerProfileUpdateProfiles_v1Calls() []struct { + Device nvml.Device + PowerProfileOperation nvml.PowerProfileOperation + PowerProfileTypes []nvml.PowerProfileType +} { + var calls []struct { + Device nvml.Device + PowerProfileOperation nvml.PowerProfileOperation + PowerProfileTypes []nvml.PowerProfileType + } + mock.lockDeviceWorkloadPowerProfileUpdateProfiles_v1.RLock() + calls = mock.calls.DeviceWorkloadPowerProfileUpdateProfiles_v1 + mock.lockDeviceWorkloadPowerProfileUpdateProfiles_v1.RUnlock() + return calls +} + // ErrorString calls ErrorStringFunc. func (mock *Interface) ErrorString(returnMoqParam nvml.Return) string { if mock.ErrorStringFunc == nil { diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go index 38123a9..b3877b4 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.go @@ -396,6 +396,15 @@ func nvmlDeviceGetRepairStatus(nvmlDevice nvmlDevice, RepairStatus *RepairStatus return __v } +// nvmlDeviceGetUnrepairableMemoryFlag_v1 function as declared in nvml/nvml.h +func nvmlDeviceGetUnrepairableMemoryFlag_v1(nvmlDevice nvmlDevice, UnrepairableMemoryStatus *UnrepairableMemoryStatus_v1) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cUnrepairableMemoryStatus, _ := (*C.nvmlUnrepairableMemoryStatus_v1_t)(unsafe.Pointer(UnrepairableMemoryStatus)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetUnrepairableMemoryFlag_v1(cnvmlDevice, cUnrepairableMemoryStatus) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceGetTopologyCommonAncestor function as declared in nvml/nvml.h func nvmlDeviceGetTopologyCommonAncestor(Device1 nvmlDevice, Device2 nvmlDevice, PathInfo *GpuTopologyLevel) Return { cDevice1, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&Device1)), cgoAllocsUnknown @@ -1802,6 +1811,24 @@ func nvmlDeviceGetPdi(nvmlDevice nvmlDevice, Pdi *Pdi) Return { return __v } +// nvmlDeviceSetHostname_v1 function as declared in nvml/nvml.h +func nvmlDeviceSetHostname_v1(nvmlDevice nvmlDevice, Hostname *Hostname_v1) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cHostname, _ := (*C.nvmlHostname_v1_t)(unsafe.Pointer(Hostname)), cgoAllocsUnknown + __ret := C.nvmlDeviceSetHostname_v1(cnvmlDevice, cHostname) + __v := (Return)(__ret) + return __v +} + +// nvmlDeviceGetHostname_v1 function as declared in nvml/nvml.h +func nvmlDeviceGetHostname_v1(nvmlDevice nvmlDevice, Hostname *Hostname_v1) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cHostname, _ := (*C.nvmlHostname_v1_t)(unsafe.Pointer(Hostname)), cgoAllocsUnknown + __ret := C.nvmlDeviceGetHostname_v1(cnvmlDevice, cHostname) + __v := (Return)(__ret) + return __v +} + // nvmlUnitSetLedState function as declared in nvml/nvml.h func nvmlUnitSetLedState(nvmlUnit nvmlUnit, Color LedColor) Return { cnvmlUnit, _ := *(*C.nvmlUnit_t)(unsafe.Pointer(&nvmlUnit)), cgoAllocsUnknown @@ -3096,6 +3123,15 @@ func nvmlDeviceReadWritePRM_v1(nvmlDevice nvmlDevice, Buffer *PRMTLV_v1) Return return __v } +// nvmlDeviceReadPRMCounters_v1 function as declared in nvml/nvml.h +func nvmlDeviceReadPRMCounters_v1(nvmlDevice nvmlDevice, CounterList *PRMCounterList_v1) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cCounterList, _ := (*C.nvmlPRMCounterList_v1_t)(unsafe.Pointer(CounterList)), cgoAllocsUnknown + __ret := C.nvmlDeviceReadPRMCounters_v1(cnvmlDevice, cCounterList) + __v := (Return)(__ret) + return __v +} + // nvmlDeviceSetMigMode function as declared in nvml/nvml.h func nvmlDeviceSetMigMode(nvmlDevice nvmlDevice, Mode uint32, ActivationStatus *Return) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -3498,6 +3534,15 @@ func nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(nvmlDevice nvmlDevice, return __v } +// nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1 function as declared in nvml/nvml.h +func nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1(nvmlDevice nvmlDevice, UpdateProfiles *WorkloadPowerProfileUpdateProfiles_v1) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cUpdateProfiles, _ := (*C.nvmlWorkloadPowerProfileUpdateProfiles_v1_t)(unsafe.Pointer(UpdateProfiles)), cgoAllocsUnknown + __ret := C.nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1(cnvmlDevice, cUpdateProfiles) + __v := (Return)(__ret) + return __v +} + // nvmlDevicePowerSmoothingActivatePresetProfile function as declared in nvml/nvml.h func nvmlDevicePowerSmoothingActivatePresetProfile(nvmlDevice nvmlDevice, Profile *PowerSmoothingProfile) Return { cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown @@ -3534,6 +3579,15 @@ func nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(nvmlDevice nvmlDevice, Err return __v } +// nvmlDeviceSetRusdSettings_v1 function as declared in nvml/nvml.h +func nvmlDeviceSetRusdSettings_v1(nvmlDevice nvmlDevice, Settings *RusdSettings_v1) Return { + cnvmlDevice, _ := *(*C.nvmlDevice_t)(unsafe.Pointer(&nvmlDevice)), cgoAllocsUnknown + cSettings, _ := (*C.nvmlRusdSettings_v1_t)(unsafe.Pointer(Settings)), cgoAllocsUnknown + __ret := C.nvmlDeviceSetRusdSettings_v1(cnvmlDevice, cSettings) + __v := (Return)(__ret) + return __v +} + // nvmlInit_v1 function as declared in nvml/nvml.h func nvmlInit_v1() Return { __ret := C.nvmlInit() diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h index 917a8c9..6b13c97 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/nvml.h @@ -1,5 +1,5 @@ -/*** NVML VERSION: 13.0.39 ***/ -/*** From https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvml_dev/linux-x86_64/cuda_nvml_dev-linux-x86_64-13.0.39-archive.tar.xz ***/ +/*** NVML VERSION: 13.1.115 ***/ +/*** From https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvml_dev/linux-x86_64/cuda_nvml_dev-linux-x86_64-13.1.115-archive.tar.xz ***/ /* * Copyright 1993-2025 NVIDIA Corporation. All rights reserved. * @@ -80,7 +80,7 @@ extern "C" { * On Windows, set up methods for DLL export * define NVML_STATIC_IMPORT when using nvml_loader library */ -#if defined _WINDOWS +#if defined(_WINDOWS) || defined(_WIN32) #if !defined NVML_STATIC_IMPORT #if defined NVML_LIB_EXPORT #define DECLDIR __declspec(dllexport) @@ -95,22 +95,21 @@ extern "C" { #endif /* - * Deprecation definition. Starting CUDA 13.1 this will change to: - * #if defined _WINDOWS - * #define DEPRECATED(ver) __declspec(deprecated) - * #else - * #define DEPRECATED(ver) __attribute__((deprecated)) - * #endif + * Deprecation definition. */ -#define DEPRECATED(ver) /* nop in CUDA 13.0, enabled in CUDA 13.1 */ +#if defined(_WINDOWS) || defined(_WIN32) + #define DEPRECATED(ver) __declspec(deprecated) +#else + #define DEPRECATED(ver) __attribute__((deprecated)) +#endif - #define NVML_MCDM_SUPPORT + #define NVML_MCDM_SUPPORT //!< Definition to enable MCDM support. /** * NVML API versioning support */ -#define NVML_API_VERSION 13 -#define NVML_API_VERSION_STR "13" +#define NVML_API_VERSION 13 //!< NVML API version identifier. +#define NVML_API_VERSION_STR "13" //!< NVML API version identifier as a string. /** * Defining NVML_NO_UNVERSIONED_FUNC_DEFS will disable "auto upgrading" of APIs. * e.g. the user will have to call nvmlInit_v2 instead of nvmlInit. Enable this @@ -154,7 +153,7 @@ extern "C" { * * Each structure explicitly states when to check for this value. */ -#define NVML_VALUE_NOT_AVAILABLE (-1) +#define NVML_VALUE_NOT_AVAILABLE (-1) //!< Macro for unavailable values. typedef struct { @@ -169,12 +168,12 @@ typedef struct /** * Buffer size guaranteed to be large enough for pci bus id */ -#define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 32 +#define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 32 //!< Buffer size for PCI bus ID. /** * Buffer size guaranteed to be large enough for pci bus id for \p busIdLegacy */ -#define NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE 16 +#define NVML_DEVICE_PCI_BUS_ID_BUFFER_V2_SIZE 16 //!< Buffer size for legacy PCI bus ID. /** * PCI information about a GPU device. @@ -195,7 +194,7 @@ typedef struct char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; //!< The tuple domain:bus:device.function PCI identifier (& NULL terminator) } nvmlPciInfoExt_v1_t; typedef nvmlPciInfoExt_v1_t nvmlPciInfoExt_t; -#define nvmlPciInfoExt_v1 NVML_STRUCT_VERSION(PciInfoExt, 1) +#define nvmlPciInfoExt_v1 NVML_STRUCT_VERSION(PciInfoExt, 1) //!< Version macro for \a nvmlPciInfoExt_v1_t /** * PCI information about a GPU device. @@ -217,19 +216,19 @@ typedef struct nvmlPciInfo_st /** * PCI format string for \p busIdLegacy */ -#define NVML_DEVICE_PCI_BUS_ID_LEGACY_FMT "%04X:%02X:%02X.0" +#define NVML_DEVICE_PCI_BUS_ID_LEGACY_FMT "%04X:%02X:%02X.0" //!< Legacy PCI bus ID format. /** * PCI format string for \p busId */ -#define NVML_DEVICE_PCI_BUS_ID_FMT "%08X:%02X:%02X.0" +#define NVML_DEVICE_PCI_BUS_ID_FMT "%08X:%02X:%02X.0" //!< PCI bus ID format. /** * Utility macro for filling the pci bus id format from a nvmlPciInfo_t */ #define NVML_DEVICE_PCI_BUS_ID_FMT_ARGS(pciInfo) (pciInfo)->domain, \ (pciInfo)->bus, \ - (pciInfo)->device + (pciInfo)->device //!< Macro for formatting PCI bus ID arguments. /** * Detailed ECC error counts for a device. @@ -281,7 +280,7 @@ typedef struct nvmlMemory_v2_st unsigned long long used; //!< Allocated device memory (in bytes). } nvmlMemory_v2_t; -#define nvmlMemory_v2 NVML_STRUCT_VERSION(Memory, 2) +#define nvmlMemory_v2 NVML_STRUCT_VERSION(Memory, 2) //!< Version macro for \a nvmlMemory_v2_t /** * BAR1 Memory allocation Information for a device @@ -352,7 +351,7 @@ typedef nvmlProcessDetailList_v1_t nvmlProcessDetailList_t; /** * nvmlProcessDetailList version */ -#define nvmlProcessDetailList_v1 NVML_STRUCT_VERSION(ProcessDetailList, 1) +#define nvmlProcessDetailList_v1 NVML_STRUCT_VERSION(ProcessDetailList, 1) //!< Version macro for \a nvmlProcessDetailList_v1_t typedef struct nvmlDeviceAttributes_st { @@ -375,7 +374,7 @@ typedef struct unsigned int isC2cEnabled; } nvmlC2cModeInfo_v1_t; -#define nvmlC2cModeInfo_v1 NVML_STRUCT_VERSION(C2cModeInfo, 1) +#define nvmlC2cModeInfo_v1 NVML_STRUCT_VERSION(C2cModeInfo, 1) //!< Version macro for \a nvmlC2cModeInfo_v1_t /** * Enum to represent device addressing mode values @@ -397,7 +396,7 @@ typedef struct } nvmlDeviceAddressingMode_v1_t; typedef nvmlDeviceAddressingMode_v1_t nvmlDeviceAddressingMode_t; -#define nvmlDeviceAddressingMode_v1 NVML_STRUCT_VERSION(DeviceAddressingMode, 1) +#define nvmlDeviceAddressingMode_v1 NVML_STRUCT_VERSION(DeviceAddressingMode, 1) //!< Version macro for \a nvmlDeviceAddressingMode_v1_t /** * Struct to represent the NVML repair status @@ -410,7 +409,15 @@ typedef struct } nvmlRepairStatus_v1_t; typedef nvmlRepairStatus_v1_t nvmlRepairStatus_t; -#define nvmlRepairStatus_v1 NVML_STRUCT_VERSION(RepairStatus, 1) +#define nvmlRepairStatus_v1 NVML_STRUCT_VERSION(RepairStatus, 1) //!< Version macro for \a nvmlRepairStatus_v1_t + +/** + * Struct to represent the NVML unrepairable memory status + */ +typedef struct +{ + unsigned int bUnrepairableMemory; //!< Reference to \a unsigned int +} nvmlUnrepairableMemoryStatus_v1_t; /** * Possible values that classify the remap availability for each bank. The max @@ -439,7 +446,7 @@ typedef enum nvmlBridgeChipType_enum /** * Maximum number of NvLink links supported */ -#define NVML_NVLINK_MAX_LINKS 18 +#define NVML_NVLINK_MAX_LINKS 18 //!< Maximum number of NVLink links supported. /** * Enum to represent the NvLink utilization counter packet units @@ -541,7 +548,7 @@ typedef enum nvmlGpuLevel_enum } nvmlGpuTopologyLevel_t; /* Compatibility for CPU->NODE renaming */ -#define NVML_TOPOLOGY_CPU NVML_TOPOLOGY_NODE +#define NVML_TOPOLOGY_CPU NVML_TOPOLOGY_NODE //!< Topology level for node. /* P2P Capability Index Status*/ typedef enum nvmlGpuP2PStatus_enum @@ -576,7 +583,7 @@ typedef enum nvmlGpuP2PCapsIndex_enum /** * Maximum limit on Physical Bridges per Board */ -#define NVML_MAX_PHYSICAL_BRIDGE (128) +#define NVML_MAX_PHYSICAL_BRIDGE (128) //!< Maximum number of physical bridges. /** * Information about the Bridge Chip Firmware @@ -697,7 +704,7 @@ typedef struct nvmlViolationTime_st unsigned long long violationTime; //!< violationTime in Nanoseconds }nvmlViolationTime_t; -#define NVML_MAX_THERMAL_SENSORS_PER_GPU 3 +#define NVML_MAX_THERMAL_SENSORS_PER_GPU 3 //!< Maximum number of thermal sensors per GPU. /** * Represents the thermal sensor targets @@ -746,17 +753,18 @@ typedef enum /** * Struct to hold the thermal sensor settings */ +typedef struct { + nvmlThermalController_t controller; + int defaultMinTemp; + int defaultMaxTemp; + int currentTemp; + nvmlThermalTarget_t target; +} nvmlGpuThermalSettingsSensor_t; + typedef struct { unsigned int count; - struct - { - nvmlThermalController_t controller; - int defaultMinTemp; - int defaultMaxTemp; - int currentTemp; - nvmlThermalTarget_t target; - } sensor[NVML_MAX_THERMAL_SENSORS_PER_GPU]; + nvmlGpuThermalSettingsSensor_t sensor[NVML_MAX_THERMAL_SENSORS_PER_GPU]; } nvmlGpuThermalSettings_t; @@ -794,17 +802,17 @@ typedef struct } nvmlCoolerInfo_v1_t; typedef nvmlCoolerInfo_v1_t nvmlCoolerInfo_t; -#define nvmlCoolerInfo_v1 NVML_STRUCT_VERSION(CoolerInfo, 1) +#define nvmlCoolerInfo_v1 NVML_STRUCT_VERSION(CoolerInfo, 1) //!< Version macro for \a nvmlCoolerInfo_v1_t /** * UUID length in ASCII format */ -#define NVML_DEVICE_UUID_ASCII_LEN 41 +#define NVML_DEVICE_UUID_ASCII_LEN 41 //!< Length of UUID in ASCII format. /** * UUID length in binary format */ -#define NVML_DEVICE_UUID_BINARY_LEN 16 +#define NVML_DEVICE_UUID_BINARY_LEN 16 //!< Length of UUID in binary format. /** * Enum to represent different UUID types @@ -836,7 +844,7 @@ typedef struct } nvmlUUID_v1_t; typedef nvmlUUID_v1_t nvmlUUID_t; -#define nvmlUUID_v1 NVML_STRUCT_VERSION(UUID, 1) +#define nvmlUUID_v1 NVML_STRUCT_VERSION(UUID, 1) //!< Version macro for \a nvmlUUID_v1_t /** * Struct to represent the NVML PDI information @@ -848,7 +856,7 @@ typedef struct } nvmlPdi_v1_t; typedef nvmlPdi_v1_t nvmlPdi_t; -#define nvmlPdi_v1 NVML_STRUCT_VERSION(Pdi, 1) +#define nvmlPdi_v1 NVML_STRUCT_VERSION(Pdi, 1) //!< Version macro for \a nvmlPdi_v1_t /** @} */ @@ -868,9 +876,9 @@ typedef enum nvmlEnableState_enum } nvmlEnableState_t; //! Generic flag used to specify the default behavior of some functions. See description of particular functions for details. -#define nvmlFlagDefault 0x00 +#define nvmlFlagDefault 0x00 //!< Default flag. //! Generic flag used to force some behavior. See description of particular functions for details. -#define nvmlFlagForce 0x01 +#define nvmlFlagForce 0x01 //!< Force flag. /** * DRAM Encryption Info @@ -882,7 +890,7 @@ typedef struct } nvmlDramEncryptionInfo_v1_t; typedef nvmlDramEncryptionInfo_v1_t nvmlDramEncryptionInfo_t; -#define nvmlDramEncryptionInfo_v1 NVML_STRUCT_VERSION(DramEncryptionInfo, 1) +#define nvmlDramEncryptionInfo_v1 NVML_STRUCT_VERSION(DramEncryptionInfo, 1) //!< Version macro for \a nvmlDramEncryptionInfo_v1_t /** * * The Brand of the GPU @@ -958,7 +966,7 @@ typedef struct typedef nvmlMarginTemperature_v1_t nvmlMarginTemperature_t; -#define nvmlMarginTemperature_v1 NVML_STRUCT_VERSION(MarginTemperature, 1) +#define nvmlMarginTemperature_v1 NVML_STRUCT_VERSION(MarginTemperature, 1) //!< Version macro for \a nvmlMarginTemperature_v1_t /** * Compute mode. @@ -981,7 +989,7 @@ typedef enum nvmlComputeMode_enum /** * Max Clock Monitors available */ -#define MAX_CLK_DOMAINS 32 +#define MAX_CLK_DOMAINS 32 //!< Maximum number of clock domains. /** * Clock Monitor error types @@ -1023,21 +1031,21 @@ typedef struct nvmlClkMonStatus_status { * * @deprecated See \ref nvmlMemoryErrorType_t for a more flexible type */ -#define nvmlEccBitType_t nvmlMemoryErrorType_t +#define nvmlEccBitType_t nvmlMemoryErrorType_t //!< Deprecated ECC bit type. See \ref nvmlMemoryErrorType_t. /** * Single bit ECC errors * * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_CORRECTED */ -#define NVML_SINGLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_CORRECTED +#define NVML_SINGLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_CORRECTED //!< Deprecated single bit ECC error. See \ref NVML_MEMORY_ERROR_TYPE_CORRECTED. /** * Double bit ECC errors * * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_UNCORRECTED */ -#define NVML_DOUBLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_UNCORRECTED +#define NVML_DOUBLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_UNCORRECTED //!< Deprecated double bit ECC error. See \ref NVML_MEMORY_ERROR_TYPE_UNCORRECTED. /** * Memory error types @@ -1064,21 +1072,6 @@ typedef enum nvmlMemoryErrorType_enum } nvmlMemoryErrorType_t; -/** - * Represents Nvlink Version - */ -typedef enum nvmlNvlinkVersion_enum -{ - NVML_NVLINK_VERSION_INVALID = 0, - NVML_NVLINK_VERSION_1_0 = 1, - NVML_NVLINK_VERSION_2_0 = 2, - NVML_NVLINK_VERSION_2_2 = 3, - NVML_NVLINK_VERSION_3_0 = 4, - NVML_NVLINK_VERSION_3_1 = 5, - NVML_NVLINK_VERSION_4_0 = 6, - NVML_NVLINK_VERSION_5_0 = 7, -}nvmlNvlinkVersion_t; - /** * ECC counter types. * @@ -1142,7 +1135,7 @@ typedef enum nvmlDriverModel_enum NVML_DRIVER_MCDM = 2 //!< MCDM driver model -- GPU treated as a Microsoft compute device } nvmlDriverModel_t; -#define NVML_MAX_GPU_PERF_PSTATES 16 +#define NVML_MAX_GPU_PERF_PSTATES 16 //!< Maximum number of GPU performance states. /** * Allowed PStates. @@ -1183,7 +1176,7 @@ typedef struct typedef nvmlClockOffset_v1_t nvmlClockOffset_t; -#define nvmlClockOffset_v1 NVML_STRUCT_VERSION(ClockOffset, 1) +#define nvmlClockOffset_v1 NVML_STRUCT_VERSION(ClockOffset, 1) //!< Version macro for \a nvmlClockOffset_v1_t /** * Fan speed info. @@ -1196,9 +1189,9 @@ typedef struct } nvmlFanSpeedInfo_v1_t; typedef nvmlFanSpeedInfo_v1_t nvmlFanSpeedInfo_t; -#define nvmlFanSpeedInfo_v1 NVML_STRUCT_VERSION(FanSpeedInfo, 1) +#define nvmlFanSpeedInfo_v1 NVML_STRUCT_VERSION(FanSpeedInfo, 1) //!< Version macro for \a nvmlFanSpeedInfo_v1_t -#define NVML_PERF_MODES_BUFFER_SIZE 2048 +#define NVML_PERF_MODES_BUFFER_SIZE 2048 //!< Buffer size for performance modes strings. /** * Device performance modes string @@ -1210,7 +1203,7 @@ typedef struct } nvmlDevicePerfModes_v1_t; typedef nvmlDevicePerfModes_v1_t nvmlDevicePerfModes_t; -#define nvmlDevicePerfModes_v1 NVML_STRUCT_VERSION(DevicePerfModes, 1) +#define nvmlDevicePerfModes_v1 NVML_STRUCT_VERSION(DevicePerfModes, 1) //!< Version macro for \a nvmlDevicePerfModes_v1_t /** * Device current clocks string @@ -1222,7 +1215,7 @@ typedef struct } nvmlDeviceCurrentClockFreqs_v1_t; typedef nvmlDeviceCurrentClockFreqs_v1_t nvmlDeviceCurrentClockFreqs_t; -#define nvmlDeviceCurrentClockFreqs_v1 NVML_STRUCT_VERSION(DeviceCurrentClockFreqs, 1) +#define nvmlDeviceCurrentClockFreqs_v1 NVML_STRUCT_VERSION(DeviceCurrentClockFreqs, 1) //!< Version macro for \a nvmlDeviceCurrentClockFreqs_v1_t /** * Device powerMizer modes @@ -1237,6 +1230,19 @@ typedef struct { unsigned int currentMode; //!< OUT: the current powermizer mode unsigned int mode; //!< IN: the powermizer mode to set + + /** + * The bitmask of supported power mizer modes on this device. + * The supported modes can be combined using the bitwise OR operator '|'. + * For example, if a device supports all PowerMizer modes, the bitmask would be: + * supportedPowerMizerModes = ((1 << NVML_POWER_MIZER_MODE_ADAPTIVE) | + * (1 << NVML_POWER_MIZER_MODE_PREFER_MAXIMUM_PERFORMANCE) | + * (1 << NVML_POWER_MIZER_MODE_AUTO) | + * (1 << NVML_POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE)); + * + * This bitmask can be used to check which power mizer modes are available on the device by performing + * a bitwise AND operation with the specific mode you want to check. + */ unsigned int supportedPowerMizerModes; //!< OUT: Bitmask of supported powermizer modes } nvmlDevicePowerMizerModes_v1_t; @@ -1417,7 +1423,7 @@ typedef struct } nvmlEccSramErrorStatus_v1_t; typedef nvmlEccSramErrorStatus_v1_t nvmlEccSramErrorStatus_t; -#define nvmlEccSramErrorStatus_v1 NVML_STRUCT_VERSION(EccSramErrorStatus, 1) +#define nvmlEccSramErrorStatus_v1 NVML_STRUCT_VERSION(EccSramErrorStatus, 1) //!< Version macro for \a nvmlEccSramErrorStatus_v1_t /** * Structure to store platform information @@ -1436,7 +1442,7 @@ typedef struct unsigned char peerType; //!< Platform indicated NVLink-peer type (e.g. switch present or not) unsigned char moduleId; //!< ID of this GPU within the node } nvmlPlatformInfo_v1_t; -#define nvmlPlatformInfo_v1 NVML_STRUCT_VERSION(PlatformInfo, 1) +#define nvmlPlatformInfo_v1 NVML_STRUCT_VERSION(PlatformInfo, 1) //!< Version macro for \a nvmlPlatformInfo_v1_t /** * Structure to store platform information (v2) @@ -1454,7 +1460,17 @@ typedef struct } nvmlPlatformInfo_v2_t; typedef nvmlPlatformInfo_v2_t nvmlPlatformInfo_t; -#define nvmlPlatformInfo_v2 NVML_STRUCT_VERSION(PlatformInfo, 2) +#define nvmlPlatformInfo_v2 NVML_STRUCT_VERSION(PlatformInfo, 2) //!< Version macro for \a nvmlPlatformInfo_v2_t + +/** + * Structure to store hostname information + */ +#define NVML_DEVICE_HOSTNAME_BUFFER_SIZE 64 //!< Buffer size for hostname string. + +typedef struct +{ + char value[NVML_DEVICE_HOSTNAME_BUFFER_SIZE]; //!< null-terminated hostname string +} nvmlHostname_v1_t; typedef struct { @@ -1475,39 +1491,57 @@ typedef struct } nvmlEccSramUniqueUncorrectedErrorCounts_v1_t; typedef nvmlEccSramUniqueUncorrectedErrorCounts_v1_t nvmlEccSramUniqueUncorrectedErrorCounts_t; -#define nvmlEccSramUniqueUncorrectedErrorCounts_v1 NVML_STRUCT_VERSION(EccSramUniqueUncorrectedErrorCounts, 1) +#define nvmlEccSramUniqueUncorrectedErrorCounts_v1 NVML_STRUCT_VERSION(EccSramUniqueUncorrectedErrorCounts, 1) //!< Version macro for \a nvmlEccSramUniqueUncorrectedErrorCounts_v1_t + +#define NVML_RUSD_POLL_NONE 0x0 //!< Disable RUSD polling on all metric groups +#define NVML_RUSD_POLL_CLOCK 0x1 //!< Enable RUSD polling on clock group +#define NVML_RUSD_POLL_PERF 0x2 //!< Enable RUSD polling on performance group +#define NVML_RUSD_POLL_MEMORY 0x4 //!< Enable RUSD polling on memory group +#define NVML_RUSD_POLL_POWER 0x8 //!< Enable RUSD polling on power group +#define NVML_RUSD_POLL_THERMAL 0x10 //!< Enable RUSD polling on thermal group +#define NVML_RUSD_POLL_PCI 0x20 //!< Enable RUSD polling on pci group +#define NVML_RUSD_POLL_FAN 0x40 //!< Enable RUSD polling on fan group +#define NVML_RUSD_POLL_PROC_UTIL 0x80 //!< Enable RUSD polling on process utilization group +#define NVML_RUSD_POLL_ALL 0xFFFFFFFFFFFFFFFF //!< Enable RUSD polling on all groups + +typedef struct +{ + unsigned int version; + unsigned long long pollMask; //!< Bitmask of polling data. 0 value means the GPU's RUSD polling mask is cleared. +} nvmlRusdSettings_v1_t; +#define nvmlRusdSettings_v1 NVML_STRUCT_VERSION(RusdSettings, 1) //!< Version macro for \a nvmlRusdSettings_v1_t /** * GSP firmware */ -#define NVML_GSP_FIRMWARE_VERSION_BUF_SIZE 0x40 +#define NVML_GSP_FIRMWARE_VERSION_BUF_SIZE 0x40 //!< Buffer size for GSP firmware version string. /** * Simplified chip architecture */ -#define NVML_DEVICE_ARCH_KEPLER 2 // Devices based on the NVIDIA Kepler architecture -#define NVML_DEVICE_ARCH_MAXWELL 3 // Devices based on the NVIDIA Maxwell architecture -#define NVML_DEVICE_ARCH_PASCAL 4 // Devices based on the NVIDIA Pascal architecture -#define NVML_DEVICE_ARCH_VOLTA 5 // Devices based on the NVIDIA Volta architecture -#define NVML_DEVICE_ARCH_TURING 6 // Devices based on the NVIDIA Turing architecture -#define NVML_DEVICE_ARCH_AMPERE 7 // Devices based on the NVIDIA Ampere architecture -#define NVML_DEVICE_ARCH_ADA 8 // Devices based on the NVIDIA Ada architecture -#define NVML_DEVICE_ARCH_HOPPER 9 // Devices based on the NVIDIA Hopper architecture +#define NVML_DEVICE_ARCH_KEPLER 2 //!< Devices based on the NVIDIA Kepler architecture +#define NVML_DEVICE_ARCH_MAXWELL 3 //!< Devices based on the NVIDIA Maxwell architecture +#define NVML_DEVICE_ARCH_PASCAL 4 //!< Devices based on the NVIDIA Pascal architecture +#define NVML_DEVICE_ARCH_VOLTA 5 //!< Devices based on the NVIDIA Volta architecture +#define NVML_DEVICE_ARCH_TURING 6 //!< Devices based on the NVIDIA Turing architecture +#define NVML_DEVICE_ARCH_AMPERE 7 //!< Devices based on the NVIDIA Ampere architecture +#define NVML_DEVICE_ARCH_ADA 8 //!< Devices based on the NVIDIA Ada architecture +#define NVML_DEVICE_ARCH_HOPPER 9 //!< Devices based on the NVIDIA Hopper architecture -#define NVML_DEVICE_ARCH_BLACKWELL 10 // Devices based on the NVIDIA Blackwell architecture +#define NVML_DEVICE_ARCH_BLACKWELL 10 //!< Devices based on the NVIDIA Blackwell architecture -#define NVML_DEVICE_ARCH_UNKNOWN 0xffffffff // Anything else, presumably something newer +#define NVML_DEVICE_ARCH_UNKNOWN 0xffffffff //!< Anything else, presumably something newer typedef unsigned int nvmlDeviceArchitecture_t; /** * PCI bus types */ -#define NVML_BUS_TYPE_UNKNOWN 0 -#define NVML_BUS_TYPE_PCI 1 -#define NVML_BUS_TYPE_PCIE 2 -#define NVML_BUS_TYPE_FPCI 3 -#define NVML_BUS_TYPE_AGP 4 +#define NVML_BUS_TYPE_UNKNOWN 0 //!< Unknown bus type. +#define NVML_BUS_TYPE_PCI 1 //!< PCI bus. +#define NVML_BUS_TYPE_PCIE 2 //!< PCI-Express bus. +#define NVML_BUS_TYPE_FPCI 3 //!< FPCI bus. +#define NVML_BUS_TYPE_AGP 4 //!< AGP bus. typedef unsigned int nvmlBusType_t; @@ -1518,38 +1552,38 @@ typedef unsigned int nvmlBusType_t; /** * Device Fan control policy */ -#define NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW 0 -#define NVML_FAN_POLICY_MANUAL 1 +#define NVML_FAN_POLICY_TEMPERATURE_CONTINOUS_SW 0 //!< Temperature-controlled fan policy. +#define NVML_FAN_POLICY_MANUAL 1 //!< Manual fan control policy. typedef unsigned int nvmlFanControlPolicy_t; /** * Device Power Source */ -#define NVML_POWER_SOURCE_AC 0x00000000 -#define NVML_POWER_SOURCE_BATTERY 0x00000001 -#define NVML_POWER_SOURCE_UNDERSIZED 0x00000002 +#define NVML_POWER_SOURCE_AC 0x00000000 //!< AC power source. +#define NVML_POWER_SOURCE_BATTERY 0x00000001 //!< Battery power source. +#define NVML_POWER_SOURCE_UNDERSIZED 0x00000002 //!< Undersized power source. typedef unsigned int nvmlPowerSource_t; /** * Device PCIE link Max Speed */ -#define NVML_PCIE_LINK_MAX_SPEED_INVALID 0x00000000 -#define NVML_PCIE_LINK_MAX_SPEED_2500MBPS 0x00000001 -#define NVML_PCIE_LINK_MAX_SPEED_5000MBPS 0x00000002 -#define NVML_PCIE_LINK_MAX_SPEED_8000MBPS 0x00000003 -#define NVML_PCIE_LINK_MAX_SPEED_16000MBPS 0x00000004 -#define NVML_PCIE_LINK_MAX_SPEED_32000MBPS 0x00000005 -#define NVML_PCIE_LINK_MAX_SPEED_64000MBPS 0x00000006 +#define NVML_PCIE_LINK_MAX_SPEED_INVALID 0x00000000 //!< Invalid PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_2500MBPS 0x00000001 //!< 2500 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_5000MBPS 0x00000002 //!< 5000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_8000MBPS 0x00000003 //!< 8000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_16000MBPS 0x00000004 //!< 16000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_32000MBPS 0x00000005 //!< 32000 MB/s PCIe link speed. +#define NVML_PCIE_LINK_MAX_SPEED_64000MBPS 0x00000006 //!< 64000 MB/s PCIe link speed. /** * Adaptive clocking status */ -#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED 0x00000000 -#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED 0x00000001 +#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_DISABLED 0x00000000 //!< Adaptive clocking is disabled. +#define NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED 0x00000001 //!< Adaptive clocking is enabled. -#define NVML_MAX_GPU_UTILIZATIONS 8 +#define NVML_MAX_GPU_UTILIZATIONS 8 //!< Maximum number of GPU utilization domains. /** * Represents the GPU utilization domains @@ -1562,29 +1596,30 @@ typedef enum nvmlGpuUtilizationDomainId_t NVML_GPU_UTILIZATION_DOMAIN_BUS = 3, //!< Bus interface domain } nvmlGpuUtilizationDomainId_t; +typedef struct { + unsigned int bIsPresent; + unsigned int percentage; + unsigned int incThreshold; + unsigned int decThreshold; +} nvmlGpuDynamicPstatesInfoUtilization_t; + typedef struct nvmlGpuDynamicPstatesInfo_st { unsigned int flags; //!< Reserved for future use - struct - { - unsigned int bIsPresent; //!< Set if this utilization domain is present on this GPU - unsigned int percentage; //!< Percentage of time where the domain is considered busy in the last 1-second interval - unsigned int incThreshold; //!< Utilization threshold that can trigger a perf-increasing P-State change when crossed - unsigned int decThreshold; //!< Utilization threshold that can trigger a perf-decreasing P-State change when crossed - } utilization[NVML_MAX_GPU_UTILIZATIONS]; + nvmlGpuDynamicPstatesInfoUtilization_t utilization[NVML_MAX_GPU_UTILIZATIONS]; } nvmlGpuDynamicPstatesInfo_t; /* * PCIe outbound/inbound atomic operations capability */ -#define NVML_PCIE_ATOMICS_CAP_FETCHADD32 0x01 -#define NVML_PCIE_ATOMICS_CAP_FETCHADD64 0x02 -#define NVML_PCIE_ATOMICS_CAP_SWAP32 0x04 -#define NVML_PCIE_ATOMICS_CAP_SWAP64 0x08 -#define NVML_PCIE_ATOMICS_CAP_CAS32 0x10 -#define NVML_PCIE_ATOMICS_CAP_CAS64 0x20 -#define NVML_PCIE_ATOMICS_CAP_CAS128 0x40 -#define NVML_PCIE_ATOMICS_OPS_MAX 7 +#define NVML_PCIE_ATOMICS_CAP_FETCHADD32 0x01 //!< 32-bit fetch and add. +#define NVML_PCIE_ATOMICS_CAP_FETCHADD64 0x02 //!< 64-bit fetch and add. +#define NVML_PCIE_ATOMICS_CAP_SWAP32 0x04 //!< 32-bit swap. +#define NVML_PCIE_ATOMICS_CAP_SWAP64 0x08 //!< 64-bit swap. +#define NVML_PCIE_ATOMICS_CAP_CAS32 0x10 //!< 32-bit compare and swap. +#define NVML_PCIE_ATOMICS_CAP_CAS64 0x20 //!< 64-bit compare and swap. +#define NVML_PCIE_ATOMICS_CAP_CAS128 0x40 //!< 128-bit compare and swap. +#define NVML_PCIE_ATOMICS_OPS_MAX 7 //!< Maximum number of PCIe atomics operations. /** * Device Scope - This is useful to retrieve the telemetry at GPU and module (e.g. GPU + CPU) level @@ -1605,7 +1640,7 @@ typedef struct unsigned int powerValueMw; //!< [out] Power value to retrieve or set in milliwatts } nvmlPowerValue_v2_t; -#define nvmlPowerValue_v2 NVML_STRUCT_VERSION(PowerValue, 2) +#define nvmlPowerValue_v2 NVML_STRUCT_VERSION(PowerValue, 2) //!< Version macro for \a nvmlPowerValue_v2_t /** @} */ @@ -1750,22 +1785,22 @@ typedef enum nvmlDeviceVgpuCapability_enum /*! * Macros for vGPU instance's virtualization capabilities bitfield. */ -#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 -#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 -#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 +#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 //!< vGPU migration capability. +#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 //!< vGPU migration is not supported. +#define NVML_VGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 //!< vGPU migration is supported. /*! * Macros for pGPU's virtualization capabilities bitfield. */ -#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 -#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 -#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 +#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION 0:0 //!< Physical GPU migration capability. +#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_NO 0x0 //!< Physical GPU migration is not supported. +#define NVML_VGPU_PGPU_VIRTUALIZATION_CAP_MIGRATION_YES 0x1 //!< Physical GPU migration is supported. /** * Macros to indicate the vGPU mode of the GPU. */ -#define NVML_VGPU_PGPU_HETEROGENEOUS_MODE 0 -#define NVML_VGPU_PGPU_HOMOGENEOUS_MODE 1 +#define NVML_VGPU_PGPU_HETEROGENEOUS_MODE 0 //!< Heterogeneous vGPU mode. +#define NVML_VGPU_PGPU_HOMOGENEOUS_MODE 1 //!< Homogeneous vGPU mode. /** @} */ @@ -1788,7 +1823,7 @@ typedef struct unsigned int mode; //!< The vGPU heterogeneous mode } nvmlVgpuHeterogeneousMode_v1_t; typedef nvmlVgpuHeterogeneousMode_v1_t nvmlVgpuHeterogeneousMode_t; -#define nvmlVgpuHeterogeneousMode_v1 NVML_STRUCT_VERSION(VgpuHeterogeneousMode, 1) +#define nvmlVgpuHeterogeneousMode_v1 NVML_STRUCT_VERSION(VgpuHeterogeneousMode, 1) //!< Version macro for \a nvmlVgpuHeterogeneousMode_v1_t /** * Structure to store the placement ID of vGPU instance -- version 1 @@ -1799,7 +1834,7 @@ typedef struct unsigned int placementId; //!< Placement ID of the active vGPU instance } nvmlVgpuPlacementId_v1_t; typedef nvmlVgpuPlacementId_v1_t nvmlVgpuPlacementId_t; -#define nvmlVgpuPlacementId_v1 NVML_STRUCT_VERSION(VgpuPlacementId, 1) +#define nvmlVgpuPlacementId_v1 NVML_STRUCT_VERSION(VgpuPlacementId, 1) //!< Version macro for \a nvmlVgpuPlacementId_v1_t /** * Structure to store the list of vGPU placements -- version 1 @@ -1811,7 +1846,7 @@ typedef struct unsigned int count; //!< Count of placement IDs fetched unsigned int *placementIds; //!< Placement IDs for the vGPU type } nvmlVgpuPlacementList_v1_t; -#define nvmlVgpuPlacementList_v1 NVML_STRUCT_VERSION(VgpuPlacementList, 1) +#define nvmlVgpuPlacementList_v1 NVML_STRUCT_VERSION(VgpuPlacementList, 1) //!< Version macro for \a nvmlVgpuPlacementList_v1_t /** * Structure to store the list of vGPU placements -- version 2 @@ -1825,7 +1860,7 @@ typedef struct unsigned int mode; //!< IN: The vGPU mode. Either NVML_VGPU_PGPU_HETEROGENEOUS_MODE or NVML_VGPU_PGPU_HOMOGENEOUS_MODE } nvmlVgpuPlacementList_v2_t; typedef nvmlVgpuPlacementList_v2_t nvmlVgpuPlacementList_t; -#define nvmlVgpuPlacementList_v2 NVML_STRUCT_VERSION(VgpuPlacementList, 2) +#define nvmlVgpuPlacementList_v2 NVML_STRUCT_VERSION(VgpuPlacementList, 2) //!< Version macro for \a nvmlVgpuPlacementList_v2_t /** * Structure to store BAR1 size information of vGPU type -- Version 1 @@ -1836,7 +1871,7 @@ typedef struct unsigned long long bar1Size; //!< BAR1 size in megabytes } nvmlVgpuTypeBar1Info_v1_t; typedef nvmlVgpuTypeBar1Info_v1_t nvmlVgpuTypeBar1Info_t; -#define nvmlVgpuTypeBar1Info_v1 NVML_STRUCT_VERSION(VgpuTypeBar1Info, 1) +#define nvmlVgpuTypeBar1Info_v1 NVML_STRUCT_VERSION(VgpuTypeBar1Info, 1) //!< Version macro for \a nvmlVgpuTypeBar1Info_v1_t /** * Structure to store Utilization Value and vgpuInstance @@ -1878,7 +1913,7 @@ typedef struct nvmlVgpuInstanceUtilizationInfo_v1_t *vgpuUtilArray; //!< The array (allocated by caller) in which vGPU utilization are returned } nvmlVgpuInstancesUtilizationInfo_v1_t; typedef nvmlVgpuInstancesUtilizationInfo_v1_t nvmlVgpuInstancesUtilizationInfo_t; -#define nvmlVgpuInstancesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuInstancesUtilizationInfo, 1) +#define nvmlVgpuInstancesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuInstancesUtilizationInfo, 1) //!< Version macro for \a nvmlVgpuInstancesUtilizationInfo_v1_t /** * Structure to store Utilization Value, vgpuInstance and subprocess information @@ -1923,7 +1958,7 @@ typedef struct nvmlVgpuProcessUtilizationInfo_v1_t *vgpuProcUtilArray; //!< The array (allocated by caller) in which utilization of processes running on vGPU instances are returned } nvmlVgpuProcessesUtilizationInfo_v1_t; typedef nvmlVgpuProcessesUtilizationInfo_v1_t nvmlVgpuProcessesUtilizationInfo_t; -#define nvmlVgpuProcessesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuProcessesUtilizationInfo, 1) +#define nvmlVgpuProcessesUtilizationInfo_v1 NVML_STRUCT_VERSION(VgpuProcessesUtilizationInfo, 1) //!< Version macro for \a nvmlVgpuProcessesUtilizationInfo_v1_t /** * Structure to store the information of vGPU runtime state -- version 1 @@ -1934,44 +1969,47 @@ typedef struct unsigned long long size; //!< OUT: The runtime state size of the vGPU instance } nvmlVgpuRuntimeState_v1_t; typedef nvmlVgpuRuntimeState_v1_t nvmlVgpuRuntimeState_t; -#define nvmlVgpuRuntimeState_v1 NVML_STRUCT_VERSION(VgpuRuntimeState, 1) +#define nvmlVgpuRuntimeState_v1 NVML_STRUCT_VERSION(VgpuRuntimeState, 1) //!< Version macro for \a nvmlVgpuRuntimeState_v1_t /** * vGPU scheduler policies */ -#define NVML_VGPU_SCHEDULER_POLICY_UNKNOWN 0 -#define NVML_VGPU_SCHEDULER_POLICY_BEST_EFFORT 1 -#define NVML_VGPU_SCHEDULER_POLICY_EQUAL_SHARE 2 -#define NVML_VGPU_SCHEDULER_POLICY_FIXED_SHARE 3 +#define NVML_VGPU_SCHEDULER_POLICY_UNKNOWN 0 //!< Unknown scheduler policy. +#define NVML_VGPU_SCHEDULER_POLICY_BEST_EFFORT 1 //!< Best effort scheduler policy. +#define NVML_VGPU_SCHEDULER_POLICY_EQUAL_SHARE 2 //!< Equal share scheduler policy. +#define NVML_VGPU_SCHEDULER_POLICY_FIXED_SHARE 3 //!< Fixed share scheduler policy. -#define NVML_SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT 3 +#define NVML_SUPPORTED_VGPU_SCHEDULER_POLICY_COUNT 3 //!< Number of supported vGPU scheduler policies. -#define NVML_SCHEDULER_SW_MAX_LOG_ENTRIES 200 +#define NVML_SCHEDULER_SW_MAX_LOG_ENTRIES 200 //!< Maximum number of scheduler log entries. -#define NVML_VGPU_SCHEDULER_ARR_DEFAULT 0 -#define NVML_VGPU_SCHEDULER_ARR_DISABLE 1 -#define NVML_VGPU_SCHEDULER_ARR_ENABLE 2 +#define NVML_VGPU_SCHEDULER_ARR_DEFAULT 0 //!< Default Adaptive Round Robin mode. +#define NVML_VGPU_SCHEDULER_ARR_DISABLE 1 //!< Disable Adaptive Round Robin mode. +#define NVML_VGPU_SCHEDULER_ARR_ENABLE 2 //!< Enable Adaptive Round Robin mode. /** * vGPU scheduler engine types + * A GPU or GI may support a subset of engines */ -#define NVML_VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS 1 +#define NVML_VGPU_SCHEDULER_ENGINE_TYPE_GRAPHICS 1 //!< Graphics engine. /** * Union to represent the vGPU Scheduler Parameters */ +typedef struct { + unsigned int avgFactor; + unsigned int timeslice; +} nvmlVgpuSchedulerParamsVgpuSchedDataWithARR_t; + +typedef struct { + unsigned int timeslice; +} nvmlVgpuSchedulerParamsVgpuSchedData_t; + typedef union { - struct - { - unsigned int avgFactor; //!< Average factor in compensating the timeslice for Adaptive Round Robin mode - unsigned int timeslice; //!< The timeslice in ns for each software run list as configured, or the default value otherwise - } vgpuSchedDataWithARR; + nvmlVgpuSchedulerParamsVgpuSchedDataWithARR_t vgpuSchedDataWithARR; - struct - { - unsigned int timeslice; //!< The timeslice in ns for each software run list as configured, or the default value otherwise - } vgpuSchedData; + nvmlVgpuSchedulerParamsVgpuSchedData_t vgpuSchedData; } nvmlVgpuSchedulerParams_t; @@ -2014,18 +2052,20 @@ typedef struct nvmlVgpuSchedulerGetState_st /** * Union to represent the vGPU Scheduler set Parameters */ +typedef struct { + unsigned int avgFactor; + unsigned int frequency; +} nvmlVgpuSchedulerSetParamsVgpuSchedDataWithARR_t; + +typedef struct { + unsigned int timeslice; +} nvmlVgpuSchedulerSetParamsVgpuSchedData_t; + typedef union { - struct - { - unsigned int avgFactor; //!< Average factor in compensating the timeslice for Adaptive Round Robin mode - unsigned int frequency; //!< Frequency for Adaptive Round Robin mode - } vgpuSchedDataWithARR; + nvmlVgpuSchedulerSetParamsVgpuSchedDataWithARR_t vgpuSchedDataWithARR; - struct - { - unsigned int timeslice; //!< The timeslice in ns(Nanoseconds) for each software run list as configured, or the default value otherwise - } vgpuSchedData; + nvmlVgpuSchedulerSetParamsVgpuSchedData_t vgpuSchedData; } nvmlVgpuSchedulerSetParams_t; @@ -2143,7 +2183,7 @@ typedef struct nvmlVgpuTypeId_t *vgpuTypeIds; //!< OUT: List of vGPU type IDs } nvmlVgpuTypeIdInfo_v1_t; typedef nvmlVgpuTypeIdInfo_v1_t nvmlVgpuTypeIdInfo_t; -#define nvmlVgpuTypeIdInfo_v1 NVML_STRUCT_VERSION(VgpuTypeIdInfo, 1) +#define nvmlVgpuTypeIdInfo_v1 NVML_STRUCT_VERSION(VgpuTypeIdInfo, 1) //!< Version macro for \a nvmlVgpuTypeIdInfo_v1_t /** * Structure to store the maximum number of possible vGPU type IDs -- version 1 @@ -2155,7 +2195,7 @@ typedef struct unsigned int maxInstancePerGI; //!< OUT: Maximum number of vGPU instances per GPU instance } nvmlVgpuTypeMaxInstance_v1_t; typedef nvmlVgpuTypeMaxInstance_v1_t nvmlVgpuTypeMaxInstance_t; -#define nvmlVgpuTypeMaxInstance_v1 NVML_STRUCT_VERSION(VgpuTypeMaxInstance, 1) +#define nvmlVgpuTypeMaxInstance_v1 NVML_STRUCT_VERSION(VgpuTypeMaxInstance, 1) //!< Version macro for \a nvmlVgpuTypeMaxInstance_v1_t /** * Structure to store active vGPU instance information -- Version 1 @@ -2167,7 +2207,7 @@ typedef struct nvmlVgpuInstance_t *vgpuInstances; //!< IN/OUT: list of active vGPU instances } nvmlActiveVgpuInstanceInfo_v1_t; typedef nvmlActiveVgpuInstanceInfo_v1_t nvmlActiveVgpuInstanceInfo_t; -#define nvmlActiveVgpuInstanceInfo_v1 NVML_STRUCT_VERSION(ActiveVgpuInstanceInfo, 1) +#define nvmlActiveVgpuInstanceInfo_v1 NVML_STRUCT_VERSION(ActiveVgpuInstanceInfo, 1) //!< Version macro for \a nvmlActiveVgpuInstanceInfo_v1_t /** * Structure to set vGPU scheduler state information -- version 1 @@ -2181,7 +2221,7 @@ typedef struct nvmlVgpuSchedulerSetParams_t schedulerParams; //!< IN: vGPU Scheduler Parameters } nvmlVgpuSchedulerState_v1_t; typedef nvmlVgpuSchedulerState_v1_t nvmlVgpuSchedulerState_t; -#define nvmlVgpuSchedulerState_v1 NVML_STRUCT_VERSION(VgpuSchedulerState, 1) +#define nvmlVgpuSchedulerState_v1 NVML_STRUCT_VERSION(VgpuSchedulerState, 1) //!< Version macro for \a nvmlVgpuSchedulerState_v1_t /** * Structure to store vGPU scheduler state information -- Version 1 @@ -2195,7 +2235,7 @@ typedef struct nvmlVgpuSchedulerParams_t schedulerParams; //!< OUT: vGPU Scheduler Parameters } nvmlVgpuSchedulerStateInfo_v1_t; typedef nvmlVgpuSchedulerStateInfo_v1_t nvmlVgpuSchedulerStateInfo_t; -#define nvmlVgpuSchedulerStateInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerStateInfo, 1) +#define nvmlVgpuSchedulerStateInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerStateInfo, 1) //!< Version macro for \a nvmlVgpuSchedulerStateInfo_v1_t /** * Structure to store vGPU scheduler log information -- Version 1 @@ -2211,7 +2251,7 @@ typedef struct nvmlVgpuSchedulerLogEntry_t logEntries[NVML_SCHEDULER_SW_MAX_LOG_ENTRIES]; //!< OUT: Structure to store the state and logs of a software runlist } nvmlVgpuSchedulerLogInfo_v1_t; typedef nvmlVgpuSchedulerLogInfo_v1_t nvmlVgpuSchedulerLogInfo_t; -#define nvmlVgpuSchedulerLogInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerLogInfo, 1) +#define nvmlVgpuSchedulerLogInfo_v1 NVML_STRUCT_VERSION(VgpuSchedulerLogInfo, 1) //!< Version macro for \a nvmlVgpuSchedulerLogInfo_v1_t /** * Structure to store creatable vGPU placement information -- version 1 @@ -2225,7 +2265,7 @@ typedef struct unsigned int placementSize; //!< OUT: The number of slots occupied by the vGPU type } nvmlVgpuCreatablePlacementInfo_v1_t; typedef nvmlVgpuCreatablePlacementInfo_v1_t nvmlVgpuCreatablePlacementInfo_t; -#define nvmlVgpuCreatablePlacementInfo_v1 NVML_STRUCT_VERSION(VgpuCreatablePlacementInfo, 1) +#define nvmlVgpuCreatablePlacementInfo_v1 NVML_STRUCT_VERSION(VgpuCreatablePlacementInfo, 1) //!< Version macro for \a nvmlVgpuCreatablePlacementInfo_v1_t /** @} */ /** @} */ @@ -2701,37 +2741,128 @@ typedef nvmlVgpuCreatablePlacementInfo_v1_t nvmlVgpuCreatablePlacementInfo_t; #define NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_13 248 //!< Count of symbol errors that are corrected - bin 13 #define NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_14 249 //!< Count of symbol errors that are corrected - bin 14 #define NVML_FI_DEV_NVLINK_COUNT_FEC_HISTORY_15 250 //!< Count of symbol errors that are corrected - bin 15 +/* Power Smoothing */ +#define NVML_FI_PWR_SMOOTHING_ENABLED 251 //!< Enablement (0/DISABLED or 1/ENABLED) +#define NVML_FI_PWR_SMOOTHING_PRIV_LVL 252 //!< Current privilege level +#define NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED 253 //!< Immediate ramp down enablement (0/DISABLED or 1/ENABLED) +#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL 254 //!< Applied TMP ceiling value in Watts +#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR 255 //!< Applied TMP floor value in Watts +#define NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING 256 //!< Max % TMP Floor value +#define NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING 257 //!< Min % TMP Floor value +#define NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING 258 //!< HW Circuitry % lifetime remaining +#define NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES 259 //!< Max number of preset profiles +#define NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR 260 //!< % TMP floor for a given profile +#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE 261 //!< Ramp up rate in mW/s for a given profile +#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE 262 //!< Ramp down rate in mW/s for a given profile +#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL 263 //!< Ramp down hysteresis value in ms for a given profile +#define NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE 264 //!< Active preset profile number +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR 265 //!< % TMP floor for a given profile +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE 266 //!< Ramp up rate in mW/s for a given profile +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE 267 //!< Ramp down rate in mW/s for a given profile +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL 268 //!< Ramp down hysteresis value in ms for a given profile /** * Field values for Clock Throttle Reason Counters * All counters are in nanoseconds */ #define NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_POWER_CAP NVML_FI_DEV_PERF_POLICY_POWER //!< Throttling to not exceed currently set power limits in ns #define NVML_FI_DEV_CLOCKS_EVENT_REASON_SYNC_BOOST NVML_FI_DEV_PERF_POLICY_SYNC_BOOST //!< Throttling to match minimum possible clock across Sync Boost Group in ns -#define NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN 251 //!< Throttling to ensure ((GPU temp < GPU Max Operating Temp) && (Memory Temp < Memory Max Operating Temp)) in ns -#define NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN 252 //!< Throttling due to temperature being too high (reducing core clocks by a factor of 2 or more) in ns -#define NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN 253 //!< Throttling due to external power brake assertion trigger (reducing core clocks by a factor of 2 or more) in ns -#define NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ 254 //!< Accumulated frequency of the GPU to be used for averaging -#define NVML_FI_DEV_POWER_SYNC_BALANCING_AF 255 //!< Accumulated activity factor of the GPU to be used for averaging -/* Power Smoothing */ -#define NVML_FI_PWR_SMOOTHING_ENABLED 256 //!< Enablement (0/DISABLED or 1/ENABLED) -#define NVML_FI_PWR_SMOOTHING_PRIV_LVL 257 //!< Current privilege level -#define NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED 258 //!< Immediate ramp down enablement (0/DISABLED or 1/ENABLED) -#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL 259 //!< Applied TMP ceiling value in Watts -#define NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR 260 //!< Applied TMP floor value in Watts -#define NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING 261 //!< Max % TMP Floor value -#define NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING 262 //!< Min % TMP Floor value -#define NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING 263 //!< HW Circuitry % lifetime remaining -#define NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES 264 //!< Max number of preset profiles -#define NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR 265 //!< % TMP floor for a given profile -#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE 266 //!< Ramp up rate in mW/s for a given profile -#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE 267 //!< Ramp down rate in mW/s for a given profile -#define NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL 268 //!< Ramp down hysteresis value in ms for a given profile -#define NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE 269 //!< Active preset profile number -#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR 270 //!< % TMP floor for a given profile -#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE 271 //!< Ramp up rate in mW/s for a given profile -#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE 272 //!< Ramp down rate in mW/s for a given profile -#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL 273 //!< Ramp down hysteresis value in ms for a given profile -#define NVML_FI_MAX 274 //!< One greater than the largest field ID defined above +#define NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN 269 //!< Throttling to ensure ((GPU temp < GPU Max Operating Temp) && (Memory Temp < Memory Max Operating Temp)) in ns +#define NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN 270 //!< Throttling due to temperature being too high (reducing core clocks by a factor of 2 or more) in ns +#define NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN 271 //!< Throttling due to external power brake assertion trigger (reducing core clocks by a factor of 2 or more) in ns +#define NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ 272 //!< Accumulated frequency of the GPU to be used for averaging +#define NVML_FI_DEV_POWER_SYNC_BALANCING_AF 273 //!< Accumulated activity factor of the GPU to be used for averaging +#define NVML_FI_DEV_EDPP_MULTIPLIER 274 //!< EDPp multiplier expressed as a percentage +/** + * Current primary power floor value in Watts. + * This value is calculated by doing "TMP ceiling value * (% TMP floor value)". + */ +#define NVML_FI_PWR_SMOOTHING_PRIMARY_POWER_FLOOR 275 +/** + * Current secondary power floor value in Watts. + * This is the power floor that is applied during active workload periods on the GPU when primary + * floor activation window multiplier is set to a non-zero value. + */ +#define NVML_FI_PWR_SMOOTHING_SECONDARY_POWER_FLOOR 276 +/** + * Minimum primary floor activation offset value in Watts. + * This is the minimum primary floor activation offset accepted by the driver specified in Watts. + * This is a static field. + */ +#define NVML_FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_OFFSET 277 +/** + * Minimum primary floor activation point value in Watts. + * This is the minimum absolute raw value specified in Watts that the driver will use for switching + * between primary and secondary floor. This point is calculated as "secondary power floor + + * primary floor activation offset", and then computed value is floored to "min primary floor + * activation point" by the driver at run time. This value is used to avoid setting of switch point + * too low accidentally. + */ +#define NVML_FI_PWR_SMOOTHING_MIN_PRIMARY_FLOOR_ACT_POINT 278 +/** + * Window Multiplier value in ms. + * This is the multiplier unit specified in ms for other multipliers in the profile (primary floor + * activation window multiplier and primary floor target window multiplier). This is a static field. + */ +#define NVML_FI_PWR_SMOOTHING_WINDOW_MULTIPLIER 279 +/** + * Support (0/Not Supported or 1/Supported) for delayed power smoothing. + */ +#define NVML_FI_PWR_SMOOTHING_DELAYED_PWR_SMOOTHING_SUPPORTED 280 +/** + * Current secondary power floor value in Watts for a given profile. + * This is the power floor that will be applied during active workload periods on the GPU when + * primary floor activation window multiplier is set to a non-zero value. + */ +#define NVML_FI_PWR_SMOOTHING_PROFILE_SECONDARY_POWER_FLOOR 281 +/** + * Current primary floor activation window multiplier value for a given profile. + * This is the "X" ms time multiplier for the activation moving average window size. The activation + * moving average is compared against the (secondary floor + primary floor activation offset value) + * to determine if the controller should switch from the secondary floor to the primary floor. + * Setting this to 0 will disable switching to the secondary floor. + */ +#define NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_WIN_MULT 282 +/** + * Current primary floor target window multiplier value for a given profile. + * This is the "X" ms time multiplier for the target moving average window size. When set to + * non-zero value, the target moving average power determines the primary floor. When set to 0, + * driver will use the Floor percentage instead to derive the primary floor. + */ +#define NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_TAR_WIN_MULT 283 +/** + * Current primary floor activation offset value in Watts for a given profile. + * If the target moving average falls below the secondary floor plus this offset, the primary floor + * will be activated. + */ +#define NVML_FI_PWR_SMOOTHING_PROFILE_PRIMARY_FLOOR_ACT_OFFSET 284 +/** + * Current secondary power floor value in Watts for admin override. + * This is the power floor that will be applied during active workload periods on the GPU when + * primary floor activation window multiplier is set to a non-zero value. + */ +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_SECONDARY_POWER_FLOOR 285 +/** + * Current primary floor activation window multiplier value for admin override. + * This is the "X" ms time multiplier for the activation moving average window size. The activation + * moving average is compared against the (secondary floor + primary floor activation offset value) + * to determine if the controller should switch from the secondary floor to the primary floor. + * Setting this to 0 will disable switching to the secondary floor. + */ +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_WIN_MULT 286 +/** + * Current primary floor target window multiplier value for admin override. + * This is the "X" ms time multiplier for the target moving average window size. When set to + * non-zero value, the target moving average power determines the primary floor. When set to 0, + * driver will use the Floor percentage instead to derive the primary floor. + */ +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_TAR_WIN_MULT 287 +/** + * Current primary floor activation offset value in Watts for admin override. + * If the target moving average falls below the secondary floor plus this offset, the primary floor + * will be activated. + */ +#define NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PRIMARY_FLOOR_ACT_OFFSET 288 +#define NVML_FI_MAX 289 //!< One greater than the largest field ID defined above /** * NVML_FI_DEV_NVLINK_GET_POWER_THRESHOLD_UNITS @@ -2918,19 +3049,19 @@ typedef struct * Types can be combined with bitwise or operator '|' when passed to \ref nvmlDeviceRegisterEvents */ //! Mask with no events -#define nvmlEventTypeNone 0x0000000000000000LL +#define nvmlEventTypeNone 0x0000000000000000LL //!< No event. //! Event about single bit ECC errors /** * \note A corrected texture memory error is not an ECC error, so it does not generate a single bit event */ -#define nvmlEventTypeSingleBitEccError 0x0000000000000001LL +#define nvmlEventTypeSingleBitEccError 0x0000000000000001LL //!< Single bit ECC error event. //! Event about double bit ECC errors /** * \note An uncorrected texture memory error is not an ECC error, so it does not generate a double bit event */ -#define nvmlEventTypeDoubleBitEccError 0x0000000000000002LL +#define nvmlEventTypeDoubleBitEccError 0x0000000000000002LL //!< Double bit ECC error event. //! Event about PState changes /** @@ -2938,43 +3069,43 @@ typedef struct * no work being executed on the GPU, power capping or thermal capping. In a typical situation, * Fermi-based GPU should stay in P0 for the duration of the execution of the compute process. */ -#define nvmlEventTypePState 0x0000000000000004LL +#define nvmlEventTypePState 0x0000000000000004LL //!< PState change event. //! Event that Xid critical error occurred -#define nvmlEventTypeXidCriticalError 0x0000000000000008LL +#define nvmlEventTypeXidCriticalError 0x0000000000000008LL //!< XID critical error event. //! Event about clock changes /** * Kepler only */ -#define nvmlEventTypeClock 0x0000000000000010LL +#define nvmlEventTypeClock 0x0000000000000010LL //!< Clock change event. //! Event about AC/Battery power source changes -#define nvmlEventTypePowerSourceChange 0x0000000000000080LL +#define nvmlEventTypePowerSourceChange 0x0000000000000080LL //!< Power source change event. //! Event about MIG configuration changes -#define nvmlEventMigConfigChange 0x0000000000000100LL +#define nvmlEventMigConfigChange 0x0000000000000100LL //!< MIG configuration change event. //! Event about single bit ECC error storm -#define nvmlEventTypeSingleBitEccErrorStorm 0x0000000000000200LL +#define nvmlEventTypeSingleBitEccErrorStorm 0x0000000000000200LL //!< Single bit ECC error storm event. //! Event about DRAM retirement event -#define nvmlEventTypeDramRetirementEvent 0x0000000000000400LL +#define nvmlEventTypeDramRetirementEvent 0x0000000000000400LL //!< DRAM retirement event. //! Event about DRAM retirement failure -#define nvmlEventTypeDramRetirementFailure 0x0000000000000800LL +#define nvmlEventTypeDramRetirementFailure 0x0000000000000800LL //!< DRAM retirement failure event. //! Event for Non Fatal Poison -#define nvmlEventTypeNonFatalPoisonError 0x0000000000001000LL +#define nvmlEventTypeNonFatalPoisonError 0x0000000000001000LL //!< Non-fatal poison error event. //! Event for Fatal Poison -#define nvmlEventTypeFatalPoisonError 0x0000000000002000LL +#define nvmlEventTypeFatalPoisonError 0x0000000000002000LL //!< Fatal poison error event. //! Event for GPU Unavailable -#define nvmlEventTypeGpuUnavailableError 0x0000000000004000LL +#define nvmlEventTypeGpuUnavailableError 0x0000000000004000LL //!< GPU unavailable error event. //! Event for GPU Recovery Action -#define nvmlEventTypeGpuRecoveryAction 0x0000000000008000LL +#define nvmlEventTypeGpuRecoveryAction 0x0000000000008000LL //!< GPU recovery action event. //! Mask of all events #define nvmlEventTypeAll (nvmlEventTypeNone \ @@ -2991,7 +3122,7 @@ typedef struct | nvmlEventTypeNonFatalPoisonError \ | nvmlEventTypeFatalPoisonError \ | nvmlEventTypeGpuUnavailableError \ - | nvmlEventTypeGpuRecoveryAction) + | nvmlEventTypeGpuRecoveryAction) //!< Mask of all event types. /** @} */ @@ -3024,7 +3155,7 @@ typedef struct #define nvmlSystemEventTypeGpuDriverUnbind 0x0000000000000001LL //!< Bitmask value of Driver Unbind System Event #define nvmlSystemEventTypeGpuDriverBind 0x0000000000000002LL //!< Bitmask value of Driver Bind System Event -#define nvmlSystemEventTypeCount 2 +#define nvmlSystemEventTypeCount 2 //!< Number of system event types. /** * nvmlSystemEventSetCreateRequest @@ -3098,18 +3229,18 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; /** Nothing is running on the GPU and the clocks are dropping to Idle state * \note This limiter may be removed in a later release */ -#define nvmlClocksEventReasonGpuIdle 0x0000000000000001LL +#define nvmlClocksEventReasonGpuIdle 0x0000000000000001LL //!< GPU is idle. /* * @deprecated No longer used */ -#define nvmlClocksEventReasonApplicationsClocksSetting 0x0000000000000002LL +#define nvmlClocksEventReasonApplicationsClocksSetting 0x0000000000000002LL //!< Clocks are set to application-specific values. /** * @deprecated Renamed to \ref nvmlClocksThrottleReasonApplicationsClocksSetting * as the name describes the situation more accurately. */ -#define nvmlClocksThrottleReasonUserDefinedClocks nvmlClocksEventReasonApplicationsClocksSetting +#define nvmlClocksThrottleReasonUserDefinedClocks nvmlClocksEventReasonApplicationsClocksSetting //!< Deprecated: User-defined clocks. /** The clocks have been optimized to ensure not to exceed currently set power limits * @@ -3117,7 +3248,7 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; * @see nvmlDeviceSetPowerManagementLimit * @see nvmlDeviceGetPowerManagementLimit */ -#define nvmlClocksEventReasonSwPowerCap 0x0000000000000004LL +#define nvmlClocksEventReasonSwPowerCap 0x0000000000000004LL //!< Software power cap activated. /** HW Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * @@ -3132,7 +3263,7 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; * @see nvmlDeviceGetTemperatureThreshold * @see nvmlDeviceGetPowerUsage */ -#define nvmlClocksThrottleReasonHwSlowdown 0x0000000000000008LL +#define nvmlClocksThrottleReasonHwSlowdown 0x0000000000000008LL //!< Hardware slowdown activated. /** Sync Boost * @@ -3143,7 +3274,7 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; * holding this one at lower clocks. * */ -#define nvmlClocksEventReasonSyncBoost 0x0000000000000010LL +#define nvmlClocksEventReasonSyncBoost 0x0000000000000010LL //!< Sync boost activated. /** SW Thermal Slowdown * @@ -3152,7 +3283,7 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; * - Current memory temperature does not exceeed Memory Max Operating Temperature * */ -#define nvmlClocksEventReasonSwThermalSlowdown 0x0000000000000020LL +#define nvmlClocksEventReasonSwThermalSlowdown 0x0000000000000020LL //!< Software thermal slowdown activated. /** HW Thermal Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * @@ -3163,7 +3294,7 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; * @see nvmlDeviceGetTemperatureThreshold * @see nvmlDeviceGetPowerUsage */ -#define nvmlClocksThrottleReasonHwThermalSlowdown 0x0000000000000040LL +#define nvmlClocksThrottleReasonHwThermalSlowdown 0x0000000000000040LL //!< Hardware thermal slowdown activated. /** HW Power Brake Slowdown (reducing the core clocks by a factor of 2 or more) is engaged * @@ -3174,19 +3305,19 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; * @see nvmlDeviceGetTemperatureThreshold * @see nvmlDeviceGetPowerUsage */ -#define nvmlClocksThrottleReasonHwPowerBrakeSlowdown 0x0000000000000080LL +#define nvmlClocksThrottleReasonHwPowerBrakeSlowdown 0x0000000000000080LL //!< Hardware power brake slowdown activated. /** GPU clocks are limited by current setting of Display clocks * * @see bug 1997531 */ -#define nvmlClocksEventReasonDisplayClockSetting 0x0000000000000100LL +#define nvmlClocksEventReasonDisplayClockSetting 0x0000000000000100LL //!< Display clock setting limited. /** Bit mask representing no clocks throttling * * Clocks are as high as possible. * */ -#define nvmlClocksEventReasonNone 0x0000000000000000LL +#define nvmlClocksEventReasonNone 0x0000000000000000LL //!< No clock throttling. /** Bit mask representing all supported clocks throttling reasons * New reasons might be added to this list in the future @@ -3201,40 +3332,40 @@ typedef nvmlSystemEventSetWaitRequest_v1_t nvmlSystemEventSetWaitRequest_t; | nvmlClocksThrottleReasonHwThermalSlowdown \ | nvmlClocksThrottleReasonHwPowerBrakeSlowdown \ | nvmlClocksEventReasonDisplayClockSetting \ -) +) //!< Bitmask of all clock event reasons. /** * @deprecated Use \ref nvmlClocksEventReasonGpuIdle instead */ -#define nvmlClocksThrottleReasonGpuIdle nvmlClocksEventReasonGpuIdle +#define nvmlClocksThrottleReasonGpuIdle nvmlClocksEventReasonGpuIdle //!< Deprecated: GPU idle. /** * @deprecated */ -#define nvmlClocksThrottleReasonApplicationsClocksSetting nvmlClocksEventReasonApplicationsClocksSetting +#define nvmlClocksThrottleReasonApplicationsClocksSetting nvmlClocksEventReasonApplicationsClocksSetting //!< Deprecated: Application clocks setting. /** * @deprecated Use \ref nvmlClocksEventReasonSyncBoost instead */ -#define nvmlClocksThrottleReasonSyncBoost nvmlClocksEventReasonSyncBoost +#define nvmlClocksThrottleReasonSyncBoost nvmlClocksEventReasonSyncBoost //!< Deprecated: Sync boost. /** * @deprecated Use \ref nvmlClocksEventReasonSwPowerCap instead */ -#define nvmlClocksThrottleReasonSwPowerCap nvmlClocksEventReasonSwPowerCap +#define nvmlClocksThrottleReasonSwPowerCap nvmlClocksEventReasonSwPowerCap //!< Deprecated: Software power cap. /** * @deprecated Use \ref nvmlClocksEventReasonSwThermalSlowdown instead */ -#define nvmlClocksThrottleReasonSwThermalSlowdown nvmlClocksEventReasonSwThermalSlowdown +#define nvmlClocksThrottleReasonSwThermalSlowdown nvmlClocksEventReasonSwThermalSlowdown //!< Deprecated: Software thermal slowdown. /** * @deprecated Use \ref nvmlClocksEventReasonDisplayClockSetting instead */ -#define nvmlClocksThrottleReasonDisplayClockSetting nvmlClocksEventReasonDisplayClockSetting +#define nvmlClocksThrottleReasonDisplayClockSetting nvmlClocksEventReasonDisplayClockSetting //!< Deprecated: Display clock setting. /** * @deprecated Use \ref nvmlClocksEventReasonNone instead */ -#define nvmlClocksThrottleReasonNone nvmlClocksEventReasonNone +#define nvmlClocksThrottleReasonNone nvmlClocksEventReasonNone //!< Deprecated: No clock throttling. /** * @deprecated Use \ref nvmlClocksEventReasonAll instead */ -#define nvmlClocksThrottleReasonAll nvmlClocksEventReasonAll +#define nvmlClocksThrottleReasonAll nvmlClocksEventReasonAll //!< Deprecated: All clock throttling reasons. /** @} */ /***************************************************************************************************/ @@ -3402,17 +3533,17 @@ typedef enum nvmlPcieLinkState_enum /** * Confidential Compute CPU Capabilities values */ -#define NVML_CC_SYSTEM_CPU_CAPS_NONE 0 -#define NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV 1 -#define NVML_CC_SYSTEM_CPU_CAPS_INTEL_TDX 2 -#define NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV_SNP 3 -#define NVML_CC_SYSTEM_CPU_CAPS_AMD_SNP_VTOM 4 +#define NVML_CC_SYSTEM_CPU_CAPS_NONE 0 //!< No confidential compute CPU capability. +#define NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV 1 //!< AMD SEV CPU capability. +#define NVML_CC_SYSTEM_CPU_CAPS_INTEL_TDX 2 //!< Intel TDX CPU capability. +#define NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV_SNP 3 //!< AMD SEV-SNP CPU capability. +#define NVML_CC_SYSTEM_CPU_CAPS_AMD_SNP_VTOM 4 //!< AMD SNP-VTOM CPU capability. /** * Confidenial Compute GPU Capabilities values */ -#define NVML_CC_SYSTEM_GPUS_CC_NOT_CAPABLE 0 -#define NVML_CC_SYSTEM_GPUS_CC_CAPABLE 1 +#define NVML_CC_SYSTEM_GPUS_CC_NOT_CAPABLE 0 //!< GPUs are not confidential compute capable. +#define NVML_CC_SYSTEM_GPUS_CC_CAPABLE 1 //!< GPUs are confidential compute capable. typedef struct nvmlConfComputeSystemCaps_st { unsigned int cpuCaps; @@ -3422,21 +3553,21 @@ typedef struct nvmlConfComputeSystemCaps_st { /** * Confidential Compute DevTools Mode values */ -#define NVML_CC_SYSTEM_DEVTOOLS_MODE_OFF 0 -#define NVML_CC_SYSTEM_DEVTOOLS_MODE_ON 1 +#define NVML_CC_SYSTEM_DEVTOOLS_MODE_OFF 0 //!< DevTools mode is off. +#define NVML_CC_SYSTEM_DEVTOOLS_MODE_ON 1 //!< DevTools mode is on. /** * Confidential Compute Environment values */ -#define NVML_CC_SYSTEM_ENVIRONMENT_UNAVAILABLE 0 -#define NVML_CC_SYSTEM_ENVIRONMENT_SIM 1 -#define NVML_CC_SYSTEM_ENVIRONMENT_PROD 2 +#define NVML_CC_SYSTEM_ENVIRONMENT_UNAVAILABLE 0 //!< Environment is unavailable. +#define NVML_CC_SYSTEM_ENVIRONMENT_SIM 1 //!< Environment is simulation. +#define NVML_CC_SYSTEM_ENVIRONMENT_PROD 2 //!< Environment is production. /** * Confidential Compute Feature Status values */ -#define NVML_CC_SYSTEM_FEATURE_DISABLED 0 -#define NVML_CC_SYSTEM_FEATURE_ENABLED 1 +#define NVML_CC_SYSTEM_FEATURE_DISABLED 0 //!< Feature is disabled. +#define NVML_CC_SYSTEM_FEATURE_ENABLED 1 //!< Feature is enabled. typedef struct nvmlConfComputeSystemState_st { unsigned int environment; @@ -3447,9 +3578,9 @@ typedef struct nvmlConfComputeSystemState_st { /** * Confidential Compute Multigpu mode values */ -#define NVML_CC_SYSTEM_MULTIGPU_NONE 0 -#define NVML_CC_SYSTEM_MULTIGPU_PROTECTED_PCIE 1 -#define NVML_CC_SYSTEM_MULTIGPU_NVLE 2 +#define NVML_CC_SYSTEM_MULTIGPU_NONE 0 //!< Multi-GPU mode is none. +#define NVML_CC_SYSTEM_MULTIGPU_PROTECTED_PCIE 1 //!< Multi-GPU mode is protected PCIe. +#define NVML_CC_SYSTEM_MULTIGPU_NVLE 2 //!< Multi-GPU mode is NVLE. /** * Confidential Compute System settings @@ -3463,7 +3594,7 @@ typedef struct { } nvmlSystemConfComputeSettings_v1_t; typedef nvmlSystemConfComputeSettings_v1_t nvmlSystemConfComputeSettings_t; -#define nvmlSystemConfComputeSettings_v1 NVML_STRUCT_VERSION(SystemConfComputeSettings, 1) +#define nvmlSystemConfComputeSettings_v1 NVML_STRUCT_VERSION(SystemConfComputeSettings, 1) //!< Version macro for \a nvmlSystemConfComputeSettings_v1_t /** * Protected memory size @@ -3478,14 +3609,14 @@ nvmlConfComputeMemSizeInfo_st /** * Confidential Compute GPUs/System Ready State values */ -#define NVML_CC_ACCEPTING_CLIENT_REQUESTS_FALSE 0 -#define NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE 1 +#define NVML_CC_ACCEPTING_CLIENT_REQUESTS_FALSE 0 //!< Client requests are not accepted. +#define NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE 1 //!< Client requests are accepted. /** * GPU Certificate Details */ -#define NVML_GPU_CERT_CHAIN_SIZE 0x1000 -#define NVML_GPU_ATTESTATION_CERT_CHAIN_SIZE 0x1400 +#define NVML_GPU_CERT_CHAIN_SIZE 0x1000 //!< Size of the certificate chain. +#define NVML_GPU_ATTESTATION_CERT_CHAIN_SIZE 0x1400 //!< Size of the attestation certificate chain. typedef struct nvmlConfComputeGpuCertificate_st { unsigned int certChainSize; @@ -3497,13 +3628,13 @@ typedef struct nvmlConfComputeGpuCertificate_st { /** * GPU Attestation Report */ -#define NVML_CC_GPU_CEC_NONCE_SIZE 0x20 -#define NVML_CC_GPU_ATTESTATION_REPORT_SIZE 0x2000 -#define NVML_CC_GPU_CEC_ATTESTATION_REPORT_SIZE 0x1000 -#define NVML_CC_CEC_ATTESTATION_REPORT_NOT_PRESENT 0 -#define NVML_CC_CEC_ATTESTATION_REPORT_PRESENT 1 -#define NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MIN 50 -#define NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MAX 65 +#define NVML_CC_GPU_CEC_NONCE_SIZE 0x20 //!< Size of the CEC nonce. +#define NVML_CC_GPU_ATTESTATION_REPORT_SIZE 0x2000 //!< Size of the attestation report. +#define NVML_CC_GPU_CEC_ATTESTATION_REPORT_SIZE 0x1000 //!< Size of the CEC attestation report. +#define NVML_CC_CEC_ATTESTATION_REPORT_NOT_PRESENT 0 //!< CEC attestation report is not present. +#define NVML_CC_CEC_ATTESTATION_REPORT_PRESENT 1 //!< CEC attestation report is present. +#define NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MIN 50 //!< Minimum attacker advantage for key rotation threshold. +#define NVML_CC_KEY_ROTATION_THRESHOLD_ATTACKER_ADVANTAGE_MAX 65 //!< Maximum attacker advantage for key rotation threshold. typedef struct nvmlConfComputeGpuAttestationReport_st { unsigned int isCecAttestationReportPresent; //!< output @@ -3521,7 +3652,7 @@ typedef struct nvmlConfComputeSetKeyRotationThresholdInfo_st { typedef nvmlConfComputeSetKeyRotationThresholdInfo_v1_t nvmlConfComputeSetKeyRotationThresholdInfo_t; #define nvmlConfComputeSetKeyRotationThresholdInfo_v1 \ - NVML_STRUCT_VERSION(ConfComputeSetKeyRotationThresholdInfo, 1) + NVML_STRUCT_VERSION(ConfComputeSetKeyRotationThresholdInfo, 1) //!< Version macro for \a nvmlConfComputeSetKeyRotationThresholdInfo_v1_t typedef struct nvmlConfComputeGetKeyRotationThresholdInfo_st { unsigned int version; @@ -3530,7 +3661,7 @@ typedef struct nvmlConfComputeGetKeyRotationThresholdInfo_st { typedef nvmlConfComputeGetKeyRotationThresholdInfo_v1_t nvmlConfComputeGetKeyRotationThresholdInfo_t; #define nvmlConfComputeGetKeyRotationThresholdInfo_v1 \ - NVML_STRUCT_VERSION(ConfComputeGetKeyRotationThresholdInfo, 1) + NVML_STRUCT_VERSION(ConfComputeGetKeyRotationThresholdInfo, 1) //!< Version macro for \a nvmlConfComputeGetKeyRotationThresholdInfo_v1_t /** @} */ @@ -3615,6 +3746,8 @@ typedef struct #define NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCORRECT_CHASSIS_SN 3 //!< Fabric Health Mask: Incorrect Configuration - Chassis Serial Number #define NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_NO_PARTITION 4 //!< Fabric Health Mask: Incorrect Configuration - No Partition #define NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INSUFFICIENT_NVLINKS 5 //!< Fabric Health Mask: Incorrect Configuration - Insufficient Nvlinks +#define NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INCOMPATIBLE_GPU_FW 6 //!< Fabric Health Mask: Incorrect Configuration - Incompatible GPU Firmware +#define NVML_GPU_FABRIC_HEALTH_MASK_INCORRECT_CONFIGURATION_INVALID_LOCATION 7 //!< Fabric Health Mask: Incorrect Configuration - Invalid Location #define NVML_GPU_FABRIC_HEALTH_MASK_SHIFT_INCORRECT_CONFIGURATION 8 //!< Fabric Health Mask Bit Shift for Incorrect Configuration #define NVML_GPU_FABRIC_HEALTH_MASK_WIDTH_INCORRECT_CONFIGURATION 0xf //!< Fabric Health Mask Width for Incorrect Configuration @@ -3634,7 +3767,7 @@ typedef struct */ #define NVML_GPU_FABRIC_HEALTH_GET(var, type) \ (((var) >> NVML_GPU_FABRIC_HEALTH_MASK_SHIFT##type) & \ - (NVML_GPU_FABRIC_HEALTH_MASK_WIDTH##type)) + (NVML_GPU_FABRIC_HEALTH_MASK_WIDTH##type)) //!< Macro to get GPU fabric health status. /** * GPU Fabric Health Status Mask for various fields can be tested @@ -3643,7 +3776,7 @@ typedef struct */ #define NVML_GPU_FABRIC_HEALTH_TEST(var, type, val) \ (NVML_GPU_FABRIC_HEALTH_GET(var, type) == \ - NVML_GPU_FABRIC_HEALTH_MASK##type##val) + NVML_GPU_FABRIC_HEALTH_MASK##type##val) //!< Macro to test GPU fabric health status. /** * GPU Fabric information (v2). @@ -3669,7 +3802,7 @@ typedef struct /** * Version identifier value for \ref nvmlGpuFabricInfo_v2_t.version. */ -#define nvmlGpuFabricInfo_v2 NVML_STRUCT_VERSION(GpuFabricInfo, 2) +#define nvmlGpuFabricInfo_v2 NVML_STRUCT_VERSION(GpuFabricInfo, 2) //!< Version macro for \a nvmlGpuFabricInfo_v2_t /** * GPU Fabric information (v3). @@ -3690,7 +3823,7 @@ typedef nvmlGpuFabricInfo_v3_t nvmlGpuFabricInfoV_t; /** * Version identifier value for \ref nvmlGpuFabricInfo_v3_t.version. */ -#define nvmlGpuFabricInfo_v3 NVML_STRUCT_VERSION(GpuFabricInfo, 3) +#define nvmlGpuFabricInfo_v3 NVML_STRUCT_VERSION(GpuFabricInfo, 3) //!< Version macro for \a nvmlGpuFabricInfo_v3_t /** @} */ @@ -3703,8 +3836,9 @@ typedef nvmlGpuFabricInfo_v3_t nvmlGpuFabricInfoV_t; */ /***************************************************************************************************/ -#define NVML_INIT_FLAG_NO_GPUS 1 //!< Don't fail nvmlInit() when no GPUs are found -#define NVML_INIT_FLAG_NO_ATTACH 2 //!< Don't attach GPUs +#define NVML_INIT_FLAG_NO_GPUS (1 << 0) //!< Don't fail nvmlInit() when no GPUs are found +#define NVML_INIT_FLAG_NO_ATTACH (1 << 1) //!< Don't attach GPUs +#define NVML_INIT_FLAG_FORCE_INIT (1 << 2) //!< Force GPU initialization when a previous nvmlInit was called with NO_GPUS and NO_ATTACH flags /** * Initialize NVML, but don't initialize any GPUs yet. @@ -3931,8 +4065,8 @@ nvmlReturn_t DECLDIR nvmlSystemGetCudaDriverVersion_v2(int *cudaDriverVersion); /** * Macros for converting the CUDA driver version number to Major and Minor version numbers. */ -#define NVML_CUDA_DRIVER_VERSION_MAJOR(v) ((v)/1000) -#define NVML_CUDA_DRIVER_VERSION_MINOR(v) (((v)%1000)/10) +#define NVML_CUDA_DRIVER_VERSION_MAJOR(v) ((v)/1000) //!< Macro to extract the major version number from the CUDA driver version. +#define NVML_CUDA_DRIVER_VERSION_MINOR(v) (((v)%1000)/10) //!< Macro to extract the minor version number from the CUDA driver version. /** * Gets name of the process with provided process id @@ -4003,7 +4137,7 @@ typedef struct char branch[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< driver branch } nvmlSystemDriverBranchInfo_v1_t; typedef nvmlSystemDriverBranchInfo_v1_t nvmlSystemDriverBranchInfo_t; -#define nvmlSystemDriverBranchInfo_v1 NVML_STRUCT_VERSION(SystemDriverBranchInfo, 1) +#define nvmlSystemDriverBranchInfo_v1 NVML_STRUCT_VERSION(SystemDriverBranchInfo, 1) //!< Version macro for \a nvmlSystemDriverBranchInfo_v1_t /** * Retrieves the driver branch of the NVIDIA driver installed on the system. @@ -4706,7 +4840,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNumaNodeId(nvmlDevice_t device, unsigned int * * a single set of page tables, and the CPU and GPU both use them. * 3. None: Neither HMM nor ATS is active. * - * %TURING_OR_NEWER% + * For Turing &tm; or newer fully supported devices. * Supported on Linux only. * * @param[in] device The device handle @@ -4738,6 +4872,24 @@ nvmlReturn_t DECLDIR nvmlDeviceGetAddressingMode(nvmlDevice_t device, nvmlDevice */ nvmlReturn_t DECLDIR nvmlDeviceGetRepairStatus(nvmlDevice_t device, nvmlRepairStatus_t *repairStatus); +/** + * Get the unrepairable memory flag for a given GPU + * + * For Hopper &tm; or newer fully supported devices. + * + * @param[in] device The identifier of the target device + * @param[out] unrepairableMemoryStatus Reference to \a nvmlUnrepairableMemoryStatus_v1_t + * + * @return + * - \ref NVML_SUCCESS if the query was successful + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the provided version is invalid/unsupported + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceGetUnrepairableMemoryFlag_v1(nvmlDevice_t device, nvmlUnrepairableMemoryStatus_v1_t *unrepairableMemoryStatus); + /** * Retrieve the common ancestor for two devices * For all products. @@ -5253,8 +5405,8 @@ nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t * * See \ref nvmlClockType_t for details on available clock information. * - * \note On GPUs from Fermi family current P0 clocks (reported by \ref nvmlDeviceGetClockInfo) can differ from max clocks - * by few MHz. + * \note Current P0 clocks (reported by \ref nvmlDeviceGetClockInfo) can differ from max clocks + * by a few MHz. * * @param device The identifier of the target device * @param type Identify which clock domain to query @@ -5520,13 +5672,12 @@ nvmlReturn_t DECLDIR nvmlDeviceGetTargetFanSpeed(nvmlDevice_t device, unsigned i * @param minSpeed The minimum speed allowed to set * @param maxSpeed The maximum speed allowed to set * - * return - * NVML_SUCCESS if speed has been adjusted - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if device is invalid - * NVML_ERROR_NOT_SUPPORTED if the device does not support this - * (doesn't have fans) - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if speed has been adjusted + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this (doesn't have fans) + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMinMaxFanSpeed(nvmlDevice_t device, unsigned int * minSpeed, unsigned int * maxSpeed); @@ -5538,16 +5689,17 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMinMaxFanSpeed(nvmlDevice_t device, unsigned i * * For all cuda-capable discrete products with fans * - * device The identifier of the target \a device - * policy Reference in which to return the fan control \a policy + * @param device The identifier of the target \a device + * @param fan The index of the target fan, zero indexed. + * @param policy Reference in which to return the fan control \a policy * - * return - * NVML_SUCCESS if \a policy has been populated - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference - * a fan that exists. - * NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if \a policy has been populated + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference + * a fan that exists. + * - \ref NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetFanControlPolicy_v2(nvmlDevice_t device, unsigned int fan, nvmlFanControlPolicy_t *policy); @@ -5611,7 +5763,7 @@ typedef struct typedef nvmlTemperature_v1_t nvmlTemperature_t; -#define nvmlTemperature_v1 NVML_STRUCT_VERSION(Temperature, 1) +#define nvmlTemperature_v1 NVML_STRUCT_VERSION(Temperature, 1) //!< Version macro for \a nvmlTemperature_v1_t /** * Retrieves the current temperature readings (in degrees C) for the given device. @@ -5836,9 +5988,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetMemClkVfOffset(nvmlDevice_t device, int *offse * @return * - \ref NVML_SUCCESS if everything worked * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a type or \a pstate are invalid or both - * \a minClockMHz and \a maxClockMHz are NULL + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a type or \a minClockMHz and \a maxClockMHz are NULL * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_UNKNOWN if \a type or \a pstate are invalid or any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMinMaxClockOfPState(nvmlDevice_t device, nvmlClockType_t type, nvmlPstates_t pstate, unsigned int * minClockMHz, unsigned int * maxClockMHz); @@ -6184,15 +6336,6 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int * * * @param device The identifier of the target device * @param powerMizerMode Reference in which to return the power mizer mode - * @param supportedPowerMizerModes Reference in which to return the bitmask of supported power mizer modes on this device. - * The supported modes can be combined using the bitwise OR operator '|'. - * For example, if a device supports all PowerMizer modes, the bitmask would be: - * supportedPowerMizerModes = ((1 << NVML_POWER_MIZER_MODE_ADAPTIVE) | - * (1 << NVML_POWER_MIZER_MODE_PREFER_MAXIMUM_PERFORMANCE) | - * (1 << NVML_POWER_MIZER_MODE_AUTO) | - * (1 << NVML_POWER_MIZER_MODE_PREFER_CONSISTENT_PERFORMANCE)); - * This bitmask can be used to check which power mizer modes are available on the device by performing - * a bitwise AND operation with the specific mode you want to check. * * @return * - \ref NVML_SUCCESS if \a powerMizerMode has been populated @@ -6316,6 +6459,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuO * the operating system is under memory pressure, it may resort to utilizing FB memory. * Such actions can result in discrepancies in the accuracy of memory reporting. * + * @note On certain SOC platforms, the integrated GPU (iGPU) does not use a dedicated framebuffer + * but instead shares memory with the system. As a result, \ref NVML_ERROR_NOT_SUPPORTED + * will be returned in this case. + * * @param device The identifier of the target device * @param memory Reference in which to return the memory information * @@ -6325,12 +6472,50 @@ nvmlReturn_t DECLDIR nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuO * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_NOT_SUPPORTED if video memory is unsupported on the device * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t *memory); /** + * Retrieves the amount of used, free, reserved and total memory available on the device, in bytes. * nvmlDeviceGetMemoryInfo_v2 accounts separately for reserved memory and includes it in the used memory amount. + * + * For all products. + * + * Enabling ECC reduces the amount of total available memory, due to the extra required parity bits. + * Under WDDM most device memory is allocated and managed on startup by Windows. + * + * Under Linux and Windows TCC, the reported amount of used memory is equal to the sum of memory allocated + * by all active channels on the device. + * + * @note In MIG mode, if device handle is provided, the API returns aggregate + * information, only if the caller has appropriate privileges. Per-instance + * information can be queried by using specific MIG device handles. + * + * @note On systems where GPUs are NUMA nodes, the accuracy of FB memory utilization + * provided by this API depends on the memory accounting of the operating system. + * This is because FB memory is managed by the operating system instead of the NVIDIA GPU driver. + * Typically, pages allocated from FB memory are not released even after + * the process terminates to enhance performance. In scenarios where + * the operating system is under memory pressure, it may resort to utilizing FB memory. + * Such actions can result in discrepancies in the accuracy of memory reporting. + * + * @note On certain SOC platforms, the integrated GPU (iGPU) does not use a dedicated framebuffer + * but instead shares memory with the system. As a result, \ref NVML_ERROR_NOT_SUPPORTED + * will be returned in this case. + * + * @param device The identifier of the target device + * @param memory Reference in which to return the memory information + * + * @return + * - \ref NVML_SUCCESS if \a memory has been populated + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_NOT_SUPPORTED if video memory is unsupported on the device + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo_v2(nvmlDevice_t device, nvmlMemory_v2_t *memory); @@ -6383,7 +6568,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int /** * Retrieves the current and pending DRAM Encryption modes for the device. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * Only applicable to devices that support DRAM Encryption * Requires \a NVML_INFOROM_DEN version 1.0 or higher. * @@ -6785,7 +6970,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, unsign /** * Retrieves the current utilization and sampling size in microseconds for the JPG * - * %TURING_OR_NEWER% + * For Turing &tm; or newer fully supported devices. * * @note On MIG-enabled GPUs, querying decoder utilization is not currently supported. * @@ -6806,7 +6991,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetJpgUtilization(nvmlDevice_t device, unsigned i /** * Retrieves the current utilization and sampling size in microseconds for the OFA (Optical Flow Accelerator) * - * %TURING_OR_NEWER% + * For Turing &tm; or newer fully supported devices. * * @note On MIG-enabled GPUs, querying decoder utilization is not currently supported. * @@ -8122,7 +8307,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetProcessesUtilizationInfo(nvmlDevice_t device, /** * Get platform information of this device. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * See \ref nvmlPlatformInfo_v2_t for more information on the struct. * @@ -8160,6 +8345,56 @@ nvmlReturn_t DECLDIR nvmlDeviceGetPlatformInfo(nvmlDevice_t device, nvmlPlatform */ nvmlReturn_t DECLDIR nvmlDeviceGetPdi(nvmlDevice_t device, nvmlPdi_t *pdi); +/** + * Set the hostname for the device. + * + * For Blackwell &tm; or newer fully supported devices. + * Requires root/admin permissions. + * Supported on Linux only. + * + * Sets a hostname string for the GPU device. This operation takes effect immediately. + * + * The hostname is not stored persistently across GPU resets or driver reloads. + * + * @param device The identifier of the target device + * @param hostname Reference to the caller-provided \ref nvmlHostname_v1_t struct containing the hostname + * + * @return + * - \ref NVML_SUCCESS if the hostname was set successfully + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a hostname is NULL or contains invalid characters + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceGetHostname_v1() + */ +nvmlReturn_t DECLDIR nvmlDeviceSetHostname_v1(nvmlDevice_t device, nvmlHostname_v1_t *hostname); + +/** + * Get the hostname for the device. + * + * For Blackwell &tm; or newer fully supported devices. + * Supported on Linux only. + * + * Retrieves the hostname string for the GPU device that was set using \ref nvmlDeviceSetHostname_v1(). + * + * @param device The identifier of the target device + * @param hostname Reference to the caller-provided \ref nvmlHostname_v1_t struct to return the hostname + * + * @return + * - \ref NVML_SUCCESS if the hostname was retrieved successfully + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a hostname is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN on any unexpected error + * + * @see nvmlDeviceSetHostname_v1() + */ +nvmlReturn_t DECLDIR nvmlDeviceGetHostname_v1(nvmlDevice_t device, nvmlHostname_v1_t *hostname); + /** @} */ /***************************************************************************************************/ @@ -8596,13 +8831,12 @@ nvmlReturn_t DECLDIR nvmlDeviceSetDefaultAutoBoostedClocksEnabled(nvmlDevice_t d * @param device The identifier of the target device * @param fan The index of the fan, starting at zero * - * return - * NVML_SUCCESS if speed has been adjusted - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if device is invalid - * NVML_ERROR_NOT_SUPPORTED if the device does not support this - * (doesn't have fans) - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if speed has been adjusted + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid + * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this (doesn't have fans) + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetDefaultFanSpeed_v2(nvmlDevice_t device, unsigned int fan); @@ -8615,16 +8849,16 @@ nvmlReturn_t DECLDIR nvmlDeviceSetDefaultFanSpeed_v2(nvmlDevice_t device, unsign * * For all cuda-capable discrete products with fans * - * device The identifier of the target \a device - * policy The fan control \a policy to set + * @param device The identifier of the target \a device + * @param fan The index of the fan, starting at zero + * @param policy The fan control \a policy to set * - * return - * NVML_SUCCESS if \a policy has been set - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference - * a fan that exists. - * NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell - * NVML_ERROR_UNKNOWN on any unexpected error + * @return + * - \ref NVML_SUCCESS if \a policy has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a policy is null or the \a fan given doesn't reference a fan that exists. + * - \ref NVML_ERROR_NOT_SUPPORTED if the \a device is older than Maxwell + * - \ref NVML_ERROR_UNKNOWN on any unexpected error */ nvmlReturn_t DECLDIR nvmlDeviceSetFanControlPolicy(nvmlDevice_t device, unsigned int fan, nvmlFanControlPolicy_t policy); @@ -8745,17 +8979,17 @@ nvmlReturn_t DECLDIR nvmlDeviceSetAPIRestriction(nvmlDevice_t device, nvmlRestri * * For all cuda-capable discrete products with fans that are Maxwell or Newer. * - * device The identifier of the target device - * fan The index of the fan, starting at zero - * speed The target speed of the fan [0-100] in % of max speed + * @param device The identifier of the target device + * @param fan The index of the fan, starting at zero + * @param speed The target speed of the fan [0-100] in % of max speed * * return - * NVML_SUCCESS if the fan speed has been set - * NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized - * NVML_ERROR_INVALID_ARGUMENT if the device is not valid, or the speed is outside acceptable ranges, - * or if the fan index doesn't reference an actual fan. - * NVML_ERROR_NOT_SUPPORTED if the device is older than Maxwell. - * NVML_ERROR_UNKNOWN if there was an unexpected error. + * - \ref NVML_SUCCESS if the fan speed has been set + * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT if the device is not valid, or the speed is outside acceptable ranges, + * or if the fan index doesn't reference an actual fan. + * - \ref NVML_ERROR_NOT_SUPPORTED if the device is older than Maxwell. + * - \ref NVML_ERROR_UNKNOWN if there was an unexpected error. */ nvmlReturn_t DECLDIR nvmlDeviceSetFanSpeed_v2(nvmlDevice_t device, unsigned int fan, unsigned int speed); @@ -8866,11 +9100,11 @@ nvmlReturn_t DECLDIR nvmlDeviceClearAccountingPids(nvmlDevice_t device); */ /***************************************************************************************************/ -#define NVML_NVLINK_BER_MANTISSA_SHIFT 8 -#define NVML_NVLINK_BER_MANTISSA_WIDTH 0xf +#define NVML_NVLINK_BER_MANTISSA_SHIFT 8 //!< Shift for NVLink BER mantissa. +#define NVML_NVLINK_BER_MANTISSA_WIDTH 0xf //!< Width for NVLink BER mantissa. -#define NVML_NVLINK_BER_EXP_SHIFT 0 -#define NVML_NVLINK_BER_EXP_WIDTH 0xff +#define NVML_NVLINK_BER_EXP_SHIFT 0 //!< Shift for NVLink BER exponent. +#define NVML_NVLINK_BER_EXP_WIDTH 0xff //!< Width for NVLink BER exponent. /** * Nvlink Error counter BER can be obtained using the below macros @@ -8883,11 +9117,26 @@ nvmlReturn_t DECLDIR nvmlDeviceClearAccountingPids(nvmlDevice_t device); /* * NVML_FI_DEV_NVLINK_GET_STATE state enums */ -#define NVML_NVLINK_STATE_INACTIVE 0x0 -#define NVML_NVLINK_STATE_ACTIVE 0x1 -#define NVML_NVLINK_STATE_SLEEP 0x2 +#define NVML_NVLINK_STATE_INACTIVE 0x0 //!< NVLink is inactive. +#define NVML_NVLINK_STATE_ACTIVE 0x1 //!< NVLink is active. +#define NVML_NVLINK_STATE_SLEEP 0x2 //!< NVLink is in sleep state. + +/** + * Represents Nvlink Version + */ +typedef enum nvmlNvlinkVersion_enum +{ + NVML_NVLINK_VERSION_INVALID = 0, //!< NVLink version is invalid + NVML_NVLINK_VERSION_1_0 = 1, //!< NVLink Version 1.0 + NVML_NVLINK_VERSION_2_0 = 2, //!< NVLink Version 2.0 + NVML_NVLINK_VERSION_2_2 = 3, //!< NVLink Version 2.2 + NVML_NVLINK_VERSION_3_0 = 4, //!< NVLink Version 3.0 + NVML_NVLINK_VERSION_3_1 = 5, //!< NVLink Version 3.1 + NVML_NVLINK_VERSION_4_0 = 6, //!< NVLink Version 4.0 + NVML_NVLINK_VERSION_5_0 = 7, //!< NVLink Version 5.0 +} nvmlNvlinkVersion_t; -#define NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES 23 +#define NVML_NVLINK_TOTAL_SUPPORTED_BW_MODES 23 //!< Total supported NVLink bandwidth modes. typedef struct { @@ -8896,7 +9145,7 @@ typedef struct unsigned char totalBwModes; } nvmlNvlinkSupportedBwModes_v1_t; typedef nvmlNvlinkSupportedBwModes_v1_t nvmlNvlinkSupportedBwModes_t; -#define nvmlNvlinkSupportedBwModes_v1 NVML_STRUCT_VERSION(NvlinkSupportedBwModes, 1) +#define nvmlNvlinkSupportedBwModes_v1 NVML_STRUCT_VERSION(NvlinkSupportedBwModes, 1) //!< Version macro for \a nvmlNvlinkSupportedBwModes_v1_t typedef struct { @@ -8905,7 +9154,7 @@ typedef struct unsigned char bwMode; } nvmlNvlinkGetBwMode_v1_t; typedef nvmlNvlinkGetBwMode_v1_t nvmlNvlinkGetBwMode_t; -#define nvmlNvlinkGetBwMode_v1 NVML_STRUCT_VERSION(NvlinkGetBwMode, 1) +#define nvmlNvlinkGetBwMode_v1 NVML_STRUCT_VERSION(NvlinkGetBwMode, 1) //!< Version macro for \a nvmlNvlinkGetBwMode_v1_t typedef struct { @@ -8914,7 +9163,7 @@ typedef struct unsigned char bwMode; } nvmlNvlinkSetBwMode_v1_t; typedef nvmlNvlinkSetBwMode_v1_t nvmlNvlinkSetBwMode_t; -#define nvmlNvlinkSetBwMode_v1 NVML_STRUCT_VERSION(NvlinkSetBwMode, 1) +#define nvmlNvlinkSetBwMode_v1 NVML_STRUCT_VERSION(NvlinkSetBwMode, 1) //!< Version macro for \a nvmlNvlinkSetBwMode_v1_t /** * Struct to represent per device NVLINK information v1 @@ -8924,14 +9173,14 @@ typedef struct unsigned int version; //!< IN - the API version number unsigned int isNvleEnabled; //!< OUT - NVLINK encryption enablement } nvmlNvLinkInfo_v1_t; -#define nvmlNvLinkInfo_v1 NVML_STRUCT_VERSION(NvLinkInfo, 1) +#define nvmlNvLinkInfo_v1 NVML_STRUCT_VERSION(NvLinkInfo, 1) //!< Version macro for \a nvmlNvLinkInfo_v1_t -#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_MSE 0x1 -#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR 0x2 -#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_UPHY 0x3 -#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_CLN 0x4 -#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_DLN 0x5 -#define NVML_NVLINK_FIRMWARE_VERSION_LENGTH 100 +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_MSE 0x1 //!< MSE ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR 0x2 //!< NETIR ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_UPHY 0x3 //!< NETIR UPHY ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_CLN 0x4 //!< NETIR CLN ucode type. +#define NVML_NVLINK_FIRMWARE_UCODE_TYPE_NETIR_DLN 0x5 //!< NETIR DLN ucode type. +#define NVML_NVLINK_FIRMWARE_VERSION_LENGTH 100 //!< Length of firmware version string. /** * Struct to represent NVLINK firmware Semantic versioning and ucode type @@ -8963,7 +9212,7 @@ typedef struct nvmlNvlinkFirmwareInfo_t firmwareInfo; //!< OUT - NVLINK Firmware info } nvmlNvLinkInfo_v2_t; typedef nvmlNvLinkInfo_v2_t nvmlNvLinkInfo_t; -#define nvmlNvLinkInfo_v2 NVML_STRUCT_VERSION(NvLinkInfo, 2) +#define nvmlNvLinkInfo_v2 NVML_STRUCT_VERSION(NvLinkInfo, 2) //!< Version macro for \a nvmlNvLinkInfo_v2_t /** * Retrieves the state of the device's NvLink for the link specified @@ -8992,7 +9241,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int * * @param device The identifier of the target device * @param link Specifies the NvLink link to be queried - * @param version Requested NvLink version from nvmlNvlinkVersion_t + * @param version Requested NvLink version from \ref nvmlNvlinkVersion_t * * @return * - \ref NVML_SUCCESS if \a version has been set @@ -9268,7 +9517,7 @@ nvmlReturn_t DECLDIR nvmlSystemGetNvlinkBwMode(unsigned int *nvlinkBwMode); /** * Get the supported NvLink Reduced Bandwidth Modes of the device * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param supportedBwMode Reference to \a nvmlNvlinkSupportedBwModes_t @@ -9285,7 +9534,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvlinkSupportedBwModes(nvmlDevice_t device, /** * Get the NvLink Reduced Bandwidth Mode for the device * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param getBwMode Reference to \a nvmlNvlinkGetBwMode_t @@ -9302,7 +9551,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetNvlinkBwMode(nvmlDevice_t device, /** * Set the NvLink Reduced Bandwidth Mode for the device * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param setBwMode Reference to \a nvmlNvlinkSetBwMode_t @@ -10816,7 +11065,7 @@ nvmlReturn_t DECLDIR nvmlGpuInstanceGetActiveVgpus(nvmlGpuInstance_t gpuInstance /** * Set vGPU scheduler state for the given GPU instance * - * %GB20X_OR_NEWER% + * For Blackwell &tm GB20x; or newer fully supported devices. * * Scheduler state and params will be allowed to set only when no VM is running within the GPU instance. * In \a nvmlVgpuSchedulerState_t, IFF enableARRMode is enabled then provide the avgFactor and frequency @@ -10842,7 +11091,7 @@ nvmlReturn_t DECLDIR nvmlGpuInstanceSetVgpuSchedulerState(nvmlGpuInstance_t gpuI * Returns the vGPU scheduler state for the given GPU instance. * The information returned in \a nvmlVgpuSchedulerStateInfo_t is not relevant if the BEST EFFORT policy is set. * - * %GB20X_OR_NEWER% + * For Blackwell &tm GB20x; or newer fully supported devices. * * @param gpuInstance The GPU instance handle * @param pSchedulerStateInfo Reference in which \a pSchedulerStateInfo is returned @@ -10865,7 +11114,7 @@ nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerState(nvmlGpuInstance_t gpuI * * To get the entire logs, call the function atleast 5 times a second. * - * %GB20X_OR_NEWER% + * For Blackwell &tm GB20x; or newer fully supported devices. * * @param gpuInstance The GPU instance handle * @param pSchedulerLogInfo Reference in which \a pSchedulerLogInfo is written @@ -10884,7 +11133,7 @@ nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuSchedulerLog(nvmlGpuInstance_t gpuIns /** * Query the creatable vGPU placement ID of the vGPU type within a GPU instance. * - * %GB20X_OR_NEWER% + * For Blackwell &tm GB20x; or newer fully supported devices. * * An array of creatable vGPU placement IDs for the vGPU type ID indicated by \a pCreatablePlacementInfo->vgpuTypeId * is returned in the caller-supplied buffer of \a pCreatablePlacementInfo->placementIds. Memory needed for the @@ -10921,7 +11170,7 @@ nvmlReturn_t DECLDIR nvmlGpuInstanceGetVgpuTypeCreatablePlacements(nvmlGpuInstan * set the correct version number to retrieve the vGPU heterogeneous mode. * \a pHeterogeneousMode->mode can either be \ref NVML_FEATURE_ENABLED or \ref NVML_FEATURE_DISABLED. * - * %GB20X_OR_NEWER% + * For Blackwell &tm GB20x; or newer fully supported devices. * * @param gpuInstance The GPU instance handle * @param pHeterogeneousMode Pointer to the caller-provided structure of nvmlVgpuHeterogeneousMode_t @@ -11666,7 +11915,7 @@ nvmlReturn_t DECLDIR nvmlGetExcludedDeviceInfoByIndex(unsigned int index, nvmlEx */ /***************************************************************************************************/ -#define NVML_PRM_DATA_MAX_SIZE 496 +#define NVML_PRM_DATA_MAX_SIZE 496 //!< Maximum size of the PRM data. /** * Main PRM input structure */ @@ -11687,7 +11936,7 @@ typedef struct * Read or write a GPU PRM register. The input is assumed to be in TLV format in * network byte order. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * Supported on Linux only. * @@ -11702,12 +11951,96 @@ typedef struct * - \ref NVML_ERROR_INVALID_ARGUMENT if \p device or \p buffer are invalid * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to perform this operation * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device - * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the version specified in \p buffer is not supported */ nvmlReturn_t DECLDIR nvmlDeviceReadWritePRM_v1(nvmlDevice_t device, nvmlPRMTLV_v1_t *buffer); /** @} */ +/** + * PRM Counter IDs + */ +typedef enum +{ + NVML_PRM_COUNTER_ID_NONE = 0, + /* Physical Layer Counters (PPCNT group 0x12) */ + NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_LINK_DOWN_EVENTS = 1, + NVML_PRM_COUNTER_ID_PPCNT_PHYSICAL_LAYER_CTRS_SUCCESSFUL_RECOVERY_EVENTS = 2, + /* Recovery counters (PPCNT group 0x1A) */ + NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TOTAL_SUCCESSFUL_RECOVERY_EVENTS = 101, + NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_SINCE_LAST_RECOVERY = 102, + NVML_PRM_COUNTER_ID_PPCNT_RECOVERY_CTRS_TIME_BETWEEN_LAST_TWO_RECOVERIES = 103, + /* Infiniband PortCounters Attribute (PPCNT group 0x20) */ + NVML_PRM_COUNTER_ID_PPCNT_PORTCOUNTERS_PORT_XMIT_WAIT = 201, + /* PLR counters (PPCNT group 0x22) */ + NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODES = 301, + NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_CODE_ERR = 302, + NVML_PRM_COUNTER_ID_PPCNT_PLR_RCV_UNCORRECTABLE_CODE = 303, + NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_CODES = 304, + NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_CODES = 305, + NVML_PRM_COUNTER_ID_PPCNT_PLR_XMIT_RETRY_EVENTS = 306, + NVML_PRM_COUNTER_ID_PPCNT_PLR_SYNC_EVENTS = 307, + /* PPRM counters */ + NVML_PRM_COUNTER_ID_PPRM_OPER_RECOVERY = 1001, +} nvmlPRMCounterId_t; + +/** + * PRM counter input values + */ +typedef struct +{ + unsigned int localPort; //!< Local port number +} nvmlPRMCounterInput_v1_t; + +/** + * PRM Counter Value Structure + */ +typedef struct +{ + nvmlReturn_t status; //!< Status of the PRM counter read + nvmlValueType_t outputType; //!< Output value type + nvmlValue_t outputValue; //!< Output value +} nvmlPRMCounterValue_v1_t; + +/** + * PRM Counter Structure v1 + */ +typedef struct +{ + unsigned int counterId; //!< Counter ID, one of \ref nvmlPRMCounterId_t + /* Input data */ + nvmlPRMCounterInput_v1_t inData; //!< PRM input values + /* Output counter value */ + nvmlPRMCounterValue_v1_t counterValue; //!< Counter value +} nvmlPRMCounter_v1_t; + +/** + * PRM Counter List Structure v1 + */ +typedef struct +{ + unsigned int numCounters; //!< Number of counters + nvmlPRMCounter_v1_t *counters; //!< Pointer to array of PRM counters +} nvmlPRMCounterList_v1_t; + +/** + * Read a list of GPU PRM Counters. + * + * For Blackwell &tm; or newer fully supported devices. + * + * Supported on Linux only. + * + * @param device Identifer of target GPU device + * @param counterList Structure holding the input parameters as well as the retrieved counter values + * + * @return + * - \ref NVML_SUCCESS on success + * - \ref NVML_ERROR_INVALID_ARGUMENT if \p device is invalid or \p counterList is NULL + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to perform this operation + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by the device + * - \ref NVML_ERROR_UNKNOWN on any other error + */ +nvmlReturn_t DECLDIR nvmlDeviceReadPRMCounters_v1(nvmlDevice_t device, nvmlPRMCounterList_v1_t *counterList); + /***************************************************************************************************/ /** @defgroup nvmlMultiInstanceGPU Multi Instance GPU Management * This chapter describes NVML operations that are associated with Multi Instance GPU management. @@ -11718,12 +12051,12 @@ nvmlReturn_t DECLDIR nvmlDeviceReadWritePRM_v1(nvmlDevice_t device, nvmlPRMTLV_v /** * Disable Multi Instance GPU mode. */ -#define NVML_DEVICE_MIG_DISABLE 0x0 +#define NVML_DEVICE_MIG_DISABLE 0x0 //!< Disable Multi Instance GPU mode. /** * Enable Multi Instance GPU mode. */ -#define NVML_DEVICE_MIG_ENABLE 0x1 +#define NVML_DEVICE_MIG_ENABLE 0x1 //!< Enable Multi Instance GPU mode. /** * GPU instance profiles. @@ -11731,33 +12064,33 @@ nvmlReturn_t DECLDIR nvmlDeviceReadWritePRM_v1(nvmlDevice_t device, nvmlPRMTLV_v * These macros should be passed to \ref nvmlDeviceGetGpuInstanceProfileInfo to retrieve the * detailed information about a GPU instance such as profile ID, engine counts. */ -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE 0x0 -#define NVML_GPU_INSTANCE_PROFILE_2_SLICE 0x1 -#define NVML_GPU_INSTANCE_PROFILE_3_SLICE 0x2 -#define NVML_GPU_INSTANCE_PROFILE_4_SLICE 0x3 -#define NVML_GPU_INSTANCE_PROFILE_7_SLICE 0x4 -#define NVML_GPU_INSTANCE_PROFILE_8_SLICE 0x5 -#define NVML_GPU_INSTANCE_PROFILE_6_SLICE 0x6 +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE 0x0 //!< 1_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE 0x1 //!< 2_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_3_SLICE 0x2 //!< 3_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_4_SLICE 0x3 //!< 4_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_7_SLICE 0x4 //!< 7_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_8_SLICE 0x5 //!< 8_SLICE GPU instance profile. +#define NVML_GPU_INSTANCE_PROFILE_6_SLICE 0x6 //!< 6_SLICE GPU instance profile. // 1_SLICE profile with at least one (if supported at all) of Decoder, Encoder, JPEG, OFA engines. -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV1 0x7 +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV1 0x7 //!< 1_SLICE GPU instance profile (rev1). // 2_SLICE profile with at least one (if supported at all) of Decoder, Encoder, JPEG, OFA engines. -#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_REV1 0x8 +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_REV1 0x8 //!< 2_SLICE GPU instance profile (rev1). // 1_SLICE profile with twice the amount of memory resources. -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV2 0x9 +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_REV2 0x9 //!< 1_SLICE GPU instance profile (rev2). // 1_SLICE gfx capable profile -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_GFX 0x0A +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_GFX 0x0A //!< 1_SLICE gfx capable profile. // 2_SLICE gfx capable profile -#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_GFX 0x0B +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_GFX 0x0B //!< 2_SLICE gfx capable profile. // 4_SLICE gfx capable profile -#define NVML_GPU_INSTANCE_PROFILE_4_SLICE_GFX 0x0C +#define NVML_GPU_INSTANCE_PROFILE_4_SLICE_GFX 0x0C //!< 4_SLICE gfx capable profile. // 1_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_NO_ME 0x0D +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_NO_ME 0x0D //!< 1_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. // 2_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. -#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_NO_ME 0x0E +#define NVML_GPU_INSTANCE_PROFILE_2_SLICE_NO_ME 0x0E //!< 2_SLICE profile with none of Decode, Encoder, JPEG, OFA engines. // 1_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. // Allocation of instance of this profile prevents allocation of // all but _NO_ME profiles. -#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME 0x0F +#define NVML_GPU_INSTANCE_PROFILE_1_SLICE_ALL_ME 0x0F //!< 1_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. // 2_SLICE profile with all of GPU Decode, Encoder, JPEG, OFA engines. // Allocation of instance of this profile prevents allocation of // all but _NO_ME profiles. @@ -11770,9 +12103,9 @@ nvmlReturn_t DECLDIR nvmlDeviceReadWritePRM_v1(nvmlDevice_t device, nvmlPRMTLV_v * Bit field values representing MIG profile capabilities * \ref nvmlGpuInstanceProfileInfo_v3_t.capabilities */ -#define NVML_GPU_INSTANCE_PROFILE_CAPS_P2P 0x1 -#define NVML_GPU_INTSTANCE_PROFILE_CAPS_P2P 0x1 //!< Deprecated, do not use -#define NVML_GPU_INSTANCE_PROFILE_CAPS_GFX 0x2 +#define NVML_GPU_INSTANCE_PROFILE_CAPS_P2P 0x1 //!< Peer-to-Peer support. +#define NVML_GPU_INTSTANCE_PROFILE_CAPS_P2P 0x1 //!< Deprecated, do not use +#define NVML_GPU_INSTANCE_PROFILE_CAPS_GFX 0x2 //!< GFX support. /** * MIG compute instance profile capability. @@ -11878,18 +12211,18 @@ typedef struct nvmlGpuInstanceInfo_st * These macros should be passed to \ref nvmlGpuInstanceGetComputeInstanceProfileInfo to retrieve the * detailed information about a compute instance such as profile ID, engine counts */ -#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE 0x0 -#define NVML_COMPUTE_INSTANCE_PROFILE_2_SLICE 0x1 -#define NVML_COMPUTE_INSTANCE_PROFILE_3_SLICE 0x2 -#define NVML_COMPUTE_INSTANCE_PROFILE_4_SLICE 0x3 -#define NVML_COMPUTE_INSTANCE_PROFILE_7_SLICE 0x4 -#define NVML_COMPUTE_INSTANCE_PROFILE_8_SLICE 0x5 -#define NVML_COMPUTE_INSTANCE_PROFILE_6_SLICE 0x6 -#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 0x7 -#define NVML_COMPUTE_INSTANCE_PROFILE_COUNT 0x8 +#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE 0x0 //!< 1_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_2_SLICE 0x1 //!< 2_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_3_SLICE 0x2 //!< 3_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_4_SLICE 0x3 //!< 4_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_7_SLICE 0x4 //!< 7_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_8_SLICE 0x5 //!< 8_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_6_SLICE 0x6 //!< 6_SLICE compute instance profile. +#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE_REV1 0x7 //!< 1_SLICE compute instance profile (rev1). +#define NVML_COMPUTE_INSTANCE_PROFILE_COUNT 0x8 //!< Number of compute instance profiles. -#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED 0x0 //!< All the engines except multiprocessors would be shared -#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT 0x1 +#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED 0x0 //!< All the engines except multiprocessors would be shared. +#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_COUNT 0x1 //!< Number of engine profiles. typedef struct nvmlComputeInstancePlacement_st { @@ -12701,6 +13034,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetDeviceHandleFromMigDeviceHandle(nvmlDevice_t m /***************************************************************************************************/ /** @defgroup GPM NVML GPM + * @note For NVIDIA vGPU Software products + * @note (A) GPM is supported only on MIG-backed vGPU profiles that are allocated all of the instance's frame buffer + * @note (B) No GPM support on Windows * @{ */ /***************************************************************************************************/ @@ -12918,17 +13254,18 @@ typedef struct /** * GPM metric information. */ +typedef struct { + char *shortName; + char *longName; + char *unit; +} nvmlGpmMetricMetricInfo_t; + typedef struct { unsigned int metricId; //!< IN: NVML_GPM_METRIC_? define of which metric to retrieve nvmlReturn_t nvmlReturn; //!< OUT: Status of this metric. If this is nonzero, then value is not valid double value; //!< OUT: Value of this metric. Is only valid if nvmlReturn is 0 (NVML_SUCCESS) - struct - { - char *shortName; - char *longName; - char *unit; - } metricInfo; //!< OUT: Metric name and unit. Those can be NULL if not defined + nvmlGpmMetricMetricInfo_t metricInfo; //!< OUT: Metric name and unit. Those can be NULL if not defined } nvmlGpmMetric_t; /** @@ -12973,9 +13310,12 @@ typedef struct * and \a metricsGet->sample2 by calling \a nvmlGpmSampleAlloc(). Next, the user should fill in the ID of each metric * in \a metricsGet->metrics[i].metricId and specify the total number of metrics to retrieve in \a metricsGet->numMetrics, * The version should be set to NVML_GPM_METRICS_GET_VERSION in \a metricsGet->version. The user then calls the - * \a nvmlGpmSampleGet() API twice to obtain 2 samples of counters. \note that the interval between these - * two \a nvmlGpmSampleGet() calls should be greater than 100ms due to the internal sample refresh rate. - * Finally, the user calls \a nvmlGpmMetricsGet to retrieve the metrics, which will be stored at \a metricsGet->metrics + * \a nvmlGpmSampleGet() API twice to obtain 2 samples of counters. + * + * @note The interval between these two \a nvmlGpmSampleGet() calls should be greater than 100ms due to the + * internal sample refresh rate. Finally, the user calls \a nvmlGpmMetricsGet to retrieve the metrics, which will + * be stored at \a metricsGet->metrics + * * * @param metricsGet IN/OUT: populated \a nvmlGpmMetricsGet_t struct * @@ -13110,7 +13450,7 @@ nvmlReturn_t DECLDIR nvmlGpmSetStreamingEnabled(nvmlDevice_t device, unsigned in /** @} */ // @defgroup nvmlGpmFunctions /** @} */ // @defgroup GPM -#define NVML_DEV_CAP_EGM (1 << 0) // Extended GPU memory +#define NVML_DEV_CAP_EGM (1 << 0) //!< Extended GPU memory /** * Device capabilities */ @@ -13146,19 +13486,19 @@ nvmlReturn_t DECLDIR nvmlDeviceGetCapabilities(nvmlDevice_t device, /* * Generic bitmask to hold 255 bits, represented by 8 elements of 32 bits */ -#define NVML_255_MASK_BITS_PER_ELEM 32 -#define NVML_255_MASK_NUM_ELEMS 8 +#define NVML_255_MASK_BITS_PER_ELEM 32 //!< Number of bits per element. +#define NVML_255_MASK_NUM_ELEMS 8 //!< Number of elements. #define NVML_255_MASK_BIT_SET(index, nvmlMask) \ - nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Set bit at index. #define NVML_255_MASK_BIT_GET(index, nvmlMask) \ - nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask.mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Get bit at index. #define NVML_255_MASK_BIT_SET_PTR(index, nvmlMask) \ - nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] |= (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Set bit at index. #define NVML_255_MASK_BIT_GET_PTR(index, nvmlMask) \ - nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) + nvmlMask->mask[index / NVML_255_MASK_BITS_PER_ELEM] & (1 << (index % NVML_255_MASK_BITS_PER_ELEM)) //!< Get bit at index. typedef struct { @@ -13192,6 +13532,18 @@ typedef enum NVML_POWER_PROFILE_MAX = 15, } nvmlPowerProfileType_t; +/** + * Enum for operation to perform on the requested profiles + */ +typedef enum +{ + NVML_POWER_PROFILE_OPERATION_CLEAR = 0, //!< Remove the requested profiles from the existing list of requested profiles + NVML_POWER_PROFILE_OPERATION_SET = 1, //!< Add the requested profiles to the existing list of requested profiles + NVML_POWER_PROFILE_OPERATION_SET_AND_OVERWRITE = 2, //!< Overwrite the existing list of requested profiles with just the requested profiles + + NVML_POWER_PROFILE_OPERATION_MAX = 3, //!< Max value above +1 +} nvmlPowerProfileOperation_t; + /** * Profile Metadata */ @@ -13203,7 +13555,7 @@ typedef struct nvmlMask255_t conflictingMask; //!< Mask of conflicting performance profiles } nvmlWorkloadPowerProfileInfo_v1_t; typedef nvmlWorkloadPowerProfileInfo_v1_t nvmlWorkloadPowerProfileInfo_t; -#define nvmlWorkloadPowerProfileInfo_v1 NVML_STRUCT_VERSION(WorkloadPowerProfileInfo, 1) +#define nvmlWorkloadPowerProfileInfo_v1 NVML_STRUCT_VERSION(WorkloadPowerProfileInfo, 1) //!< Version macro for \a nvmlWorkloadPowerProfileInfo_v1_t /** * Profiles Info @@ -13215,7 +13567,7 @@ typedef struct nvmlWorkloadPowerProfileInfo_t perfProfile[NVML_WORKLOAD_POWER_MAX_PROFILES]; //!< Array of performance profile info parameters } nvmlWorkloadPowerProfileProfilesInfo_v1_t; typedef nvmlWorkloadPowerProfileProfilesInfo_v1_t nvmlWorkloadPowerProfileProfilesInfo_t; -#define nvmlWorkloadPowerProfileProfilesInfo_v1 NVML_STRUCT_VERSION(WorkloadPowerProfileProfilesInfo, 1) +#define nvmlWorkloadPowerProfileProfilesInfo_v1 NVML_STRUCT_VERSION(WorkloadPowerProfileProfilesInfo, 1) //!< Version macro for \a nvmlWorkloadPowerProfileProfilesInfo_v1_t /** * Current Profiles @@ -13239,12 +13591,22 @@ typedef struct nvmlMask255_t requestedProfilesMask; //!< Mask of 255 bits, each bit representing index of respective perf profile } nvmlWorkloadPowerProfileRequestedProfiles_v1_t; typedef nvmlWorkloadPowerProfileRequestedProfiles_v1_t nvmlWorkloadPowerProfileRequestedProfiles_t; -#define nvmlWorkloadPowerProfileRequestedProfiles_v1 NVML_STRUCT_VERSION(WorkloadPowerProfileRequestedProfiles, 1) +#define nvmlWorkloadPowerProfileRequestedProfiles_v1 NVML_STRUCT_VERSION(WorkloadPowerProfileRequestedProfiles, 1) //!< Version macro for \a nvmlWorkloadPowerProfileRequestedProfiles_v1_t + +/** + * Update Profiles + */ +typedef struct +{ + nvmlPowerProfileOperation_t operation; //!< Operation to perform + nvmlMask255_t updateProfilesMask; //!< Mask of 255 bits, each bit representing index of respective perf profile +} nvmlWorkloadPowerProfileUpdateProfiles_v1_t; +#define nvmlWorkloadPowerProfileUpdateProfiles_v1 NVML_STRUCT_VERSION(WorkloadPowerProfileUpdateProfiles, 1) /** * Get Performance Profiles Information * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * See \ref nvmlWorkloadPowerProfileProfilesInfo_v1_t for more information on the struct. * The mask \a perfProfilesMask is bitmask of all supported mode indices where the * mode is supported if the index is 1. Each supported mode will have a corresponding @@ -13271,7 +13633,7 @@ nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileGetProfilesInfo(nvmlDevice_t /** * Get Current Performance Profiles * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * See \ref nvmlWorkloadPowerProfileCurrentProfiles_v1_t for more information on the struct. * This API returns a stuct which contains the current \a perfProfilesMask, * \a requestedProfilesMask and \a enforcedProfilesMask. Each bit set in each @@ -13293,9 +13655,10 @@ nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileGetProfilesInfo(nvmlDevice_t nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(nvmlDevice_t device, nvmlWorkloadPowerProfileCurrentProfiles_t *currentProfiles); /** + * @deprecated Use \ref nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1 instead * Set Requested Performance Profiles * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * See \ref nvmlWorkloadPowerProfileRequestedProfiles_v1_t for more information on the struct. * Reuqest one or more performance profiles be activated using the input bitmask * \a requestedProfilesMask, where each bit set corresponds to a supported bit from @@ -13315,12 +13678,13 @@ nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(nvmlDevice * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(nvmlDevice_t device, - nvmlWorkloadPowerProfileRequestedProfiles_t *requestedProfiles); +DEPRECATED(13.1) nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(nvmlDevice_t device, + nvmlWorkloadPowerProfileRequestedProfiles_t *requestedProfiles); /** + * @deprecated Use \ref nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1 instead * Clear Requested Performance Profiles * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * See \ref nvmlWorkloadPowerProfileRequestedProfiles_v1_t for more information on the struct. * Clear one or more performance profiles be using the input bitmask * \a requestedProfilesMask, where each bit set corresponds to a supported bit from @@ -13340,8 +13704,34 @@ nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileSetRequestedProfiles(nvmlDevi * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH If the provided version is invalid/unsupported * - \ref NVML_ERROR_UNKNOWN On any unexpected error */ -nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(nvmlDevice_t device, - nvmlWorkloadPowerProfileRequestedProfiles_t *requestedProfiles); +DEPRECATED(13.1) nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(nvmlDevice_t device, + nvmlWorkloadPowerProfileRequestedProfiles_t *requestedProfiles); + +/** + * Update Requested Performance Profiles + * + * For Blackwell &tm; or newer fully supported devices. + * See \ref nvmlWorkloadPowerProfileUpdateProfiles_v1_t for more information on the struct. + * Update the requested performance profiles using the input bitmask + * \a updateProfilesMask, where each bit set corresponds to a supported bit from + * the \a perfProfilesMask. + * The \a operation parameter specifies the operation to perform, see \ref nvmlPowerProfileOperation_t for more information. + * Requires root/admin permissions. + * + * @param device The identifier of the target device + * @param updateProfiles Reference to struct \a nvmlWorkloadPowerProfileUpdateProfiles_v1_t + * + * @return + * - \ref NVML_SUCCESS If the query is successful + * - \ref NVML_ERROR_UNINITIALIZED If the library has not been successfully initialized + * - \ref NVML_ERROR_INVALID_ARGUMENT If \a device is invalid or \a pointer to struct is NULL + * - \ref NVML_ERROR_NOT_SUPPORTED If the device does not support this feature + * - \ref NVML_ERROR_GPU_IS_LOST If the target GPU has fallen off the bus or is otherwise inaccessible + * - \ref NVML_ERROR_UNKNOWN On any unexpected error + */ +nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileUpdateProfiles_v1(nvmlDevice_t device, + nvmlWorkloadPowerProfileUpdateProfiles_v1_t *updateProfiles); + /** @} */ // @defgroup /***************************************************************************************************/ @@ -13349,16 +13739,28 @@ nvmlReturn_t DECLDIR nvmlDeviceWorkloadPowerProfileClearRequestedProfiles(nvmlDe * @{ */ /***************************************************************************************************/ +/** + * Macro for accomodating the gap in field values for delayed power smoothing. + */ #define NVML_POWER_SMOOTHING_IDX_FROM_FIELD_VAL(field_val) \ - (field_val - NVML_FI_PWR_SMOOTHING_ENABLED) - -#define NVML_POWER_SMOOTHING_MAX_NUM_PROFILES 5 -#define NVML_POWER_SMOOTHING_NUM_PROFILE_PARAMS 4 -#define NVML_POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET 0xFFFFFFFFU -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR 0 -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE 1 -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE 2 -#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS 3 + ( \ + (field_val > NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL) ? \ + (field_val - NVML_FI_PWR_SMOOTHING_ENABLED - \ + (NVML_FI_PWR_SMOOTHING_PRIMARY_POWER_FLOOR - NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL - 1)) : \ + (field_val - NVML_FI_PWR_SMOOTHING_ENABLED) \ + ) //!< Index from field value. + +#define NVML_POWER_SMOOTHING_MAX_NUM_PROFILES 5 //!< Maximum number of profiles. +#define NVML_POWER_SMOOTHING_NUM_PROFILE_PARAMS 8 //!< Number of profile parameters. +#define NVML_POWER_SMOOTHING_ADMIN_OVERRIDE_NOT_SET 0xFFFFFFFFU //!< Admin override not set. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR 0 //!< Percent temperature floor. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE 1 //!< Ramp up rate. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_RATE 2 //!< Ramp down rate. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_DOWN_HYSTERESIS 3 //!< Ramp down hysteresis. +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_SECONDARY_POWER_FLOOR 4 //!< Secondary power floor value in Watts for a given profile +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_WIN_MULT 5 //!< Primary floor activation window multiplier value for a given profile +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_TAR_WIN_MULT 6 //!< Primary floor target window multiplier value for a given profile +#define NVML_POWER_SMOOTHING_PROFILE_PARAM_PRIMARY_FLOOR_ACT_OFFSET 7 //!< Primary floor activation offset value in Watts for a given profile /** * Power Smoothing Structure for Profile information @@ -13372,7 +13774,7 @@ typedef struct double value; //!< The requested value for the given parameter } nvmlPowerSmoothingProfile_v1_t; typedef nvmlPowerSmoothingProfile_v1_t nvmlPowerSmoothingProfile_t; -#define nvmlPowerSmoothingProfile_v1 NVML_STRUCT_VERSION(PowerSmoothingProfile, 1) +#define nvmlPowerSmoothingProfile_v1 NVML_STRUCT_VERSION(PowerSmoothingProfile, 1) //!< Version macro for \a nvmlPowerSmoothingProfile_v1_t /** * Power Smoothing Structure for Feature Enablement @@ -13383,7 +13785,7 @@ typedef struct nvmlEnableState_t state; //!< 0/Disabled or 1/Enabled } nvmlPowerSmoothingState_v1_t; typedef nvmlPowerSmoothingState_v1_t nvmlPowerSmoothingState_t; -#define nvmlPowerSmoothingState_v1 NVML_STRUCT_VERSION(PowerSmoothingState, 1) +#define nvmlPowerSmoothingState_v1 NVML_STRUCT_VERSION(PowerSmoothingState, 1) //!< Version macro for \a nvmlPowerSmoothingState_v1_t /** * Activiate a specific preset profile for datacenter power smoothing. @@ -13391,7 +13793,7 @@ typedef nvmlPowerSmoothingState_v1_t nvmlPowerSmoothingState_t; * and ignores the other parameters of the structure. * Requires root/admin permissions. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * @param device The identifier of the target device * @param profile Reference to \ref nvmlPowerSmoothingProfile_v1_t. @@ -13412,7 +13814,7 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingActivatePresetProfile(nvmlDevice_t * Update the value of a specific profile parameter contained within \ref nvmlPowerSmoothingProfile_v1_t. * Requires root/admin permissions. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * NVML_POWER_SMOOTHING_PROFILE_PARAM_PERCENT_TMP_FLOOR expects a value as a percentage from 00.00-100.00% * NVML_POWER_SMOOTHING_PROFILE_PARAM_RAMP_UP_RATE expects a value in W/s @@ -13435,7 +13837,7 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingUpdatePresetProfileParam(nvmlDevice * Enable or disable the Power Smoothing Feature. * Requires root/admin permissions. * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * See \ref nvmlEnableState_t for details on allowed states * @@ -13456,7 +13858,7 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingSetState(nvmlDevice_t device, /** * Retrieves the counts of SRAM unique uncorrected ECC errors * - * %BLACKWELL_OR_NEWER% + * For Blackwell &tm; or newer fully supported devices. * * Reads SRAM unique uncorrected ECC error counts. The total number of unique errors is returned by * \a errorCounts->entryCount. Error counts are returned as an array of in the caller-supplied buffer pointed at by @@ -13492,6 +13894,23 @@ nvmlReturn_t DECLDIR nvmlDevicePowerSmoothingSetState(nvmlDevice_t device, nvmlReturn_t DECLDIR nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(nvmlDevice_t device, nvmlEccSramUniqueUncorrectedErrorCounts_t *errorCounts); +/** + * Set Read-only user shared data (RUSD) settings for GPU. + * Requires root/admin permissions. + * + * @param device The identifier of the target device + * @param settings Reference to \ref nvmlRusdSettings_v1_t struct + * + * @return + * - \ref NVML_SUCCESS if the RUSD setting was successfully set + * - \ref NVML_ERROR_INVALID_ARGUMENT if device is invalid or state is NULL + * - \ref NVML_ERROR_NO_PERMISSION if user does not have permission to change feature state + * - \ref NVML_ERROR_NOT_SUPPORTED if this feature is not supported by NVIDIA kernel driver + * - \ref NVML_ERROR_ARGUMENT_VERSION_MISMATCH if the input version is not supported + * + **/ +nvmlReturn_t DECLDIR nvmlDeviceSetRusdSettings_v1(nvmlDevice_t device, nvmlRusdSettings_v1_t *settings); + /** * NVML API versioning support */ diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go index efa5863..32356f3 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/types_gen.go @@ -22,7 +22,7 @@ type PciInfoExt_v1 struct { PciSubSystemId uint32 BaseClass uint32 SubClass uint32 - BusId [32]uint8 + BusId [32]int8 } type PciInfoExt struct { @@ -34,17 +34,17 @@ type PciInfoExt struct { PciSubSystemId uint32 BaseClass uint32 SubClass uint32 - BusId [32]uint8 + BusId [32]int8 } type PciInfo struct { - BusIdLegacy [16]uint8 + BusIdLegacy [16]int8 Domain uint32 Bus uint32 Device uint32 PciDeviceId uint32 PciSubSystemId uint32 - BusId [32]uint8 + BusId [32]int8 } type EccErrorCounts struct { @@ -158,6 +158,10 @@ type RepairStatus struct { BTpcRepairPending uint32 } +type UnrepairableMemoryStatus_v1 struct { + BUnrepairableMemory uint32 +} + type RowRemapperHistogramValues struct { Max uint32 High uint32 @@ -195,9 +199,17 @@ type ViolationTime struct { ViolationTime uint64 } +type GpuThermalSettingsSensor struct { + Controller int32 + DefaultMinTemp int32 + DefaultMaxTemp int32 + CurrentTemp int32 + Target int32 +} + type GpuThermalSettings struct { Count uint32 - Sensor [3]_Ctype_struct___28 + Sensor [3]GpuThermalSettingsSensor } type CoolerInfo_v1 struct { @@ -305,22 +317,22 @@ type FanSpeedInfo struct { type DevicePerfModes_v1 struct { Version uint32 - Str [2048]uint8 + Str [2048]int8 } type DevicePerfModes struct { Version uint32 - Str [2048]uint8 + Str [2048]int8 } type DeviceCurrentClockFreqs_v1 struct { Version uint32 - Str [2048]uint8 + Str [2048]int8 } type DeviceCurrentClockFreqs struct { Version uint32 - Str [2048]uint8 + Str [2048]int8 } type DevicePowerMizerModes_v1 struct { @@ -434,6 +446,10 @@ type PlatformInfo struct { Pad_cgo_0 [3]byte } +type Hostname_v1 struct { + Value [64]int8 +} + type EccSramUniqueUncorrectedErrorEntry_v1 struct { Unit uint32 Location uint32 @@ -456,6 +472,11 @@ type EccSramUniqueUncorrectedErrorCounts struct { Entries *EccSramUniqueUncorrectedErrorEntry_v1 } +type RusdSettings_v1 struct { + Version uint32 + PollMask uint64 +} + type DeviceArchitecture uint32 type BusType uint32 @@ -464,9 +485,16 @@ type FanControlPolicy uint32 type PowerSource uint32 +type GpuDynamicPstatesInfoUtilization struct { + BIsPresent uint32 + Percentage uint32 + IncThreshold uint32 + DecThreshold uint32 +} + type GpuDynamicPstatesInfo struct { Flags uint32 - Utilization [8]_Ctype_struct___23 + Utilization [8]GpuDynamicPstatesInfoUtilization } type PowerScopeType byte @@ -576,7 +604,7 @@ type VgpuInstancesUtilizationInfo struct { type VgpuProcessUtilizationSample struct { VgpuInstance uint32 Pid uint32 - ProcessName [64]uint8 + ProcessName [64]int8 TimeStamp uint64 SmUtil uint32 MemUtil uint32 @@ -585,7 +613,7 @@ type VgpuProcessUtilizationSample struct { } type VgpuProcessUtilizationInfo_v1 struct { - ProcessName [64]uint8 + ProcessName [64]int8 TimeStamp uint64 VgpuInstance uint32 Pid uint32 @@ -621,6 +649,15 @@ type VgpuRuntimeState struct { Size uint64 } +type VgpuSchedulerParamsVgpuSchedDataWithARR struct { + AvgFactor uint32 + Timeslice uint32 +} + +type VgpuSchedulerParamsVgpuSchedData struct { + Timeslice uint32 +} + const sizeofVgpuSchedulerParams = unsafe.Sizeof([8]byte{}) type VgpuSchedulerParams [sizeofVgpuSchedulerParams]byte @@ -649,6 +686,15 @@ type VgpuSchedulerGetState struct { SchedulerParams [8]byte } +type VgpuSchedulerSetParamsVgpuSchedDataWithARR struct { + AvgFactor uint32 + Frequency uint32 +} + +type VgpuSchedulerSetParamsVgpuSchedData struct { + Timeslice uint32 +} + const sizeofVgpuSchedulerSetParams = unsafe.Sizeof([8]byte{}) type VgpuSchedulerSetParams [sizeofVgpuSchedulerSetParams]byte @@ -701,8 +747,8 @@ type GridLicenseExpiry struct { type GridLicensableFeature struct { FeatureCode uint32 FeatureState uint32 - LicenseInfo [128]uint8 - ProductName [128]uint8 + LicenseInfo [128]int8 + ProductName [128]int8 FeatureEnabled uint32 LicenseExpiry GridLicenseExpiry } @@ -839,23 +885,23 @@ type nvmlUnit struct { type HwbcEntry struct { HwbcId uint32 - FirmwareVersion [32]uint8 + FirmwareVersion [32]int8 } type LedState struct { - Cause [256]uint8 + Cause [256]int8 Color uint32 } type UnitInfo struct { - Name [96]uint8 - Id [96]uint8 - Serial [96]uint8 - FirmwareVersion [96]uint8 + Name [96]int8 + Id [96]int8 + Serial [96]int8 + FirmwareVersion [96]int8 } type PSUInfo struct { - State [256]uint8 + State [256]int8 Current uint32 Voltage uint32 Power uint32 @@ -1096,12 +1142,12 @@ type GpuFabricInfoV struct { type SystemDriverBranchInfo_v1 struct { Version uint32 - Branch [80]uint8 + Branch [80]int8 } type SystemDriverBranchInfo struct { Version uint32 - Branch [80]uint8 + Branch [80]int8 } type AffinityScope uint32 @@ -1196,24 +1242,24 @@ type nvmlVgpuMetadata struct { Version uint32 Revision uint32 GuestInfoState uint32 - GuestDriverVersion [80]uint8 - HostDriverVersion [80]uint8 + GuestDriverVersion [80]int8 + HostDriverVersion [80]int8 Reserved [6]uint32 VgpuVirtualizationCaps uint32 GuestVgpuVersion uint32 OpaqueDataSize uint32 - OpaqueData [4]uint8 + OpaqueData [4]int8 } type nvmlVgpuPgpuMetadata struct { Version uint32 Revision uint32 - HostDriverVersion [80]uint8 + HostDriverVersion [80]int8 PgpuVirtualizationCaps uint32 Reserved [5]uint32 HostSupportedVgpuRange VgpuVersion OpaqueDataSize uint32 - OpaqueData [4]uint8 + OpaqueData [4]int8 } type VgpuPgpuCompatibility struct { @@ -1223,7 +1269,7 @@ type VgpuPgpuCompatibility struct { type ExcludedDeviceInfo struct { PciInfo PciInfo - Uuid [80]uint8 + Uuid [80]int8 } type PRMTLV_v1 struct { @@ -1232,6 +1278,27 @@ type PRMTLV_v1 struct { InData [496]uint8 } +type PRMCounterInput_v1 struct { + LocalPort uint32 +} + +type PRMCounterValue_v1 struct { + Status uint32 + OutputType uint32 + OutputValue [8]byte +} + +type PRMCounter_v1 struct { + CounterId uint32 + InData PRMCounterInput_v1 + CounterValue PRMCounterValue_v1 +} + +type PRMCounterList_v1 struct { + NumCounters uint32 + Counters *PRMCounter_v1 +} + type GpuInstancePlacement struct { Start uint32 Size uint32 @@ -1264,7 +1331,7 @@ type GpuInstanceProfileInfo_v2 struct { JpegCount uint32 OfaCount uint32 MemorySizeMB uint64 - Name [96]uint8 + Name [96]int8 } type GpuInstanceProfileInfo_v3 struct { @@ -1279,7 +1346,7 @@ type GpuInstanceProfileInfo_v3 struct { JpegCount uint32 OfaCount uint32 MemorySizeMB uint64 - Name [96]uint8 + Name [96]int8 Capabilities uint32 Pad_cgo_0 [4]byte } @@ -1319,7 +1386,7 @@ type ComputeInstanceProfileInfo_v2 struct { SharedEncoderCount uint32 SharedJpegCount uint32 SharedOfaCount uint32 - Name [96]uint8 + Name [96]int8 } type ComputeInstanceProfileInfo_v3 struct { @@ -1333,7 +1400,7 @@ type ComputeInstanceProfileInfo_v3 struct { SharedEncoderCount uint32 SharedJpegCount uint32 SharedOfaCount uint32 - Name [96]uint8 + Name [96]int8 Capabilities uint32 } @@ -1353,11 +1420,17 @@ type nvmlGpmSample struct { Handle *_Ctype_struct_nvmlGpmSample_st } +type GpmMetricMetricInfo struct { + ShortName *int8 + LongName *int8 + Unit *int8 +} + type GpmMetric struct { MetricId uint32 NvmlReturn uint32 Value float64 - MetricInfo _Ctype_struct___19 + MetricInfo GpmMetricMetricInfo } type nvmlGpmMetricsGetType struct { @@ -1437,6 +1510,11 @@ type WorkloadPowerProfileRequestedProfiles struct { RequestedProfilesMask Mask255 } +type WorkloadPowerProfileUpdateProfiles_v1 struct { + Operation uint32 + UpdateProfilesMask Mask255 +} + type PowerSmoothingProfile_v1 struct { Version uint32 ProfileId uint32 diff --git a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go index f4930ff..d9f1255 100644 --- a/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go +++ b/vendor/github.com/NVIDIA/go-nvml/pkg/nvml/zz_generated.api.go @@ -120,6 +120,7 @@ var ( DeviceGetHandleByUUID = libnvml.DeviceGetHandleByUUID DeviceGetHandleByUUIDV = libnvml.DeviceGetHandleByUUIDV DeviceGetHostVgpuMode = libnvml.DeviceGetHostVgpuMode + DeviceGetHostname_v1 = libnvml.DeviceGetHostname_v1 DeviceGetIndex = libnvml.DeviceGetIndex DeviceGetInforomConfigurationChecksum = libnvml.DeviceGetInforomConfigurationChecksum DeviceGetInforomImageVersion = libnvml.DeviceGetInforomImageVersion @@ -215,6 +216,7 @@ var ( DeviceGetTotalEccErrors = libnvml.DeviceGetTotalEccErrors DeviceGetTotalEnergyConsumption = libnvml.DeviceGetTotalEnergyConsumption DeviceGetUUID = libnvml.DeviceGetUUID + DeviceGetUnrepairableMemoryFlag_v1 = libnvml.DeviceGetUnrepairableMemoryFlag_v1 DeviceGetUtilizationRates = libnvml.DeviceGetUtilizationRates DeviceGetVbiosVersion = libnvml.DeviceGetVbiosVersion DeviceGetVgpuCapabilities = libnvml.DeviceGetVgpuCapabilities @@ -238,6 +240,7 @@ var ( DevicePowerSmoothingSetState = libnvml.DevicePowerSmoothingSetState DevicePowerSmoothingUpdatePresetProfileParam = libnvml.DevicePowerSmoothingUpdatePresetProfileParam DeviceQueryDrainState = libnvml.DeviceQueryDrainState + DeviceReadPRMCounters_v1 = libnvml.DeviceReadPRMCounters_v1 DeviceReadWritePRM_v1 = libnvml.DeviceReadWritePRM_v1 DeviceRegisterEvents = libnvml.DeviceRegisterEvents DeviceRemoveGpu = libnvml.DeviceRemoveGpu @@ -265,6 +268,7 @@ var ( DeviceSetGpcClkVfOffset = libnvml.DeviceSetGpcClkVfOffset DeviceSetGpuLockedClocks = libnvml.DeviceSetGpuLockedClocks DeviceSetGpuOperationMode = libnvml.DeviceSetGpuOperationMode + DeviceSetHostname_v1 = libnvml.DeviceSetHostname_v1 DeviceSetMemClkVfOffset = libnvml.DeviceSetMemClkVfOffset DeviceSetMemoryLockedClocks = libnvml.DeviceSetMemoryLockedClocks DeviceSetMigMode = libnvml.DeviceSetMigMode @@ -274,6 +278,7 @@ var ( DeviceSetPersistenceMode = libnvml.DeviceSetPersistenceMode DeviceSetPowerManagementLimit = libnvml.DeviceSetPowerManagementLimit DeviceSetPowerManagementLimit_v2 = libnvml.DeviceSetPowerManagementLimit_v2 + DeviceSetRusdSettings_v1 = libnvml.DeviceSetRusdSettings_v1 DeviceSetTemperatureThreshold = libnvml.DeviceSetTemperatureThreshold DeviceSetVgpuCapabilities = libnvml.DeviceSetVgpuCapabilities DeviceSetVgpuHeterogeneousMode = libnvml.DeviceSetVgpuHeterogeneousMode @@ -284,6 +289,7 @@ var ( DeviceWorkloadPowerProfileGetCurrentProfiles = libnvml.DeviceWorkloadPowerProfileGetCurrentProfiles DeviceWorkloadPowerProfileGetProfilesInfo = libnvml.DeviceWorkloadPowerProfileGetProfilesInfo DeviceWorkloadPowerProfileSetRequestedProfiles = libnvml.DeviceWorkloadPowerProfileSetRequestedProfiles + DeviceWorkloadPowerProfileUpdateProfiles_v1 = libnvml.DeviceWorkloadPowerProfileUpdateProfiles_v1 ErrorString = libnvml.ErrorString EventSetCreate = libnvml.EventSetCreate EventSetFree = libnvml.EventSetFree @@ -500,6 +506,7 @@ type Interface interface { DeviceGetHandleByUUID(string) (Device, Return) DeviceGetHandleByUUIDV(*UUID) (Device, Return) DeviceGetHostVgpuMode(Device) (HostVgpuMode, Return) + DeviceGetHostname_v1(Device) (string, Return) DeviceGetIndex(Device) (int, Return) DeviceGetInforomConfigurationChecksum(Device) (uint32, Return) DeviceGetInforomImageVersion(Device) (string, Return) @@ -595,6 +602,7 @@ type Interface interface { DeviceGetTotalEccErrors(Device, MemoryErrorType, EccCounterType) (uint64, Return) DeviceGetTotalEnergyConsumption(Device) (uint64, Return) DeviceGetUUID(Device) (string, Return) + DeviceGetUnrepairableMemoryFlag_v1(Device) (UnrepairableMemoryStatus_v1, Return) DeviceGetUtilizationRates(Device) (Utilization, Return) DeviceGetVbiosVersion(Device) (string, Return) DeviceGetVgpuCapabilities(Device, DeviceVgpuCapability) (bool, Return) @@ -618,6 +626,7 @@ type Interface interface { DevicePowerSmoothingSetState(Device, *PowerSmoothingState) Return DevicePowerSmoothingUpdatePresetProfileParam(Device, *PowerSmoothingProfile) Return DeviceQueryDrainState(*PciInfo) (EnableState, Return) + DeviceReadPRMCounters_v1(Device, []PRMCounterId, int) ([]PRMCounter_v1, Return) DeviceReadWritePRM_v1(Device, *PRMTLV_v1) Return DeviceRegisterEvents(Device, uint64, EventSet) Return DeviceRemoveGpu(*PciInfo) Return @@ -645,6 +654,7 @@ type Interface interface { DeviceSetGpcClkVfOffset(Device, int) Return DeviceSetGpuLockedClocks(Device, uint32, uint32) Return DeviceSetGpuOperationMode(Device, GpuOperationMode) Return + DeviceSetHostname_v1(Device, string) Return DeviceSetMemClkVfOffset(Device, int) Return DeviceSetMemoryLockedClocks(Device, uint32, uint32) Return DeviceSetMigMode(Device, int) (Return, Return) @@ -654,6 +664,7 @@ type Interface interface { DeviceSetPersistenceMode(Device, EnableState) Return DeviceSetPowerManagementLimit(Device, uint32) Return DeviceSetPowerManagementLimit_v2(Device, *PowerValue_v2) Return + DeviceSetRusdSettings_v1(Device, RusdSettings_v1) Return DeviceSetTemperatureThreshold(Device, TemperatureThresholds, int) Return DeviceSetVgpuCapabilities(Device, DeviceVgpuCapability, EnableState) Return DeviceSetVgpuHeterogeneousMode(Device, VgpuHeterogeneousMode) Return @@ -664,6 +675,7 @@ type Interface interface { DeviceWorkloadPowerProfileGetCurrentProfiles(Device) (WorkloadPowerProfileCurrentProfiles, Return) DeviceWorkloadPowerProfileGetProfilesInfo(Device) (WorkloadPowerProfileProfilesInfo, Return) DeviceWorkloadPowerProfileSetRequestedProfiles(Device, *WorkloadPowerProfileRequestedProfiles) Return + DeviceWorkloadPowerProfileUpdateProfiles_v1(Device, PowerProfileOperation, []PowerProfileType) Return ErrorString(Return) string EventSetCreate() (EventSet, Return) EventSetFree(EventSet) Return @@ -871,6 +883,7 @@ type Device interface { GetGspFirmwareMode() (bool, bool, Return) GetGspFirmwareVersion() (string, Return) GetHostVgpuMode() (HostVgpuMode, Return) + GetHostname_v1() (string, Return) GetIndex() (int, Return) GetInforomConfigurationChecksum() (uint32, Return) GetInforomImageVersion() (string, Return) @@ -966,6 +979,7 @@ type Device interface { GetTotalEccErrors(MemoryErrorType, EccCounterType) (uint64, Return) GetTotalEnergyConsumption() (uint64, Return) GetUUID() (string, Return) + GetUnrepairableMemoryFlag_v1() (UnrepairableMemoryStatus_v1, Return) GetUtilizationRates() (Utilization, Return) GetVbiosVersion() (string, Return) GetVgpuCapabilities(DeviceVgpuCapability) (bool, Return) @@ -993,6 +1007,7 @@ type Device interface { PowerSmoothingActivatePresetProfile(*PowerSmoothingProfile) Return PowerSmoothingSetState(*PowerSmoothingState) Return PowerSmoothingUpdatePresetProfileParam(*PowerSmoothingProfile) Return + ReadPRMCounters_v1([]PRMCounterId, int) ([]PRMCounter_v1, Return) ReadWritePRM_v1(*PRMTLV_v1) Return RegisterEvents(uint64, EventSet) Return ResetApplicationsClocks() Return @@ -1018,6 +1033,7 @@ type Device interface { SetGpcClkVfOffset(int) Return SetGpuLockedClocks(uint32, uint32) Return SetGpuOperationMode(GpuOperationMode) Return + SetHostname_v1(string) Return SetMemClkVfOffset(int) Return SetMemoryLockedClocks(uint32, uint32) Return SetMigMode(int) (Return, Return) @@ -1027,6 +1043,7 @@ type Device interface { SetPersistenceMode(EnableState) Return SetPowerManagementLimit(uint32) Return SetPowerManagementLimit_v2(*PowerValue_v2) Return + SetRusdSettings_v1(RusdSettings_v1) Return SetTemperatureThreshold(TemperatureThresholds, int) Return SetVgpuCapabilities(DeviceVgpuCapability, EnableState) Return SetVgpuHeterogeneousMode(VgpuHeterogeneousMode) Return @@ -1038,6 +1055,7 @@ type Device interface { WorkloadPowerProfileGetCurrentProfiles() (WorkloadPowerProfileCurrentProfiles, Return) WorkloadPowerProfileGetProfilesInfo() (WorkloadPowerProfileProfilesInfo, Return) WorkloadPowerProfileSetRequestedProfiles(*WorkloadPowerProfileRequestedProfiles) Return + WorkloadPowerProfileUpdateProfiles_v1(PowerProfileOperation, []PowerProfileType) Return } // GpuInstance represents the interface for the nvmlGpuInstance type. diff --git a/vendor/modules.txt b/vendor/modules.txt index 174cc38..f6d4007 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,7 +1,7 @@ # github.com/NVIDIA/go-nvlib v0.10.0 ## explicit; go 1.20 github.com/NVIDIA/go-nvlib/pkg/nvlib/device -# github.com/NVIDIA/go-nvml v0.13.0-1 +# github.com/NVIDIA/go-nvml v0.13.1-0 ## explicit; go 1.20 github.com/NVIDIA/go-nvml/pkg/dl github.com/NVIDIA/go-nvml/pkg/nvml