我正在使用https://devtalk.nvidia.com/default/topic/504951/how-to-call-nvml-apis-/
所示的nvml
库实施示例程序
该计划如下:
#include <stdio.h>
#include <nvidia/gdk/nvml.h>
const char * convertToComputeModeString(nvmlComputeMode_t mode)
{
switch (mode)
{
case NVML_COMPUTEMODE_DEFAULT:
return "Default";
case NVML_COMPUTEMODE_EXCLUSIVE_THREAD:
return "Exclusive_Thread";
case NVML_COMPUTEMODE_PROHIBITED:
return "Prohibited";
case NVML_COMPUTEMODE_EXCLUSIVE_PROCESS:
return "Exclusive Process";
default:
return "Unknown";
}
}
int main()
{
nvmlReturn_t result;
unsigned int device_count, i;
// First initialize NVML library
result = nvmlInit();
if (NVML_SUCCESS != result)
{
printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));
printf("Press ENTER to continue...\n");
getchar();
return 1;
}
result = nvmlDeviceGetCount(&device_count);
if (NVML_SUCCESS != result)
{
printf("Failed to query device count: %s\n", nvmlErrorString(result));
goto Error;
}
printf("Found %d device%s\n\n", device_count, device_count != 1 ? "s" : "");
printf("Listing devices:\n");
for (i = 0; i < device_count; i++)
{
nvmlDevice_t device;
char name[64];
nvmlPciInfo_t pci;
nvmlComputeMode_t compute_mode;
// Query for device handle to perform operations on a device
// You can also query device handle by other features like:
// nvmlDeviceGetHandleBySerial
// nvmlDeviceGetHandleByPciBusId
result = nvmlDeviceGetHandleByIndex(i, &device);
if (NVML_SUCCESS != result)
{
printf("Failed to get handle for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
result = nvmlDeviceGetName(device, name, sizeof(name)/sizeof(name[0]));
if (NVML_SUCCESS != result)
{
printf("Failed to get name of device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
// pci.busId is very useful to know which device physically you're talking to
// Using PCI identifier you can also match nvmlDevice handle to CUDA device.
result = nvmlDeviceGetPciInfo(device, &pci);
if (NVML_SUCCESS != result)
{
printf("Failed to get pci info for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
printf("%d. %s [%s]\n", i, name, pci.busId);
// This is a simple example on how you can modify GPU's state
result = nvmlDeviceGetComputeMode(device, &compute_mode);
if (NVML_ERROR_NOT_SUPPORTED == result)
printf("\t This is not CUDA capable device\n");
else if (NVML_SUCCESS != result)
{
printf("Failed to get compute mode for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
else
{
// try to change compute mode
printf("\t Changing device's compute mode from '%s' to '%s'\n",
convertToComputeModeString(compute_mode),
convertToComputeModeString(NVML_COMPUTEMODE_PROHIBITED));
result = nvmlDeviceSetComputeMode(device, NVML_COMPUTEMODE_PROHIBITED);
if (NVML_ERROR_NO_PERMISSION == result)
printf("\t\t Need root privileges to do that: %s\n", nvmlErrorString(result));
else if (NVML_ERROR_NOT_SUPPORTED == result)
printf("\t\t Compute mode prohibited not supported. You might be running on\n"
"\t\t windows in WDDM driver model or on non-CUDA capable GPU.\n");
else if (NVML_SUCCESS != result)
{
printf("\t\t Failed to set compute mode for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
else
{
printf("\t Restoring device's compute mode back to '%s'\n",
convertToComputeModeString(compute_mode));
result = nvmlDeviceSetComputeMode(device, compute_mode);
if (NVML_SUCCESS != result)
{
printf("\t\t Failed to restore compute mode for device %i: %s\n", i, nvmlErrorString(result));
goto Error;
}
}
}
}
result = nvmlShutdown();
if (NVML_SUCCESS != result)
printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result));
printf("All done.\n");
printf("Press ENTER to continue...\n");
getchar();
return 0;
Error:
result = nvmlShutdown();
if (NVML_SUCCESS != result)
printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result));
printf("Press ENTER to continue...\n");
getchar();
return 1;
}
makefile如下:
ARCH := $(shell getconf LONG_BIT)
ifeq (${ARCH},32)
NVML_LIB := ../lib/
else ifeq (${ARCH},64)
NVML_LIB := /usr/lib/nvidia-340/
else
$(error Unknown architecture!)
endif
CFLAGS := -I ../inc
LDFLAGS := -lnvidia-ml -L $(NVML_LIB)
example: example.o
$(CC) $(LDFLAGS) $< -o $@
clean:
-@rm -f example.o
-@rm -f example
我得到的错误是:
cc -lnvidia-ml -L /usr/src/gdk/nvml/lib/ example.o -o example
example.o: In function `main':
example.c:(.text+0x5f): undefined reference to `nvmlInit_v2'
example.c:(.text+0x7b): undefined reference to `nvmlErrorString'
example.c:(.text+0xb5): undefined reference to `nvmlDeviceGetCount_v2'
example.c:(.text+0xd1): undefined reference to `nvmlErrorString'
example.c:(.text+0x149): undefined reference to `nvmlDeviceGetHandleByIndex_v2'
example.c:(.text+0x165): undefined reference to `nvmlErrorString'
example.c:(.text+0x19f): undefined reference to `nvmlDeviceGetName'
example.c:(.text+0x1bb): undefined reference to `nvmlErrorString'
example.c:(.text+0x1f3): undefined reference to `nvmlDeviceGetPciInfo_v2'
example.c:(.text+0x20f): undefined reference to `nvmlErrorString'
example.c:(.text+0x269): undefined reference to `nvmlDeviceGetComputeMode'
example.c:(.text+0x29d): undefined reference to `nvmlErrorString'
example.c:(.text+0x2ff): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x31b): undefined reference to `nvmlErrorString'
example.c:(.text+0x360): undefined reference to `nvmlErrorString'
example.c:(.text+0x3b5): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x3d1): undefined reference to `nvmlErrorString'
example.c:(.text+0x40c): undefined reference to `nvmlShutdown'
example.c:(.text+0x428): undefined reference to `nvmlErrorString'
example.c:(.text+0x45f): undefined reference to `nvmlShutdown'
example.c:(.text+0x47b): undefined reference to `nvmlErrorString'
collect2: error: ld returned 1 exit status
make: *** [example] Error 1
pranjal@PCL:~/nvidia$ make
cc -lnvidia-ml -L /usr/lib/nvidia-340/ example.o -o example
example.o: In function `main':
example.c:(.text+0x5f): undefined reference to `nvmlInit_v2'
example.c:(.text+0x7b): undefined reference to `nvmlErrorString'
example.c:(.text+0xb5): undefined reference to `nvmlDeviceGetCount_v2'
example.c:(.text+0xd1): undefined reference to `nvmlErrorString'
example.c:(.text+0x149): undefined reference to `nvmlDeviceGetHandleByIndex_v2'
example.c:(.text+0x165): undefined reference to `nvmlErrorString'
example.c:(.text+0x19f): undefined reference to `nvmlDeviceGetName'
example.c:(.text+0x1bb): undefined reference to `nvmlErrorString'
example.c:(.text+0x1f3): undefined reference to `nvmlDeviceGetPciInfo_v2'
example.c:(.text+0x20f): undefined reference to `nvmlErrorString'
example.c:(.text+0x269): undefined reference to `nvmlDeviceGetComputeMode'
example.c:(.text+0x29d): undefined reference to `nvmlErrorString'
example.c:(.text+0x2ff): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x31b): undefined reference to `nvmlErrorString'
example.c:(.text+0x360): undefined reference to `nvmlErrorString'
example.c:(.text+0x3b5): undefined reference to `nvmlDeviceSetComputeMode'
example.c:(.text+0x3d1): undefined reference to `nvmlErrorString'
example.c:(.text+0x40c): undefined reference to `nvmlShutdown'
example.c:(.text+0x428): undefined reference to `nvmlErrorString'
example.c:(.text+0x45f): undefined reference to `nvmlShutdown'
example.c:(.text+0x47b): undefined reference to `nvmlErrorString'
collect2: error: ld returned 1 exit status
make: *** [example] Error 1
任何帮助将不胜感激。谢谢。
答案 0 :(得分:2)
这是我在Linux CUDA 7.5设置上所做的:
将GPU驱动程序更新为352.79。就我而言,这是通过runfile安装程序here完成的。如果您之前通过包管理器方法(例如.deb)安装了GPU驱动程序,那么您不想使用runfile安装程序方法。
获取the GDK的最新版本(请参阅下面的注释),此时恰好以352.79为目标,并包含nvml:
wget --no-check-certificate http://developer.download.nvidia.com/compute/cuda/7.5/Prod/gdk/gdk_linux_amd64_352_79_release.run
安装GDK:
sh gdk_linux_amd64_352_79_release.run
验证相应的库已更新:
ls /usr/lib64/libnv*
(您应该看到libnvidia-ml.so.352.79
等。)
编译示例文件:
g++ -I./gdk352_79/usr/include -L/usr/lib64 -lnvidia-ml example.c -o example
当我运行example
可执行文件时,我得到:
$ ./example
Found 2 devices
Listing devices:
0. Quadro 5000 [0000:02:00.0]
Changing device's compute mode from 'Default' to 'Prohibited'
Need root privileges to do that: Insufficient Permissions
1. GeForce GT 640 [0000:03:00.0]
Changing device's compute mode from 'Default' to 'Prohibited'
Need root privileges to do that: Insufficient Permissions
All done.
Press ENTER to continue...
$
希望这会让你前进。我假设您在需要时不需要帮助进行任何Makefile
更改。如果Makefile
不起作用,请继续修改它,直到获得我在步骤5中列出的确切编译命令。
注意:从CUDA 8.0开始,GDK不是一个单独的实体,而是与CUDA 8.0工具包一起安装。没有必要单独安装GDK。