我在互联网上寻找官方方法来获取当前NVIDIA GPU的每个块的最大线程数,但我一无所获。
我读到这个数字必须是2 ^ N所以我做了一些找到它的代码,但我不确定它是100%正确的,也许还有更好的方法。
在我的情况下,它给我输出1024(对于我的NVIDIA GTX 660M)。
这是我的代码:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
#include <Windows.h>
// Easy print macros:
#define pp std::cout <<
#define ss << " " <<
#define ee << std::endl;
#define ww while(1){Sleep(100);} return 0;
namespace nameMyCudaSystem {
int threadsPerBlock = 0;
__global__ void ThreadPerBlockCounter(){/*Do nothing*/}
int InitializeCuda()
{
cudaError_t cudaStatus;
// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) return -1;
threadsPerBlock = 1;
do
{
if (threadsPerBlock > 1)
{
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess)
{
return -2;
break;
}
}
threadsPerBlock *= 2;
ThreadPerBlockCounter << <1, threadsPerBlock >> >();
} while (cudaGetLastError() == cudaSuccess);
threadsPerBlock /= 2;
if (threadsPerBlock == 0) return -3;
return 0;
}
}
int main()
{
if (nameMyCudaSystem::InitializeCuda() != 0) return -1; // error (some error inside InitializeCuda)
pp nameMyCudaSystem::threadsPerBlock ee; // print the maximum number of thread per block
ww; // just a loop to keep the console window open...
return 0;
}