运行以下示例时出现上述错误。我在几台机器上都试过,得到的都是同样的错误。这段代码是我从 activity_trace_async 示例复制来的。
有人知道这里哪里出了问题吗?
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include <cupti.h>
/* Size in bytes of each activity buffer. */
#define BUF_SIZE (32 * 1024)
/* Alignment in bytes applied to activity buffers. */
#define ALIGN_SIZE (8)
/*
 * Round the pointer `buffer` up to the next address that is a multiple of
 * `align`; an already-aligned pointer is returned unchanged.
 *
 * `align` must be a power of two, because the test relies on the
 * `(align) - 1` bit mask. `buffer` is evaluated more than once, so do not
 * pass an expression with side effects.
 *
 * Every line of the definition except the last must end in a backslash;
 * without it the macro is cut off in the middle of the expression.
 */
#define ALIGN_BUFFER(buffer, align) \
  (((uintptr_t) (buffer) & ((align)-1)) \
     ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) \
     : (buffer))
/*
 * Evaluate the CUPTI expression `call` exactly once. If the result is not
 * CUPTI_SUCCESS, print the source location, the text of the call and the
 * CUPTI result string to stderr, then terminate the process with exit(-1).
 *
 * The expansion is wrapped in do { ... } while (0) so it behaves as a single
 * statement. The text of `call` is stringified into the error message.
 */
#define CUPTI_CALL(call)                                                    \
  do {                                                                      \
    CUptiResult cuptiCallResult_ = call;                                    \
    if (cuptiCallResult_ != CUPTI_SUCCESS) {                                \
      const char *cuptiCallMessage_;                                        \
      cuptiGetResultString(cuptiCallResult_, &cuptiCallMessage_);           \
      fprintf(stderr,                                                       \
              "%s:%d: error: function %s failed with error %s.\n",          \
              __FILE__, __LINE__, #call, cuptiCallMessage_);                \
      exit(-1);                                                             \
    }                                                                       \
  } while (0)
/*
 * Map a CUPTI environment-activity kind to a short printable label.
 *
 * Returns a pointer to a string literal: "SPEED", "TEMPERATURE", "POWER" or
 * "COOLING" for the matching kind, and "<unknown>" for any other value.
 * The caller must not free or modify the returned string.
 */
const char *
getActivityPower(CUpti_ActivityEnvironmentKind kind)
{
  const char *label = "<unknown>";

  if (kind == CUPTI_ACTIVITY_ENVIRONMENT_SPEED) {
    label = "SPEED";
  }
  else if (kind == CUPTI_ACTIVITY_ENVIRONMENT_TEMPERATURE) {
    label = "TEMPERATURE";
  }
  else if (kind == CUPTI_ACTIVITY_ENVIRONMENT_POWER) {
    label = "POWER";
  }
  else if (kind == CUPTI_ACTIVITY_ENVIRONMENT_COOLING) {
    label = "COOLING";
  }

  return label;
}
/*
 * Print one activity record to stdout.
 *
 * Environment records are printed as "POWER <label> ", where the label comes
 * from getActivityPower(); every other record kind is printed as
 * " <unknown>".
 */
static void
printActivity(CUpti_Activity *record)
{
  /* Anything that is not an environment record is reported as unknown. */
  if (record->kind != CUPTI_ACTIVITY_KIND_ENVIRONMENT) {
    printf(" <unknown>\n");
    return;
  }

  CUpti_ActivityEnvironment *envRecord = (CUpti_ActivityEnvironment *) record;
  const char *kindLabel = getActivityPower(envRecord->environmentKind);
  printf("POWER %s \n", kindLabel);
}
/*
 * Buffer-request callback registered with cuptiActivityRegisterCallbacks().
 *
 * Allocates a BUF_SIZE-byte buffer and returns it through the out
 * parameters:
 *   buffer        - receives the start of the allocated buffer
 *   size          - receives BUF_SIZE
 *   maxNumRecords - set to 0
 *
 * The process exits with status -1 if the allocation fails or the block is
 * not ALIGN_SIZE-aligned.
 */
void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
{
  /*
   * Hand out exactly the pointer malloc() returned. bufferCompleted()
   * releases the buffer with free(), which is only defined for the
   * unadjusted pointer; returning a pointer bumped forward for alignment
   * would make that free() undefined behaviour.
   * The cast is kept so the file also builds as C++.
   */
  uint8_t *bfr = (uint8_t *) malloc(BUF_SIZE);
  if (bfr == NULL) {
    printf("Error: out of memory\n");
    exit(-1);
  }
  /* malloc() is expected to return suitably aligned memory already;
     verify instead of silently adjusting the pointer. */
  if (((uintptr_t) bfr & (ALIGN_SIZE - 1)) != 0) {
    printf("Error: activity buffer is not %d-byte aligned\n", (int) ALIGN_SIZE);
    free(bfr);
    exit(-1);
  }
  *size = BUF_SIZE;
  *buffer = bfr;
  *maxNumRecords = 0;
}
/*
 * Buffer-completed callback registered with cuptiActivityRegisterCallbacks().
 *
 * When the buffer holds valid data, prints a start banner, prints every
 * record via printActivity(), reports the number of dropped records if it is
 * non-zero, and prints a finish banner. The buffer is released with free()
 * in all cases.
 *
 *   ctx       - context the buffer belongs to; NULL selects the "global" banners
 *   streamId  - stream id printed in the per-context banners
 *   buffer    - buffer to read records from and then free
 *   size      - unused
 *   validSize - number of valid bytes in `buffer`; 0 means nothing to print
 */
void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
{
  CUpti_Activity *record = NULL;
  CUptiResult status;
  size_t dropped;

  /* Nothing was recorded: only the buffer has to be released. */
  if (validSize == 0) {
    free(buffer);
    return;
  }

  if (ctx != NULL) {
    printf("==== Starting dump for context %p, stream %u ====\n", ctx, streamId);
  }
  else {
    printf("==== Starting dump for global ====\n");
  }

  /* Walk the records until CUPTI reports that the buffer is exhausted. */
  for (;;) {
    status = cuptiActivityGetNextRecord(buffer, validSize, &record);
    if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
      break;
    }
    if (status == CUPTI_SUCCESS) {
      printActivity(record);
      continue;
    }
    /* Any other result is an error: CUPTI_CALL reports it and exits. */
    CUPTI_CALL(status);
  }

  // report any records dropped from the queue
  CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
  if (dropped != 0) {
    printf("Dropped %u activity records\n", (unsigned int) dropped);
  }

  if (ctx != NULL) {
    printf("==== Finished dump for context %p, stream %u ====\n", ctx, streamId);
  }
  else {
    printf("==== Finished dump for global ====\n");
  }

  free(buffer);
}
int
main(int argc, char *argv[])
{
size_t attrValue = 0, attrValueSize = sizeof(size_t);
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_ENVIRONMENT));
CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted));
CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue));
printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue);
attrValue *= 2;
CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue));
CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue));
printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue);
attrValue *= 2;
CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue));
return 0;
}
答案 0 :(得分:1)
您需要先创建CUDA设备上下文。只需在main()的开头添加:
int count = 0;
cudaGetDeviceCount(&count);
然后重新编译,错误应该就会消失。