我正在尝试使用推力的变换函数将设备数组转换为另一个设备数组作为否定。
#include "cuda_runtime.h"
#include <cuda_runtime_api.h>
#include "device_launch_parameters.h"
#include "device_functions.h"
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/generate.h>
#include <thrust/transform.h>
#include <thrust/execution_policy.h>
#include <algorithm>
#include <cstdlib>
#include <stdio.h>
#include <vector>
int main()
{
unsigned * dkeys = NULL;
cudaMalloc((void**) &dkeys, sizeof(unsigned) * size);
vector<unsigned> keys (size);
std::iota(keys.begin(), keys.end(), 0);
unsigned * key_raw = &keys[0];
cudaMemcpy(dkeys, key_raw, sizeof(unsigned) * size, cudaMemcpyHostToDevice);
int *b = NULL;
cudaMalloc((void**) &b, sizeof(int) * size);
thrust::negate<int> op;
thrust::transform(dkeys, dkeys + size, b, op);
return 0;
}
这些行会引发分段错误(核心转储)。这可能是什么问题?
甚至
thrust::negate<int> op;
thrust::transform(dkeys, dkeys + size, dkeys, op);
也会抛出相同的