我基本上将两个2D点阵列分配到GPU上。第一个包含DataSize点和第二个CentroidSize点。这些在课程的其他地方指定。构造函数。基本上,内存分配没有错误,但复制到data_xy时出现无效访问错误,而centroid_xy分配正常。
以下是相关代码。
float* device_ptr_centroids_xy, *device_ptr_data_xy,** host_ptr_centroids_xy, **host_ptr_data_xy;
size_t host_data_pitch, host_centroids_pitch;
// Cannot use member functions on GPU. Rearrange Centroid/Feature data into 1D float arrays
host_ptr_data_xy = new float*[DataSize];
host_ptr_centroids_xy = new float*[CentroidSize];
for(int i=0; i< DataSize; i++)
{
host_ptr_data_xy[i] = new float[2];
host_ptr_data_xy[i][0] = PointCloud[i].contents().first;
host_ptr_data_xy[i][1] = PointCloud[i].contents().second;
}
for(int i = 0; i < CentroidSize; i++)
{
host_ptr_centroids_xy[i] = new float[2];
host_ptr_centroids_xy[i][0] = Centroids[i].contents().first;
host_ptr_centroids_xy[i][1] = Centroids[i].contents().second;
};
// Prepare GPU global memory for Feature Space. Returns Pitch (size after padding for bank alignment)
Success( cudaMallocPitch( &device_ptr_centroids_xy, &host_centroids_pitch, sizeof(float) * CentroidSize, 2 ) );
Success( cudaMallocPitch( &device_ptr_data_xy, &host_data_pitch, sizeof(float) * DataSize, 2 ) );
Success( cudaMemcpy2D(device_ptr_centroids_xy, host_centroids_pitch, host_ptr_centroids_xy, sizeof(float) * CentroidSize, sizeof(float) * CentroidSize, 2, cudaMemcpyHostToDevice) );
Success( cudaMemcpy2D(device_ptr_data_xy, host_data_pitch, host_ptr_data_xy, sizeof(float) * DataSize, sizeof(float) * DataSize, 2, cudaMemcpyHostToDevice) );