给定二进制图像,我想使用GPU(计算统一设备架构)返回其中白色像素的索引列表。如何确定点向量的索引? 这是CUDA内核。
//copy only active pixel locations
__global__ void get_white_pixels_kernel(unsigned char* bin_image,
float * points,
int width,
int height,
int grayWidthStep)
{
int row_index = threadIdx.y+ blockIdx.y*blockDim.y;
int col_index = threadIdx.x+blockIdx.x*blockDim.x;
if ((col_index < width) && (row_index < height))
{
//Location of gray pixel in output
const int gray_tid = row_index * grayWidthStep + col_index;
if(input[gray_tid]==255)
points[--here is the index]= Point2f(row_index,col_index);
}
}
答案 0 :(得分:0)
以下是实现所需功能的简单方法:
以下是上述流程的示例代码。
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>
#include <thrust/count.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <opencv2/opencv.hpp>
static void _check_err(cudaError_t err, const char* file, int line)
{
if(err)
{
const char* err_str = cudaGetErrorString(err);
printf("CUDA Error: %s\nFile: %s\nLine: %d\n", err_str, file, line);
exit(EXIT_FAILURE);
}
}
#define CHECK_ERR(err) _check_err((err), __FILE__, __LINE__)
__global__ void kernel_find_indices(const unsigned char* input, int width, int height, int step, int2* indices)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if(x < width && y < height)
{
const int tidPixel = y * step + x;
const int tidIndex = y * width + x;
unsigned char value = input[tidPixel];
int2 index_to_write;
if(value)
{
//Write actual index to pixels with non-zero value
index_to_write.x = x;
index_to_write.y = y;
}
else
{
//Write dummy index to pixels with zero value
index_to_write.x = -1;
index_to_write.y = -1;
}
indices[tidIndex] = index_to_write;
}
}
//Operator to check whether an index is of a non-zero pixel
struct isNonZeroIndex
{
__host__ __device__ bool operator()(const int2 &idx)
{
return (idx.x != -1) && (idx.y != -1);
}
};
std::vector<cv::Point> getIndicesOfNonZeroPixels(cv::Mat input)
{
std::vector<int2> output_int2;
std::vector<cv::Point> output;
int pixelCount = input.cols * input.rows;
size_t imageBytes= input.step * input.rows;
unsigned char* image_d;
thrust::device_vector<int2> index_buffer_d(pixelCount);
//Allocate device memory for input image
CHECK_ERR(cudaMalloc(&image_d, imageBytes));
//Copy input image to device
CHECK_ERR(cudaMemcpy(image_d, input.ptr(), imageBytes, cudaMemcpyHostToDevice));
dim3 block(16,16);
dim3 grid;
grid.x = (input.cols + block.x - 1) / block.x;
grid.y = (input.rows + block.y - 1) / block.y;
//Generate an index mask with dummy values for indices with zero pixel value
kernel_find_indices<<<grid, block>>>(image_d, input.cols, input.rows, input.step, thrust::raw_pointer_cast(index_buffer_d.data()));
CHECK_ERR(cudaDeviceSynchronize());
int nonZeroCount = thrust::count_if(index_buffer_d.begin(), index_buffer_d.end(), isNonZeroIndex());
//Keep only those indices whose pixel value is non-zero (stream compaction)
thrust::device_vector<int2> compacted(nonZeroCount);
thrust::copy_if(index_buffer_d.begin(), index_buffer_d.end(), compacted.begin(), isNonZeroIndex());
//Copy non-zero pixel indices to host
output_int2.resize(nonZeroCount);
thrust::copy(compacted.begin(), compacted.end(), output_int2.begin());
CHECK_ERR(cudaFree(image_d));
//Convert vector<int2> to vector<cv::Point>
output.resize(nonZeroCount);
for(size_t i=0; i<nonZeroCount; i++)
output[i] = cv::Point(output_int2[i].x, output_int2[i].y);
return output;
}
void run_test()
{
//Generate a sample test image
cv::Mat test = cv::Mat::zeros(100,100, CV_8UC1);
cv::rectangle(test, cv::Rect(5,5,20,20), cv::Scalar::all(255), CV_FILLED);
//Get pixel indices of non-zero pixels
std::vector<cv::Point> indices = getIndicesOfNonZeroPixels(test);
//Display those indices
for(size_t i=0; i<indices.size(); i++)
{
printf("%d, %d\n", indices[i].x, indices[i].y);
}
//Show image
cv::imshow("Sample", test);
cv::waitKey();
}
int main(int argc, char** argv)
{
run_test();
return 0;
}
nvcc -o nz nz.cu -arch = sm_61 -L / usr / local / lib -lopencv_core -lopencv_highgui -lopencv_imgproc
请注意,此代码仅适用于8UC1
类型(8位,单通道)的图像。您可以根据需要轻松地将其扩展到其他数据类型。