CUDA Thrust:使用元组键的 reduce_by_key

时间:2012-06-11 21:44:44

标签: cuda thrust

我有两个向量,在创建tuple(带有zip_iterator)之后,我会使用sort_by_key对其进行排序,然后应用reduce_by_key

但是 reduce_by_key 的结果不正确,因为它生成的 counter 向量是错误的。有人能帮帮我吗?这是我的相关代码段。

...
// Iterator over a device vector of ints, a tuple of two such iterators, and
// the zip iterator built from that tuple (dereferences to an (int, int) pair).
typedef thrust::device_vector<int>::iterator IntIterator;
typedef thrust::tuple<IntIterator, IntIterator> IteratorTuple;
typedef thrust::zip_iterator<IteratorTuple> ZipIterator;
// Value type of one composite key.
typedef  thrust::tuple<int, int> tupla; 
...
// The two key components, n elements each.
thrust::device_vector <int> documenti(n);
thrust::device_vector <int> strip(n);
...
// Zip the two vectors so that (documenti[i], strip[i]) acts as a single key.
ZipIterator bufferBegin (thrust::make_tuple(documenti.begin(),strip.begin()));
ZipIterator bufferEnd (thrust::make_tuple(documenti.end(),strip.end()));

...
// Sort the composite keys; counter is reordered along with them.
// (counter is declared in an elided part of the snippet - presumably a
// device_vector<int> of size n; TODO confirm.)
thrust::sort_by_key(bufferBegin,bufferEnd, counter.begin());

// Receives the reduced (unique consecutive) keys.
thrust::device_vector <tupla> example(n);

// NOTE(review): counter.begin() is passed both as the values input and as the
// values output. Thrust documents that reduce_by_key's input ranges must not
// overlap its output ranges, so this in-place use looks like the likely cause
// of the wrong counter contents - confirm against the Thrust documentation.
// NOTE(review): the returned pair of end iterators is discarded, so the
// number of reduced keys is not known after this call.
thrust::reduce_by_key(bufferBegin,bufferEnd, counter.begin(), example.begin(), counter.begin());

// NOTE(review): this sorts all n entries in descending counter order, but
// only the entries written by reduce_by_key hold reduced results - confirm
// that the range should end at the iterator reduce_by_key returned instead.
thrust::sort_by_key(counter.begin(), counter.begin()+n, example.begin(),thrust::greater <int>());

1 个答案:

答案 0 :(得分:3)

我正在提供这个问题的答案,只是为了将其从未答复的列表中删除。

这个问题在我看来不太清楚。从您发布的代码段来看,我的理解是您想了解如何使用元组键进行 reduce_by_key。

下面你可以找到一个完整的例子。我希望它对未来的用户有所帮助。

#include <cstdio>

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/pair.h>
#include <thrust/reduce.h>
#include <thrust/tuple.h>

// --- Composite key type: one (int, int) pair per element
typedef thrust::tuple<int,int> Tuple;

// --- Iterator types of the key and value output containers (host vectors in
//     this example); reduce_by_key returns a pair of these.
typedef thrust::host_vector<Tuple>::iterator  dIter1;
typedef thrust::host_vector<float>::iterator  dIter2;

/************************************/
/* EQUALITY OPERATOR BETWEEN TUPLES */
/************************************/
// Binary predicate handed to reduce_by_key to decide whether two composite
// keys belong to the same group: both tuple components must be equal.
struct BinaryPredicate
{
  // The predicate holds no state, so the call operator is const-qualified;
  // this lets it be invoked through const instances or const references.
  __host__ __device__ bool operator ()
                      (const Tuple& lhs, const Tuple& rhs) const
  {
    return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && (thrust::get<1>(lhs) == thrust::get<1>(rhs));
  }
};

/********/
/* MAIN */
/********/
int main()
{
    const int N = 7;

    // --- Raw input data: two key components and one value per element
    int   keys1_input[N]  = {1, 3, 3, 3, 2, 2, 1};
    int   keys2_input[N]  = {1, 5, 3, 8, 2, 2, 1};
    float input_values[N] = {9., 8., 7., 6., 5., 4., 3.};

    // --- Pack the key components into tuples and copy the values over
    thrust::host_vector<Tuple> keys_input(N);
    thrust::host_vector<float> values_input(input_values, input_values + N);
    for (int k = 0; k < N; ++k)
        keys_input[k] = thrust::make_tuple(keys1_input[k], keys2_input[k]);

    // --- Echo the input keys
    for (int k = 0; k < N; ++k) {
        const Tuple key = keys_input[k];
        printf("%i %i\n", thrust::get<0>(key), thrust::get<1>(key));
    }

    // --- Outputs are sized for the worst case (every key distinct)
    thrust::host_vector<Tuple> keys_output(N);
    thrust::host_vector<float> values_output(N);

    // --- Sum the values of each run of equal keys. reduce_by_key only groups
    //     consecutive equal keys, so the two (1, 1) keys at opposite ends of
    //     the input remain separate groups; sort by key first if every equal
    //     key must be merged.
    thrust::pair<dIter1, dIter2> new_end =
        thrust::reduce_by_key(keys_input.begin(), keys_input.end(),
                              values_input.begin(),
                              keys_output.begin(),
                              values_output.begin(),
                              BinaryPredicate(),
                              thrust::plus<float>());

    // --- Number of groups actually written to the outputs
    const int Nkeys = new_end.first - keys_output.begin();

    // --- Reduced values, one line per group
    printf("\n\n");
    for (int g = 0; g < Nkeys; ++g)
        printf("%i; %f\n", g, values_output[g]);

    // --- Reduced keys, one line per group
    printf("\n\n");
    for (int g = 0; g < Nkeys; ++g) {
        const Tuple key = keys_output[g];
        printf("%i %i\n", thrust::get<0>(key), thrust::get<1>(key));
    }

    return 0;
}

修改

上面的可运行示例使用的是 host_vector。下面是一个完整的示例,针对键和值向量是通过 cudaMalloc 分配的普通设备数组的情况。

#include <thrust/device_vector.h>
#include <thrust/reduce.h>

#include "Utilities.cuh"

// --- Composite key type: one (int, int) pair per element
typedef thrust::tuple<int, int> Tuple;

// --- Iterator types of the key and value output containers (device vectors
//     in this example); reduce_by_key returns a pair of these.
typedef thrust::device_vector<Tuple>::iterator  dIter1;
typedef thrust::device_vector<float>::iterator  dIter2;

/************************************/
/* EQUALITY OPERATOR BETWEEN TUPLES */
/************************************/
// Binary predicate handed to reduce_by_key to decide whether two composite
// keys belong to the same group: both tuple components must be equal.
struct BinaryPredicate
{
    // The predicate holds no state, so the call operator is const-qualified;
    // this lets it be invoked through const instances or const references.
    __host__ __device__ bool operator ()
        (const Tuple& lhs, const Tuple& rhs) const
    {
        return (thrust::get<0>(lhs) == thrust::get<0>(rhs)) && (thrust::get<1>(lhs) == thrust::get<1>(rhs));
    }
};

/********/
/* MAIN */
/********/
int main()
{
    const int N = 7;

    // --- Keys and input values on the host: allocation and definition
    int h_keys1_input[N] = { 1, 3, 3, 3, 2, 2, 1 };                                         // --- Input keys 1 - host side
    int h_keys2_input[N] = { 1, 5, 3, 8, 2, 2, 1 };                                         // --- Input keys 2 - host side
    float h_input_values[N] = { 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f };                 // --- Input values - host side

    // --- Keys and input values on the device: allocation
    //     (every CUDA runtime call is wrapped in gpuErrchk from Utilities.cuh)
    int *d_keys1_input;     gpuErrchk(cudaMalloc(&d_keys1_input, N * sizeof(int)));         // --- Input keys 1 - device side
    int *d_keys2_input;     gpuErrchk(cudaMalloc(&d_keys2_input, N * sizeof(int)));         // --- Input keys 2 - device side
    float *d_input_values;  gpuErrchk(cudaMalloc(&d_input_values, N * sizeof(float)));      // --- Input values - device side

    // --- Keys and input values: host -> device
    gpuErrchk(cudaMemcpy(d_keys1_input, h_keys1_input,   N * sizeof(int),   cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_keys2_input, h_keys2_input,   N * sizeof(int),   cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_input_values, h_input_values, N * sizeof(float), cudaMemcpyHostToDevice));

    // --- From raw pointers to device_ptr, so Thrust treats them as device iterators
    thrust::device_ptr<int> dev_ptr_keys1 = thrust::device_pointer_cast(d_keys1_input);
    thrust::device_ptr<int> dev_ptr_keys2 = thrust::device_pointer_cast(d_keys2_input);
    thrust::device_ptr<float> dev_ptr_values = thrust::device_pointer_cast(d_input_values);

    // --- Declare outputs, sized for the worst case (every key distinct)
    thrust::device_vector<Tuple> d_keys_output(N);
    thrust::device_vector<float> d_values_output(N);

    thrust::pair<dIter1, dIter2> new_end;

    // --- Zip the two key arrays so each (keys1[i], keys2[i]) pair is one composite key
    auto begin = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1, dev_ptr_keys2));
    auto end   = thrust::make_zip_iterator(thrust::make_tuple(dev_ptr_keys1 + N, dev_ptr_keys2 + N));

    // --- Sum the values of each run of equal keys. reduce_by_key only groups
    //     consecutive equal keys; sort by key first if every equal key must
    //     be merged.
    new_end = thrust::reduce_by_key(begin,
                                    end,
                                    dev_ptr_values,
                                    d_keys_output.begin(),
                                    d_values_output.begin(),
                                    BinaryPredicate(),
                                    thrust::plus<float>());

    // --- Number of groups actually written to the outputs
    int Nkeys = new_end.first - d_keys_output.begin();

    // --- Bring only the valid part of each output back to the host in one
    //     copy, instead of one device -> host transfer per printed element
    thrust::host_vector<float> h_values_output(d_values_output.begin(), new_end.second);
    thrust::host_vector<Tuple> h_keys_output(d_keys_output.begin(), new_end.first);

    printf("\n\n");
    for (int i = 0; i < Nkeys; i++) {
        float output = h_values_output[i];
        printf("%i; %f\n", i, output);
    }

    printf("\n\n");
    for (int i = 0; i < Nkeys; i++) {
        int key1 = thrust::get<0>(h_keys_output[i]);
        int key2 = thrust::get<1>(h_keys_output[i]);
        printf("%i %i\n", key1, key2);
    }

    // --- Release the raw device buffers obtained with cudaMalloc above
    gpuErrchk(cudaFree(d_keys1_input));
    gpuErrchk(cudaFree(d_keys2_input));
    gpuErrchk(cudaFree(d_input_values));

    return 0;
}