CUDA Thrust - 带运行(run)起始索引的行程长度编码

时间:2014-11-18 01:24:41

标签: cuda thrust

我正在尝试使用 CUDA Thrust 构建一个"行程编码器",为文件中的各个运行(run,即连续重复的字符段)生成一份"报告"。稍后我会用这份"报告"来执行行程编码步骤。

例如,输入序列:

inputSequence = [a, a, b, c, a, a, a];

输出序列:

runChar = [a, a];
runCount = [2, 3];
runPosition = [0, 4];

该输出表示:从位置 0 开始有一个由 2 个 a 组成的运行(run),从位置 4 开始有一个由 3 个 a 组成的运行。

下面给出的 Thrust 行程长度编码示例输出两个数组 - 一个用于输出字符,一个用于其长度。

我想修改它,使其排除长度小于 2 的运行(run),并且同时输出每个运行的起始位置。

 // input data on the host
    const char data[] = "aaabbbbbcddeeeeeeeeeff";

    const size_t N = (sizeof(data) / sizeof(char)) - 1;

    // copy input data to the device
    thrust::device_vector<char> input(data, data + N);

    // allocate storage for output data and run lengths
    thrust::device_vector<char> output(N);
    thrust::device_vector<int>  lengths(N);

    // print the initial data
    std::cout << "input data:" << std::endl;
    thrust::copy(input.begin(), input.end(), std::ostream_iterator<char>(std::cout, ""));
    std::cout << std::endl << std::endl;

    // compute run lengths
    size_t num_runs = thrust::reduce_by_key
                                    (input.begin(), input.end(),          // input key sequence
                                     thrust::constant_iterator<int>(1),   // input value sequence
                                     output.begin(),                      // output key sequence
                                     lengths.begin()                      // output value sequence
                                     ).first - output.begin();            // compute the output size

    // print the output
    std::cout << "run-length encoded output:" << std::endl;
    for(size_t i = 0; i < num_runs; i++)
        std::cout << "(" << output[i] << "," << lengths[i] << ")";
    std::cout << std::endl;

    return 0;

1 个答案:

答案 0 :(得分:3)

一种可能的方法,建立在您已经展示的内容之上:

  1. 获取输出长度,并对它们执行exclusive_scan。这将创建每个运行的起始索引的相应向量。

  2. 使用流压缩(stream compaction,例如 remove_if;下面的示例代码用 copy_if 实现)从所有数组(output、lengths 和 indexes)中删除对应长度为 1 的元素。我们分两步完成:第一步清理 output 和 indexes,以 lengths 作为 stencil(模板序列);第二步直接对 lengths 本身进行操作。如果同时对这 3 个数组进行操作,可以显著改进这一点,但这会使输出长度的计算稍微复杂一些。具体如何处理取决于您打算保留哪些数据集。

  3. 这是一个完整的示例,扩展了您的代码:

    $ cat t601.cu
    #include <iostream>
    #include <thrust/device_vector.h>
    #include <thrust/copy.h>
    #include <thrust/reduce.h>
    #include <thrust/scan.h>
    #include <thrust/iterator/constant_iterator.h>
    #include <thrust/iterator/zip_iterator.h>
    
    /// Stateless predicate that selects every value different from 1.
    ///
    /// In this example it is handed to thrust::copy_if and evaluated on the
    /// run lengths, so that runs of length 1 are filtered out.
    struct is_not_one{

    /// @param data value to test; any type that can be compared with the int literal 1
    /// @return true when data != 1, false otherwise
    ///
    /// The call operator is const because the functor has no state; this makes
    /// it usable through const objects and const references as well.
    template <typename T>
       __host__ __device__
       bool operator()(T data) const {
         return data != 1;
       }
    };
    
    int main(){
    
    // input data on the host
        const char data[] = "aaabbbbbcddeeeeeeeeeff";
    
        const size_t N = (sizeof(data) / sizeof(char)) - 1;
    
        // copy input data to the device
        thrust::device_vector<char> input(data, data + N);
    
        // allocate storage for output data and run lengths
        thrust::device_vector<char> output(N);
        thrust::device_vector<int>  lengths(N);
    
        // print the initial data
        std::cout << "input data:" << std::endl;
        thrust::copy(input.begin(), input.end(), std::ostream_iterator<char>(std::cout, ""));
        std::cout << std::endl << std::endl;
    
        // compute run lengths
        size_t num_runs = thrust::reduce_by_key
                                        (input.begin(), input.end(),          // input key sequence
                                         thrust::constant_iterator<int>(1),   // input value sequence
                                         output.begin(),                      // output key sequence
                                         lengths.begin()                      // output value sequence
                                         ).first - output.begin();            // compute the output size
    
        // print the output
        std::cout << "run-length encoded output:" << std::endl;
        for(size_t i = 0; i < num_runs; i++)
            std::cout << "(" << output[i] << "," << lengths[i] << ")";
        std::cout << std::endl;
    
        thrust::device_vector<int> indexes(num_runs);
        thrust::exclusive_scan(lengths.begin(), lengths.begin()+num_runs, indexes.begin());
        thrust::device_vector<char> foutput(num_runs);
        thrust::device_vector<int>  findexes(num_runs);
        thrust::device_vector<int>  flengths(num_runs);
        thrust::copy_if(thrust::make_zip_iterator(thrust::make_tuple(output.begin(), indexes.begin())), thrust::make_zip_iterator(thrust::make_tuple(output.begin()+num_runs, indexes.begin()+num_runs)), lengths.begin(), thrust::make_zip_iterator(thrust::make_tuple(foutput.begin(), findexes.begin())), is_not_one());
        size_t fnum_runs = thrust::copy_if(lengths.begin(), lengths.begin()+num_runs, flengths.begin(), is_not_one()) - flengths.begin();
        std::cout << "output: " << std::endl;
        thrust::copy_n(foutput.begin(), fnum_runs, std::ostream_iterator<char>(std::cout, ","));
        std::cout << std::endl << "lengths: " << std::endl;
        thrust::copy_n(flengths.begin(), fnum_runs, std::ostream_iterator<int>(std::cout, ","));
        std::cout << std::endl << "indexes: " << std::endl;
        thrust::copy_n(findexes.begin(), fnum_runs, std::ostream_iterator<int>(std::cout, ","));
        std::cout << std::endl;
    
        return 0;
    
    }
    $ nvcc -arch=sm_20 -o t601 t601.cu
    $ ./t601
    input data:
    aaabbbbbcddeeeeeeeeeff
    
    run-length encoded output:
    (a,3)(b,5)(c,1)(d,2)(e,9)(f,2)
    output:
    a,b,d,e,f,
    lengths:
    3,5,2,9,2,
    indexes:
    0,3,9,11,20,
    $
    

    我确信此代码可以改进,但我的目的是向您展示一种可能的一般方法。

    供将来参考:在我看来,把示例代码中的 #include 头文件删掉并没有什么好处。我认为最好提供完整的、可编译的代码。不过在这个例子里问题不大。

    另请注意,Thrust 自带了行程长度编码(run length encoding)和解码(run length decoding)的示例代码。