Question

我需要沿着矩阵的列以及使用推力的行索引取最小值。我使用以下代码（从Orange owl解决方案复制），但是在编译时出现错误。我已将其作为问题发布在相应的git页面上。错误消息很大，我不知道如何调试它。有人可以帮我吗？我正在使用cuda-8.0，推力版本1.8。

代码：

#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>

using namespace thrust::placeholders;

int main()
{
    const int Nrows = 6;
    const int Ncols = 8;

    /**************************/
    /* SETTING UP THE PROBLEM */
    /**************************/

    // --- Random uniform integer distribution between 0 and 100
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(0, 20);

    // --- Matrix allocation and initialization
    thrust::device_vector<double> d_matrix(Nrows * Ncols);
    for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);

    printf("\n\nMatrix\n");
    for(int i = 0; i < Nrows; i++) {
        std::cout << " [ ";
        for(int j = 0; j < Ncols; j++)
            std::cout << d_matrix[i * Ncols + j] << " ";
        std::cout << "]\n";
    }

    /**********************************************/
    /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
    /**********************************************/
    thrust::device_vector<float> d_minima(Ncols);
    thrust::device_vector<int> d_indices(Ncols);

    thrust::reduce_by_key(
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 / Nrows),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 / Nrows) + Nrows * Ncols,
            thrust::make_zip_iterator(
                    thrust::make_tuple(thrust::make_permutation_iterator(
                                    d_matrix.begin(),
                                    thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows) * Ncols + _1 / Nrows)),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), _1 % Nrows))),
            thrust::make_discard_iterator(),
            thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
            thrust::equal_to<int>(),
            thrust::minimum<thrust::tuple<float, int> >()
    );

    printf("\n\n");
    for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";

    return 0;
}

错误：

/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp(58): error: ambiguous "?" operation: second operand of type "const thrust::tuple<double, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>" can be converted to third operand type "thrust::tuple<float, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>", and vice versa
          detected during:
            instantiation of "thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::result_type thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::operator()(const thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::first_argument_type &, const thrust::system::cuda::detail::bulk_::detail::reduce_by_key_detail::scan_head_flags_functor<FlagType, ValueType, BinaryFunction>::second_argument_type &) [with FlagType=int, ValueType=thrust::tuple<double, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=thrust::minimum<thrust::tuple<float, int, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>]"

Answer 1

我猜您正在使用this代码。

该代码的一个奇怪特征是使用double类型定义矩阵，但是捕获的最小值存储在float向量中。

如果您想按原样使用该代码，根据我的测试，推力（在CUDA 10中，显然也在CUDA 8中）不喜欢此行：

        thrust::minimum<thrust::tuple<float, int> >()

该运算符用于比较两个项目以确定哪个较小，并且它被模板化以接受不同种类的项目。但是，它决定找到其中至少两个元组是“模棱两可”的请求。造成这种情况的部分原因是，运算符返回了float, int元组，但是却被分别赋予double,int元组或float,int元组。

我们可以通过传递自己的函子来完成这项工作，以解决/解决此问题，这在处理传递给它的元组方面是明确的：

$ cat t373.cu
#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>

using namespace thrust::placeholders;
struct my_min
{
template <typename T1, typename T2>
  __host__ __device__
  T2 operator()(T1 t1, T2 t2){
    if (thrust::get<0>(t1) < thrust::get<0>(t2)) return t1;
    return t2;
    }
};

int main()
{
    const int Nrows = 6;
    const int Ncols = 8;

    /**************************/
    /* SETTING UP THE PROBLEM */
    /**************************/

    // --- Random uniform integer distribution between 0 and 100
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(0, 20);

    // --- Matrix allocation and initialization
    thrust::device_vector<double> d_matrix(Nrows * Ncols);
    for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);

    printf("\n\nMatrix\n");
    for(int i = 0; i < Nrows; i++) {
        std::cout << " [ ";
        for(int j = 0; j < Ncols; j++)
            std::cout << d_matrix[i * Ncols + j] << " ";
        std::cout << "]\n";
    }

    /**********************************************/
    /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
    /**********************************************/
    thrust::device_vector<float> d_minima(Ncols);
    thrust::device_vector<int> d_indices(Ncols);

    thrust::reduce_by_key(
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)) + Nrows * Ncols,
            thrust::make_zip_iterator(
                    thrust::make_tuple(thrust::make_permutation_iterator(
                                    d_matrix.begin(),
                                    thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), ((_1 % Nrows) * Ncols + _1 / Nrows))),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows)))),
            thrust::make_discard_iterator(),
            thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
            thrust::equal_to<int>(),
            my_min()
//            thrust::minimum<thrust::tuple<float, int> >()
    );

    printf("\n\n");
    for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";

    return 0;
}
$ nvcc -o t373 t373.cu
$ ./t373


Matrix
 [ 0 1 12 18 20 3 10 8 ]
 [ 5 15 1 11 12 17 12 10 ]
 [ 18 20 15 20 6 8 18 13 ]
 [ 18 20 3 18 19 6 19 8 ]
 [ 6 10 8 16 14 11 12 1 ]
 [ 12 9 12 17 10 16 1 4 ]


Min position = 0; Min value = 0
Min position = 0; Min value = 1
Min position = 1; Min value = 1
Min position = 1; Min value = 11
Min position = 2; Min value = 6
Min position = 0; Min value = 3
$

我认为更好的解决方法是只选择float或double中的一个。例如，如果我们将所有float类型都修改为double，则推力是令人满意的，而无需进行其他任何更改：

$ cat t373a.cu
#include <iterator>
#include <algorithm>

#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/random.h>

using namespace thrust::placeholders;

int main()
{
    const int Nrows = 6;
    const int Ncols = 8;

    /**************************/
    /* SETTING UP THE PROBLEM */
    /**************************/

    // --- Random uniform integer distribution between 0 and 100
    thrust::default_random_engine rng;
    thrust::uniform_int_distribution<int> dist(0, 20);

    // --- Matrix allocation and initialization
    thrust::device_vector<double> d_matrix(Nrows * Ncols);
    for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (double)dist(rng);

    printf("\n\nMatrix\n");
    for(int i = 0; i < Nrows; i++) {
        std::cout << " [ ";
        for(int j = 0; j < Ncols; j++)
            std::cout << d_matrix[i * Ncols + j] << " ";
        std::cout << "]\n";
    }

    /**********************************************/
    /* FIND ROW MINIMA ALONG WITH THEIR LOCATIONS */
    /**********************************************/
    thrust::device_vector<double> d_minima(Ncols);
    thrust::device_vector<int> d_indices(Ncols);

    thrust::reduce_by_key(
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 / Nrows)) + Nrows * Ncols,
            thrust::make_zip_iterator(
                    thrust::make_tuple(thrust::make_permutation_iterator(
                                    d_matrix.begin(),
                                    thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), ((_1 % Nrows) * Ncols + _1 / Nrows))),
            thrust::make_transform_iterator(thrust::make_counting_iterator((int) 0), (_1 % Nrows)))),
            thrust::make_discard_iterator(),
            thrust::make_zip_iterator(thrust::make_tuple(d_minima.begin(), d_indices.begin())),
            thrust::equal_to<int>(),
            thrust::minimum<thrust::tuple<double, int> >()
    );

    printf("\n\n");
    for (int i=0; i<Nrows; i++) std::cout << "Min position = " << d_indices[i] << "; Min value = " << d_minima[i] << "\n";

    return 0;
}
$ nvcc -o t373a t373a.cu
$ ./t373a


Matrix
 [ 0 1 12 18 20 3 10 8 ]
 [ 5 15 1 11 12 17 12 10 ]
 [ 18 20 15 20 6 8 18 13 ]
 [ 18 20 3 18 19 6 19 8 ]
 [ 6 10 8 16 14 11 12 1 ]
 [ 12 9 12 17 10 16 1 4 ]


Min position = 0; Min value = 0
Min position = 0; Min value = 1
Min position = 1; Min value = 1
Min position = 1; Min value = 11
Min position = 2; Min value = 6
Min position = 0; Min value = 3
$

我认为后一种使用一致类型的解决方案是更明智的解决方案。

无法使用reduce_by_key编译推力代码

1 个答案: