我有一个矩阵,需要找出该矩阵每一行中最大的 2 个数及其位置。
我最初的做法是对矩阵的每一行进行排序,然后查看最后两个值。虽然我可以对每一行进行排序,但无法得到用于还原原始索引的排列向量。我的尝试(参考了 SO 上的其他一些帖子)如下:
// Running count of how many keys my_mod() has handed out so far.
int my_mod_start = 0;

// Generator for segment keys: returns 0 for the first ten calls, 1 for the
// next ten, and so on (call count divided by 10), advancing the counter
// on every call.
int my_mod()
{
    const int issued = my_mod_start;
    ++my_mod_start;
    return issued / 10;
}
// Problem size: rows * cols values held in one flat array.
const int rows = 2;
const int cols = 10;
const int num_points = rows * cols;
thrust::host_vector<float> data(num_points);
// fill with random values
// Copy the values to the device.
thrust::device_vector<float> d_r = data;
// One integer key per value, produced by successive calls to my_mod
// (0 for the first ten elements, 1 for the next ten, ...).
thrust::host_vector<int> h_segments(rows*cols);
thrust::generate(h_segments.begin(), h_segments.end(), my_mod);
thrust::device_vector<int> d_segments = h_segments;
// Pass 1: sort all values, carrying each value's segment key along with it.
thrust::stable_sort_by_key(d_r.begin(), d_r.end(), d_segments.begin());
// Pass 2: stable sort by segment key, carrying the values along; because the
// sort is stable, values sharing a key keep the ascending order from pass 1.
// Note that nothing here records where each value originally was.
thrust::stable_sort_by_key(d_segments.begin(), d_segments.end(),
d_r.begin());
虽然这种方法按预期排序每一行,但我不知道如何修改它以获取每个值的原始索引。
如果我只需要最大的 2 个值及其位置,
那么对整行进行排序可能是一种浪费。
答案 0 :(得分:2)
我改编了 Robert Crovella 在 “Determining the least element and its position in each matrix column with CUDA Thrust” 中指出的方法。该方法处理的是确定最小值(而不是最大值)的问题,并产生两个迭代器和一个向量:
d_min_indices_1:迭代器,指向每行最小元素的(列)索引;
d_min_indices_2:迭代器,指向每行第二小元素的(列)索引;
d_matrix:原始矩阵,但每行已按升序排列。
最小元素和第二小元素的值可以从排序后的矩阵 d_matrix 中确定。
如果要确定最大值,请更改其中的两行(完整代码如下):
#include <algorithm>
#include <iostream>
#include <iterator>
#include <thrust/random.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <thrust/sort.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
// View over the range [first, last) that visits only every stride-th
// element: first[0], first[stride], first[2 * stride], ...
// begin()/end() return permutation iterators into the underlying range.
template <typename Iterator>
class strided_range
{
public:
    typedef typename thrust::iterator_difference<Iterator>::type difference_type;

    // Maps position i of the strided sequence to offset stride * i in the
    // underlying range.
    struct stride_functor : public thrust::unary_function<difference_type,difference_type>
    {
        difference_type stride;

        stride_functor(difference_type step)
            : stride(step) {}

        __host__ __device__
        difference_type operator()(const difference_type& i) const
        {
            return stride * i;
        }
    };

    typedef typename thrust::counting_iterator<difference_type> CountingIterator;
    typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
    typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;

    // Iterator type handed out by begin() and end().
    typedef PermutationIterator iterator;

    // Builds a strided view of [first, last) with the given step.
    strided_range(Iterator first, Iterator last, difference_type stride)
        : first(first), last(last), stride(stride) {}

    // Iterator to the first visited element (offset 0 from `first`).
    iterator begin() const
    {
        CountingIterator positions(0);
        TransformIterator offsets(positions, stride_functor(stride));
        return PermutationIterator(first, offsets);
    }

    // Iterator one past the last visited element; the number of visited
    // elements is the length of [first, last) divided by stride, rounded up.
    iterator end() const
    {
        const difference_type length = last - first;
        const difference_type visited = (length + (stride - 1)) / stride;
        return begin() + visited;
    }

protected:
    Iterator first;
    Iterator last;
    difference_type stride;
};
/*******************************************************************/
/* REMAINDER FUNCTOR - CONVERTS A LINEAR INDEX TO A COLUMN INDEX   */
/*******************************************************************/
// Binary functor returning a % b. main() applies it to
// (linear index, Ncols) to obtain the column index of every matrix element.
// The call operator is const-qualified so the functor can also be invoked
// through a const object or const reference.
template< typename T >
struct mod_functor {
    __host__ __device__ T operator()(T a, T b) const { return a % b; }
};
/********/
/* MAIN */
/********/
int main()
{
/***********************/
/* SETTING THE PROBLEM */
/***********************/
const int Nrows = 4;
const int Ncols = 6;
// --- Random uniform integer distribution between 10 and 99
thrust::default_random_engine rng;
thrust::uniform_int_distribution<int> dist(10, 99);
// --- Matrix allocation and initialization
thrust::device_vector<float> d_matrix(Nrows * Ncols);
for (size_t i = 0; i < d_matrix.size(); i++) d_matrix[i] = (float)dist(rng);
for(int i = 0; i < Nrows; i++) {
std::cout << "[ ";
for(int j = 0; j < Ncols; j++)
std::cout << d_matrix[i * Ncols + j] << " ";
std::cout << "]\n";
}
/******************/
/* APPROACH NR. 2 */
/******************/
// --- Computing row indices vector
thrust::device_vector<int> d_row_indices(Nrows * Ncols);
thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(Nrows * Ncols), thrust::make_constant_iterator(Ncols), d_row_indices.begin(), thrust::divides<int>() );
// --- Computing column indices vector
thrust::device_vector<int> d_column_indices(Nrows * Ncols);
thrust::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(Nrows * Ncols), thrust::make_constant_iterator(Ncols), d_column_indices.begin(), mod_functor<int>());
// --- int and float iterators
typedef thrust::device_vector<int>::iterator IntIterator;
typedef thrust::device_vector<float>::iterator FloatIterator;
// --- Relevant tuples of int and float iterators
typedef thrust::tuple<IntIterator, IntIterator> IteratorTuple1;
typedef thrust::tuple<FloatIterator, IntIterator> IteratorTuple2;
// --- zip_iterator of the relevant tuples
typedef thrust::zip_iterator<IteratorTuple1> ZipIterator1;
typedef thrust::zip_iterator<IteratorTuple2> ZipIterator2;
// --- zip_iterator creation
ZipIterator1 iter1(thrust::make_tuple(d_row_indices.begin(), d_column_indices.begin()));
thrust::stable_sort_by_key(d_matrix.begin(), d_matrix.end(), iter1);
ZipIterator2 iter2(thrust::make_tuple(d_matrix.begin(), d_column_indices.begin()));
thrust::stable_sort_by_key(d_row_indices.begin(), d_row_indices.end(), iter2);
typedef thrust::device_vector<int>::iterator Iterator;
// --- Strided access to the sorted array
strided_range<Iterator> d_min_indices_1(d_column_indices.begin(), d_column_indices.end(), Ncols);
strided_range<Iterator> d_min_indices_2(d_column_indices.begin() + 1, d_column_indices.end() + 1, Ncols);
printf("\n\n");
for(int i = 0; i < Nrows; i++) {
std::cout << "[ ";
for(int j = 0; j < Ncols; j++)
std::cout << d_matrix[i * Ncols + j] << " ";
std::cout << "]\n";
}
printf("\n\n");
std::copy(d_min_indices_1.begin(), d_min_indices_1.end(), std::ostream_iterator<int>(std::cout, " "));
std::cout << std::endl;
printf("\n\n");
std::copy(d_min_indices_2.begin(), d_min_indices_2.end(), std::ostream_iterator<int>(std::cout, " "));
std::cout << std::endl;
return 0;
}
需要更改的是下面这两行:由于每行已按升序排列,最大值和第二大值位于每行的末尾,因此应把两个起始偏移 0 和 1 分别改为 Ncols - 1 和 Ncols - 2:
// The first range starts at offset 0 and the second at offset 1 of
// d_column_indices; both advance in steps of Ncols.
strided_range<Iterator> d_min_indices_1(d_column_indices.begin(), d_column_indices.end(), Ncols);
strided_range<Iterator> d_min_indices_2(d_column_indices.begin() + 1, d_column_indices.end() + 1, Ncols);