我很擅长(cuda),并且发现了一些具有挑战性的事情。
(要简化的编辑问题)我有一个输入向量和一张地图:
vector = [8,23,46,500,2,7,91,91]
map = [1, 0, 4, 3,1,0, 5, 3]
我想扩展它并将值增加为:
new_vec = [8,46,47,48,49,500,501,502,2,91,92,93,94,95,91,92,93]
答案 0 :(得分:2)
调整Thrust expand
示例以使用exclusive_scan_by_key
对其子序列中的每个输出元素进行排名,然后按该等级递增:
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/gather.h>
#include <thrust/scan.h>
#include <thrust/fill.h>
#include <thrust/copy.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/functional.h>
#include <iterator>
#include <iostream>
template<typename Vector>
void print(const std::string& s, const Vector& v)
{
typedef typename Vector::value_type T;
std::cout << s;
thrust::copy(v.begin(), v.end(), std::ostream_iterator<T>(std::cout, " "));
std::cout << std::endl;
}
template<typename InputIterator1,
typename InputIterator2,
typename OutputIterator>
void expand_and_increment(InputIterator1 first1,
InputIterator1 last1,
InputIterator2 first2,
OutputIterator output)
{
typedef typename thrust::iterator_difference<InputIterator1>::type difference_type;
difference_type input_size = thrust::distance(first1, last1);
difference_type output_size = thrust::reduce(first1, last1);
// scan the counts to obtain output offsets for each input element
thrust::device_vector<difference_type> output_offsets(input_size);
thrust::exclusive_scan(first1, last1, output_offsets.begin());
print("output_offsets ", output_offsets);
// scatter the nonzero counts into their corresponding output positions
thrust::device_vector<difference_type> output_indices(output_size);
thrust::scatter_if
(thrust::counting_iterator<difference_type>(0),
thrust::counting_iterator<difference_type>(input_size),
output_offsets.begin(),
first1,
output_indices.begin());
// compute max-scan over the output indices, filling in the holes
thrust::inclusive_scan
(output_indices.begin(),
output_indices.end(),
output_indices.begin(),
thrust::maximum<difference_type>());
print("output_indices ", output_indices);
// gather input values according to index array (output = first2[output_indices])
OutputIterator output_end = output; thrust::advance(output_end, output_size);
thrust::gather(output_indices.begin(),
output_indices.end(),
first2,
output);
// rank output_indices
thrust::device_vector<difference_type> ranks(output_size);
thrust::exclusive_scan_by_key(output_indices.begin(), output_indices.end(),
thrust::make_constant_iterator<difference_type>(1),
ranks.begin());
print("ranks ", ranks);
// increment output by ranks
thrust::transform(output, output + output_size, ranks.begin(), output, thrust::placeholders::_1 + thrust::placeholders::_2);
}
int main(void)
{
int values[] = {8,23,46,500,2,7,91,91};
int counts[] = {1, 0, 4, 3,1,0, 5, 3};
size_t input_size = sizeof(counts) / sizeof(int);
size_t output_size = thrust::reduce(counts, counts + input_size);
// copy inputs to device
thrust::device_vector<int> d_counts(counts, counts + input_size);
thrust::device_vector<int> d_values(values, values + input_size);
thrust::device_vector<int> d_output(output_size);
// expand values according to counts
expand_and_increment(d_counts.begin(), d_counts.end(),
d_values.begin(),
d_output.begin());
std::cout << "Expanding and incrementing values according to counts" << std::endl;
print(" counts ", d_counts);
print(" values ", d_values);
print(" output ", d_output);
return 0;
}
输出:
$ nvcc expand_and_increment.cu -run
output_offsets 0 1 1 5 8 9 9 14
output_indices 0 2 2 2 2 3 3 3 4 6 6 6 6 6 7 7 7
ranks 0 0 1 2 3 0 1 2 0 0 1 2 3 4 0 1 2
Expanding and incrementing values according to counts
counts 1 0 4 3 1 0 5 3
values 8 23 46 500 2 7 91 91
output 8 46 47 48 49 500 501 502 2 91 92 93 94 95 91 92 93