我写了一段最小测试代码,用数组“a”的每个元素去填充数组“c”。结果表明:当以常量作为输入值调用嵌套的 thrust::fill 时,它能用该值正确地填充目标数组。
但是,如果输入值是变化的(即取自值数组的各个元素),目标数组可能只会被其中一个值(第一个或最后一个)填充。
#include <cmath>
#include <cstdio>
#include <iostream>
#include <vector>

#include <boost/concept_check.hpp>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/fill.h>
#include <thrust/functional.h>
#include <thrust/inner_product.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/random.h>
#include <thrust/transform.h>
struct bFuntor
{
bFuntor(int* av__, int* cv__, const int& N__) : av_(av__), cv_(cv__), N_(N__) {};
__host__ __device__
int operator()(const int& idx)
{
thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);
// ** this thrust::fill with varied values does not work
thrust::fill(thrust::device,c0,c1,av_[idx]);
// ** this thrust::fill with constant works
// thrust::fill(thrust::device,c0,c1,10);
printf("fill result:\n");
for (int i=0; i<N_; i++)
printf("fill value: %d -> return value: %d \n",av_[idx],cv_[i]);
printf("\n");
return cv_dpt[idx];
}
int* av_;
int* cv_;
int N_;
};
int main(void)
{
int N = 2;
std::vector<int> av = {0,1};
std::vector<int> cv = {-1,-2};
thrust::device_vector<int> av_d(N);
thrust::device_vector<int> cv_d(N);
av_d = av; cv_d = cv;
// call with nested manner
thrust::transform(thrust::counting_iterator<int>(0),
thrust::counting_iterator<int>(N),
cv_d.begin(),
bFuntor(thrust::raw_pointer_cast(av_d.data()),
thrust::raw_pointer_cast(cv_d.data()),
N));
return 0;
}
不同输入值的输出情况:
fill result:
fill value: 0 -> return value: 1
fill value: 1 -> return value: 1
fill value: 0 -> return value: 1
fill value: 1 -> return value: 1
常量输入值的输出情况:
fill result:
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
fill value: 10 -> return value: 10
这是 Thrust 的问题吗?还是说不应该这样使用?
答案 0 :(得分:1)
这是数据竞争(data race)的一个例子:
// Excerpt of bFuntor::operator(): the range [c0, c1) built below is the same
// for every invocation; only the fill value differs.
int operator()(const int& idx)
{
// Wrap the raw pointers to the first and one-past-the-last element of the
// N_-element buffer cv_.
thrust::device_ptr<int> cv_dpt = thrust::device_pointer_cast(cv_);
thrust::device_ptr<int> cv_dpt1 = thrust::device_pointer_cast(cv_+N_);
// c0/c1 do not depend on idx, so all invocations target the same elements.
thrust::detail::normal_iterator<thrust::device_ptr<int>> c0 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt);
thrust::detail::normal_iterator<thrust::device_ptr<int>> c1 = thrust::detail::make_normal_iterator<thrust::device_ptr<int>>(cv_dpt1);
// Each invocation writes its own av_[idx] over the shared range; when
// invocations run in parallel these writes conflict.
thrust::fill(thrust::device,c0,c1,av_[idx]);
// ..... (remainder of the body omitted in this excerpt)
}
这里,对仿函数的每次调用都会尝试用不同的值填充同一个迭代器范围(c0 到 c1)。显然,当多个仿函数调用并行执行时,就会产生问题。