在 zip_iterator 上运行的仿函数中嵌套调用 CUDA Thrust 仿函数

时间:2014-01-22 16:20:37

标签: c++ cuda thrust

我在使用 CUDA Thrust 迭代器实现一个在 GPU 上运行的 ODE 求解器例程(用于在 GPU 中求解一组耦合的一阶方程)时遇到了一些困难。我想沿用上一个问题(previous question)中的方法,让用户能够使用处理向量元组的任意仿函数,尽可能自由地编写方程组。具体来说,下面是一小段代码:

#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>

#include <iostream>
#include <math.h>


// Returns the single-precision sine of x.
// Declared __host__ __device__, so it can be called from both host and device code.
__host__ __device__ float f1(float x)
{
  const float sine = sinf(x);
  return sine;
}

// Returns the single-precision cosine of x.
// Declared __host__ __device__, so it can be called from both host and device code.
__host__ __device__ float f2(float x)
{
  const float cosine = cosf(x);
  return cosine;
}

// Returns sinf(x); p_dot negates this value to obtain its result.
// Declared __host__ __device__, so it can be called from both host and device code.
__host__ __device__ float Vx(float x)
{
  const float value = sinf(x);
  return value;
}

// Functor returning (tuple element 1 + delta) / MASS for the tuple it is called with.
// NOTE(review): MASS is not defined anywhere in this listing, so the block does not
// compile as shown.
// NOTE(review): the constructor has no __host__ __device__ qualifiers, yet
// euler_functor constructs a q_dot inside its __host__ __device__ operator().
struct q_dot
{
  float x;      // stored by the constructor; not read by operator()
  float delta;  // offset added to tuple element 1 before the division
  q_dot(float _x,float _delta): x(_x),delta(_delta){};
  template <typename Tuple>
  __host__ __device__
  float operator()(Tuple t)
  {
    // Element 1 of the tuple, shifted by delta.
    float p = thrust::get<1>(t) + delta;
    return  p/MASS;
  }
};


// Functor returning -Vx(tuple element 0 + delta) for the tuple it is called with.
// NOTE(review): the constructor has no __host__ __device__ qualifiers, yet
// euler_functor constructs a p_dot inside its __host__ __device__ operator().
struct p_dot
{
  float x;      // stored by the constructor; not read by operator()
  float delta;  // offset added to tuple element 0 before calling Vx
  p_dot(float _x,float _delta): x(_x),delta(_delta){};
  template <typename Tuple>
  __host__ __device__
  float operator()(Tuple t)
  {
    // Element 0 of the tuple, shifted by delta.
    float q = thrust::get<0>(t) +   delta;
    return  -Vx(q);
  }
};




// Functor passed to thrust::for_each over a zip of (Q, P); called once per tuple.
// NOTE(review): this block does not compile as shown:
//   - `q` and `p` are assigned below but never declared;
//   - `er` is a data member and is assigned inside a const member function.
struct euler_functor
{
  unsigned fn;  // stored by the constructor; only mentioned in the commented-out code below
  float h;      // multiplies the q_dot / p_dot results; h/2 is passed to them as delta
  float er;     // meant to carry an error value back to the caller (see notes above)
  float x0;     // forwarded as the first constructor argument of q_dot / p_dot

  euler_functor(unsigned _fn,float _x0,float _h, float _er) : fn(_fn),h(_h),er(_er),x0(_x0) {};
  template <typename Tuple>
  __host__ __device__
  void operator()(Tuple t) const  {
    // if (fn == 1) y = h*f1(y);
    //else if (fn == 2) y = h*f2(y); This can be handled in this way?

    // NOTE(review): both lines use p_dot; presumably the first was meant to
    // use q_dot -- confirm with the author.
    q =  h*p_dot(x0,h/2)(t);
    p =  h*p_dot(x0,h/2)(t);
    float er_p,er_q;
    // 0.5 is a double literal, so these products are evaluated in double
    // and then narrowed to float.
    er_p=0.5*h*p_dot(x0,h/2)(t);
    er_q=0.5*h*q_dot(x0,h/2)(t);
    // er_q is computed but never used; only er_p is stored.
    er = er_p;

  }
};


// Fills Q and P with arithmetic sequences, applies euler_functor to every
// (Q[i], P[i]) pair through a zip iterator, then prints both vectors.
int main(void)
{
  float t=0;
  float t_step=0.1;
  // NOTE(review): `error` is never initialized before being passed by value
  // to the euler_functor constructor below.
  float error;


  const unsigned N = 8;
  // allocate two device_vectors (Q and P) with N elements each
  thrust::device_vector<float> Q(N),P(N);
  // initialize Q to 0, 6.283/N, 2*6.283/N, ...
  thrust::sequence(Q.begin(), Q.end(),  0.0f, (float)(6.283/(float)N));
  // initialize P to 0, 10.283/N, 2*10.283/N, ...
  thrust::sequence(P.begin(), P.end(),  0.0f, (float)(10.283/(float)N));

  // apply euler for each element of Q and P
  //thrust::for_each(X.begin(),X.end(),euler_functor(1,t,t_step,error)); this becomes:
  thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(Q.begin(),P.begin())),
                   thrust::make_zip_iterator(thrust::make_tuple(  Q.end(),  P.end())),euler_functor(1,t,t_step,error));
  // print the values
  for(int i = 0; i < N; i++) std::cout<< Q[i]<<"  "<<P[i]<< std::endl;
}

但是当我编译上面的代码时,遇到了很多错误。此外,我也不确定这是否是最好的方法。我怎样才能让它工作?我的错误在哪里?有没有更好的方法?另外,保存数值误差信息的 er 变量总是返回零。怎样才能取回这些信息?它可以用来实现一些自适应技巧。

1 个答案:

答案 0 :(得分:0)

您的代码存在许多问题。我不确定能否在这里把它们全部找出来,但至少有:

  1. MASS未定义。
  2. 您的 p_dot 和 q_dot 仿函数需要额外的 __device__ 修饰。
  3. 您在 euler 仿函数中对 p 和 q 变量的使用没有任何意义;它们没有在任何地方定义,而且即使您的意图是把值返回到 P 和 Q 向量,这也不是正确的做法。
  4. 我们不会通过在实例化时传递给仿函数的变量来返回数据。因此,要在每个时间步长返回 er 变量,我会创建单独的向量(ERP 和 ERQ)来执行此操作。
  5. 这是一个修改过的代码,它修复了上述问题和各种其他问题。它似乎返回了合理的结果,不过我没有仔细检查其中的算术。

    #include <thrust/device_vector.h>
    #include <thrust/transform.h>
    #include <thrust/sequence.h>
    #include <thrust/copy.h>
    #include <thrust/fill.h>
    #include <thrust/replace.h>
    #include <thrust/functional.h>
    #include <thrust/for_each.h>
    #include <thrust/iterator/zip_iterator.h>
    
    #include <iostream>
    #include <math.h>
    
    #define MASS 1.0f
    
    // Returns the single-precision sine of x.
    // Declared __host__ __device__, so it can be called from both host and device code.
    __host__ __device__ float f1(float x)
    {
      const float sine = sinf(x);
      return sine;
    }
    
    // Returns the single-precision cosine of x.
    // Declared __host__ __device__, so it can be called from both host and device code.
    __host__ __device__ float f2(float x)
    {
      const float cosine = cosf(x);
      return cosine;
    }
    
    // Returns sinf(x); p_dot negates this value to obtain its result.
    // Declared __host__ __device__, so it can be called from both host and device code.
    __host__ __device__ float Vx(float x)
    {
      const float value = sinf(x);
      return value;
    }
    
    // Functor returning (tuple element 1 + delta) / MASS for the tuple it is called with.
    // Both the constructor and operator() are __host__ __device__, so the functor
    // can be constructed and invoked from device code as well as host code.
    struct q_dot
    {
      float x;      // stored by the constructor; not read by operator()
      float delta;  // offset added to tuple element 1 before the division

      __host__ __device__
      q_dot(float _x, float _delta) : x(_x), delta(_delta) {}

      template <typename Tuple>
      __host__ __device__
      float operator()(Tuple t)
      {
        const float shifted = thrust::get<1>(t) + delta;
        return shifted / MASS;
      }
    };
    
    
    // Functor returning -Vx(tuple element 0 + delta) for the tuple it is called with.
    // Both the constructor and operator() are __host__ __device__, so the functor
    // can be constructed and invoked from device code as well as host code.
    struct p_dot
    {
      float x;      // stored by the constructor; not read by operator()
      float delta;  // offset added to tuple element 0 before calling Vx

      __host__ __device__
      p_dot(float _x, float _delta) : x(_x), delta(_delta) {}

      template <typename Tuple>
      __host__ __device__
      float operator()(Tuple t)
      {
        const float shifted = thrust::get<0>(t) + delta;
        return -Vx(shifted);
      }
    };
    
    
    
    // Per-element functor applied through a 4-way zip iterator.
    // For the tuple t it receives, it overwrites:
    //   element 0 with h       * p_dot(x0, h/2)(t)
    //   element 1 with h       * q_dot(x0, h/2)(t)
    //   element 2 with 0.5 * h * p_dot(x0, h/2)(t)
    //   element 3 with 0.5 * h * q_dot(x0, h/2)(t)
    // Both derivative values are read from the incoming tuple before any element
    // is written.
    // NOTE(review): the elements are overwritten rather than incremented, and
    // element 0 (Q in main) receives the p_dot term while element 1 (P) receives
    // the q_dot term -- confirm this is the intended update rule.
    struct euler_functor
    {
      unsigned fn;  // stored by the constructor; not read by operator()
      float h;      // multiplies the derivative values; h/2 is passed as delta
      float x0;     // forwarded as the first constructor argument of q_dot / p_dot

      euler_functor(unsigned _fn, float _x0, float _h) : fn(_fn), h(_h), x0(_x0) {}

      template <typename Tuple>
      __host__ __device__
      void operator()(const Tuple &t) {
        const float half_h = h/2.0f;

        // Evaluate each derivative once, from the tuple's current contents.
        const float dp = p_dot(x0, half_h)(t);
        const float dq = q_dot(x0, half_h)(t);

        const float new_q = h*dp;
        const float new_p = h*dq;
        // 0.5 is a double literal: the product is formed in double and then
        // narrowed to float.
        const float err_p = 0.5*h*dp;
        const float err_q = 0.5*h*dq;

        thrust::get<0>(t) = new_q;
        thrust::get<1>(t) = new_p;
        thrust::get<2>(t) = err_p;
        thrust::get<3>(t) = err_q;
      }
    };
    
    
    int main(void)
    {
      float t=0;
      float t_step=0.1;
    
    
      const unsigned N = 8;
      // allocate three device_vectors with 10 elements
      thrust::device_vector<float> Q(N),P(N), ERP(N), ERQ(N);
      // initilaize to some values
      thrust::sequence(Q.begin(), Q.end(),  0.0f, (float)(6.283/(float)N));
      // initilaize to some values
      thrust::sequence(P.begin(), P.end(),  0.0f, (float)(10.283/(float)N));
      for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< " "<< ERP[i] << " " << ERQ[i] << std::endl;
      std::cout<< "*****" << std::endl;
      // apply euler for each element of Q and P
      //thrust::for_each(X.begin(),X.end(),euler_functor(1,t,t_step,error)); this becomes:
      thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(Q.begin(),P.begin(),ERP.begin(), ERQ.begin())),thrust::make_zip_iterator(thrust::make_tuple(Q.end(),P.end(),ERP.end(), ERQ.end())),euler_functor(1,t,t_step));
      // print the values
      for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< " "<< ERP[i] << " " << ERQ[i] << std::endl;
    }