我想为 vector $\vec{a}$ 和 $\vec{b}$ 计算 $\lVert \vec{a} - \vec{b} \rVert$,其中 $\lVert \vec{x} \rVert$ 表示 vector $\vec{x}$ 的大小(模)。由于这涉及取两个 vector 的每个对应分量之间的差的平方和的平方根,因此它应该是高度可并行的任务。我正在Windows 10上通过Cygwin使用Cuda和Thrust,Cuda和Thrust都可以正常工作。

下面的代码可以编译并运行(使用nvcc),但这只是因为我在main的底部注释了三行,我认为每行都应该起作用,但不行。 func::operator()(tup t)认为我传递的参数实际上不是tup类型。

我还注释掉了运算符的实际主体,以使其更有可能至少被编译。运算符(operator)应该求出输入tup的两个元素之差的平方。然后transform_reduce的归约会对unary_op(在本例中为func())的各个结果求和,得到 vector 差的模的平方。

#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

typedef thrust::device_vector<float> dvec;
typedef dvec::iterator iter;
typedef thrust::tuple<iter, iter> tup;

// Unary functor meant to be applied to each element of a zip_iterator over
// two float device vectors and to return the squared difference of the pair.
//
// NOTE: `tup` is thrust::tuple<iter, iter>, i.e. a tuple of device_vector
// iterators. The compiler output for this program shows the argument arriving
// as a tuple of thrust::device_reference<float>, which has no conversion to a
// tuple of iterators -- that mismatch is the reported error. The parameter
// needs to be a tuple of values (thrust::tuple<float, float>) instead.
struct func: public thrust::unary_function<tup, float>
{
  __device__ float operator()(tup t) //difsq
  {
    // Intended body (squared difference of the two tuple components),
    // disabled while trying to get the call itself to compile:
    // I've commented out these two lines for testing purposes:
    // float f = thrust::get<0>(t) - thrust::get<1>(t);
    // return f*f;
    // Placeholder result; the double literal is narrowed to float on return.
    return 3.14;
  }
};

// Minimal reproduction: zips two 40-element device vectors (filled with 4.f
// and 3.f), shows that element access through the zip iterator works, and
// lists the three calls involving func that fail to compile.
int main()
{
  dvec a(40, 4.f);
  dvec b(40, 3.f);
  // Zip iterators over corresponding elements of a and b.
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));

  //these two lines work
  // begin[0] is the first (a, b) pair; get<N> picks one component of it.
  thrust::get<0>(begin[0]);
  // Prints the difference of the first pair (no newline is written here).
  std::cout << thrust::get<0>(begin[0]) - thrust::get<1>(begin[0]);


  //these three lines do not
  // Each of them hands a dereferenced zip-iterator value to
  // func::operator()(tup); per the compiler output that value is a tuple of
  // device_reference<float>, which cannot be converted to the
  // tuple-of-iterators type `tup`.
  //thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>());
  //func()(begin[0]);
  // NOTE(review): this one also writes func's float result back through the
  // zip iterator `begin`, whose elements are pairs -- confirm the intended
  // output range even once the functor's parameter type is corrected.
  //thrust::transform(begin, end, begin, func());


  std::cout << "done" << std::endl;
  return 0;
}

我收到此错误:(我的程序称为sandbox.cu)
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(310): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list
        argument types are: (const thrust::device_reference<float>)
      detected during:
        instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here
        instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
sandbox.cu(37): here

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/detail/tuple.inl(411): error: no instance of constructor "thrust::detail::normal_iterator<Pointer>::normal_iterator [with Pointer=thrust::device_ptr<float>]" matches the argument list
            argument types are: (const thrust::device_reference<float>)
          detected during:
            instantiation of "thrust::detail::cons<HT, thrust::null_type>::cons(const thrust::detail::cons<HT2, thrust::null_type> &) [with HT=iter, HT2=thrust::device_reference<float>]"
(310): here
            instantiation of "thrust::detail::cons<HT, TT>::cons(const thrust::detail::cons<HT2, TT2> &) [with HT=iter, TT=thrust::detail::cons<iter, thrust::null_type>, HT2=thrust::device_reference<float>, TT2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5\bin/../include\thrust/tuple.h(361): here
            instantiation of "thrust::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>::tuple(const thrust::detail::cons<U1, U2> &) [with T0=iter, T1=iter, T2=thrust::null_type, T3=thrust::null_type, T4=thrust::null_type, T5=thrust::null_type, T6=thrust::null_type, T7=thrust::null_type, T8=thrust::null_type, T9=thrust::null_type, U1=thrust::device_reference<float>, U2=thrust::detail::cons<thrust::device_reference<float>, thrust::null_type>]"
sandbox.cu(37): here

2 errors detected in the compilation of "C:/cygwin64/tmp/tmpxft_00001a90_00000000-10_sandbox.cpp1.ii".

最佳答案

解决了! tup应该是thrust::tuple<float, float>,而不是thrust::tuple<iter, iter>。完整解决方案:

#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

typedef thrust::device_vector<float> dvec;
typedef thrust::tuple<float, float> tup;

struct func
{
  __device__ float operator()(tup t) //difsq
  {
     float f = thrust::get<0>(t) - thrust::get<1>(t);
     return f*f;
  }
};

int main()
{
  dvec a(4, 3.f);
  dvec b(4, 2.f);
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));
  std::cout << thrust::transform_reduce(begin, end, func(), 0.0f, thrust::plus<float>()) << std::endl;
  std::cout << "done" << std::endl;
  return 0;
}

关于c++ - CUDA Thrust zip_iterator tuple transform_reduce,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/36436432/

10-11 18:03