我尝试对由 float3 组成的元组(thrust::tuple<float3, float3>)使用 thrust::unique。但是,它似乎没有返回正确的结果。这是一个完整的例子:
#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>
// --- Component-wise exact equality of two float3 values.
// Returns true only when x, y and z all compare equal with ==.
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b)
{
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}
// --- Pair of float3 values handled as a single element.
using tuple_t = thrust::tuple<float3, float3>;

// --- Binary predicate: two tuples are equal when both of their float3
//     members are equal according to the float3 operator==.
struct tupleEqual
{
    __host__ __device__
    bool operator()(tuple_t x, tuple_t y)
    {
        const bool firstsMatch  = (thrust::get<0>(x) == thrust::get<0>(y));
        const bool secondsMatch = (thrust::get<1>(x) == thrust::get<1>(y));
        return firstsMatch && secondsMatch;
    }
};
/********/
/* MAIN */
/********/
// Builds six (v, d) float3 pairs on the device, runs thrust::unique on them
// with tupleEqual and prints how many elements remain, followed by the
// surviving elements.
//
// Note: thrust::unique only collapses runs of *adjacent* equal elements. The
// input below repeats some pairs (e.g. elements 0, 2 and 5) but never next to
// each other, so nothing is removed and Nnew is 6.
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3>  v(N), d(N);
    thrust::device_vector<tuple_t> vd(N);

    v[0] = make_float3(2.f, 5.f, 9.f);  d[0] = make_float3(2.f, 3.f, 10.f);
    v[1] = make_float3(3.f, 2.f, 1.f);  d[1] = make_float3(2.f, 5.f, 9.f);
    v[2] = make_float3(2.f, 5.f, 9.f);  d[2] = make_float3(2.f, 3.f, 10.f);
    v[3] = make_float3(2.f, 3.f, 10.f); d[3] = make_float3(2.f, 5.f, 9.f);
    v[4] = make_float3(2.f, 3.f, 10.f); d[4] = make_float3(1.f, 1.f, 1.f);
    v[5] = make_float3(2.f, 5.f, 9.f);  d[5] = make_float3(2.f, 3.f, 10.f);

    // Pack each (v[k], d[k]) pair into one tuple element.
    for (int k = 0; k < N; k++) {
        vd[k] = thrust::make_tuple(v[k], d[k]);
    }

    auto new_end = thrust::unique(vd.begin(), vd.end(), tupleEqual());
    const size_t Nnew = static_cast<size_t>(new_end - vd.begin());

    // size_t must be printed with %zu; %d is undefined behaviour here.
    printf("Nnew = %zu\n", Nnew);

    for (size_t k = 0; k < Nnew; k++) {
        const tuple_t temp  = vd[k];   // copies the element back to the host
        const float3  vtemp = thrust::get<0>(temp);
        const float3  dtemp = thrust::get<1>(temp);
        printf("%zu %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}
我获得的结果是
Nnew = 6
0 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000
1 3.000000 2.000000 1.000000 2.000000 5.000000 9.000000
2 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000
3 2.000000 3.000000 10.000000 2.000000 5.000000 9.000000
4 2.000000 3.000000 10.000000 1.000000 1.000000 1.000000
5 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000
这正是输入而没有任何重复删除。
我在 Windows 10 上使用 Visual Studio 2015,分别用 CUDA 8.0 和 CUDA 9.1 进行编译(两者结果相同)。
我的问题是:我做错了什么?
答案 0 :(得分:3)
您之所以看不到输出有任何变化,是因为您的输入中不包含任何连续的重复元素。如果我像下面这样修改代码中的输入:
#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>
// Component-wise exact equality of two float3 values: true only when the
// x, y and z members all compare equal with ==.
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b)
{
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}
// Pair of float3 values handled as a single element.
using tuple_t = thrust::tuple<float3, float3>;

// Binary predicate: two tuples are equal when both of their float3 members
// are equal according to the float3 operator==.
struct tupleEqual
{
    __host__ __device__
    bool operator()(tuple_t x, tuple_t y)
    {
        const bool firstsMatch  = (thrust::get<0>(x) == thrust::get<0>(y));
        const bool secondsMatch = (thrust::get<1>(x) == thrust::get<1>(y));
        return firstsMatch && secondsMatch;
    }
};
// Demonstrates thrust::unique on input that contains adjacent duplicates:
// elements 0-1 hold one (v, d) pair and elements 2-5 hold another, so the
// two runs collapse to two elements.
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3>  v(N), d(N);
    thrust::device_vector<tuple_t> vd(N);

    const float3 a = make_float3(2.f, 5.f, 9.f);
    const float3 b = make_float3(2.f, 3.f, 10.f);

    // First run (k = 0, 1): (a, b). Second run (k = 2..5): (b, a).
    for (int k = 0; k < N; ++k) {
        const bool inFirstRun = (k < 2);
        v[k]  = inFirstRun ? a : b;
        d[k]  = inFirstRun ? b : a;
        vd[k] = thrust::make_tuple(v[k], d[k]);
    }

    auto new_end = thrust::unique(vd.begin(), vd.end(), tupleEqual());
    const size_t Nnew = new_end - vd.begin();
    printf("Nnew = %zu\n", Nnew);

    int k = 0;
    while (k < Nnew) {
        tuple_t element = vd[k];   // copies the element back to the host
        float3 first  = thrust::get<0>(element);
        float3 second = thrust::get<1>(element);
        printf("%d %f %f %f %f %f %f\n", k, first.x, first.y, first.z, second.x, second.y, second.z);
        k++;
    }

    return 0;
}
使其确实包含连续的相同元素,那么去重就会按预期工作:
$ nvcc -arch=sm_52 -std=c++11 -o float3 float3.cu
$ ./float3
Nnew = 2
0 2.000000 5.000000 9.000000 2.000000 3.000000 10.000000
1 2.000000 3.000000 10.000000 2.000000 5.000000 9.000000
thrust::unique 只会删除输入范围中连续出现的相同元素,它不会对输入进行排序。引自文档:
对于 [first, last) 范围内每一组具有相同值的连续元素,unique 会删除该组中除第一个元素之外的所有元素。
(强调部分是我加的。)这里唯一的错误在于对该函数行为的理解。您编写的代码是正确的,并且按预期工作。
答案 1 :(得分:1)
Talonmies 已经回答了我的问题,指出重复元素必须是连续的,而这正是我昨天忽略的一点。
下面给出对我的代码的修改:先利用 3D Morton 码对 v 和 d 进行双重排序,使重复的元素彼此相邻,然后再调用 thrust::unique。
#include <iostream>
#include <thrust/tuple.h>
#include <thrust/device_vector.h>
#include <thrust/unique.h>
#include <thrust/sort.h>
/*********************************/
/* EQUALITY BETWEEN TWO FLOAT3'S */
/*********************************/
// Component-wise exact equality of two float3 values: true only when the
// x, y and z members all compare equal with ==.
__host__ __device__ __forceinline__ bool operator==(const float3 &a, const float3 &b)
{
    if (a.x != b.x) return false;
    if (a.y != b.y) return false;
    return a.z == b.z;
}
/*************************************/
/* BINARY PREDICATE FOR A TUPLE PAIR */
/*************************************/
// Pair of float3 values handled as a single element.
using tuple_t = thrust::tuple<float3, float3>;

// Binary predicate: two tuples are equal when both of their float3 members
// are equal according to the float3 operator==.
struct tupleEqual
{
    __host__ __device__
    bool operator()(tuple_t x, tuple_t y)
    {
        const bool firstsMatch  = (thrust::get<0>(x) == thrust::get<0>(y));
        const bool secondsMatch = (thrust::get<1>(x) == thrust::get<1>(y));
        return firstsMatch && secondsMatch;
    }
};
/**********************************/
/* MORTON ENCODER KERNEL FUNCTION */
/**********************************/
// --- Spreads a 10-bit integer over 30 bits: two zero bits are inserted after
//     each input bit, so input bit i ends up at output bit 3*i.
//     Each step adds a shifted copy of the value to itself (the same as
//     multiplying by 2^s + 1 in unsigned arithmetic) and then masks away the
//     bits that are not wanted at that stage.
__host__ __device__ __forceinline__ unsigned int expandBits(unsigned int v)
{
    unsigned int spread = v;
    spread = (spread + (spread << 16)) & 0xFF0000FFu;
    spread = (spread + (spread << 8))  & 0x0F00F00Fu;
    spread = (spread + (spread << 4))  & 0xC30C30C3u;
    spread = (spread + (spread << 2))  & 0x49249249u;
    return spread;
}
// --- Computes a 30-bit Morton code for a 3D point expected to lie in the
//     unit cube [0,1]. Each coordinate is scaled by 1024, clamped to
//     [0, 1023], truncated to an integer and bit-expanded; the three expanded
//     values are then interleaved with x in the highest position of each
//     3-bit group, followed by y and z.
__host__ __device__ __forceinline__ unsigned int morton3D(float x, float y, float z)
{
    const float scaledX = min(max(x * 1024.0f, 0.0f), 1023.0f);
    const float scaledY = min(max(y * 1024.0f, 0.0f), 1023.0f);
    const float scaledZ = min(max(z * 1024.0f, 0.0f), 1023.0f);

    const unsigned int bitsX = expandBits(static_cast<unsigned int>(scaledX));
    const unsigned int bitsY = expandBits(static_cast<unsigned int>(scaledY));
    const unsigned int bitsZ = expandBits(static_cast<unsigned int>(scaledZ));

    return (bitsX << 2) + (bitsY << 1) + bitsZ;
}
/*************************/
/* CUSTOMIZED COMPARATOR */
/*************************/
// Strict ordering on (v, d) float3 pairs, used to sort the data so that equal
// pairs become adjacent before thrust::unique is applied.
//
// Primary key:   Morton code of the first float3 (v).
// Secondary key: Morton code of the second float3 (d).
// Tie-break:     exact lexicographic comparison of v, then of d.
//
// The tie-break matters because morton3D clamps and quantises each
// coordinate, so different points can share a Morton code. Without it, two
// distinct pairs with identical codes would compare as equivalent, the sort
// could interleave them (A, B, A), and the exact-equality predicate passed to
// thrust::unique would then fail to remove the second A.
struct customizedComparator {
    __host__ __device__
    bool operator()(const tuple_t &t1, const tuple_t &t2) const {
        const float3 v1 = thrust::get<0>(t1);
        const float3 d1 = thrust::get<1>(t1);
        const float3 v2 = thrust::get<0>(t2);
        const float3 d2 = thrust::get<1>(t2);

        // Compare by the Morton code of the first members.
        const unsigned int mortonV1 = morton3D(v1.x, v1.y, v1.z);
        const unsigned int mortonV2 = morton3D(v2.x, v2.y, v2.z);
        if (mortonV1 != mortonV2) return (mortonV1 < mortonV2);

        // Then by the Morton code of the second members.
        const unsigned int mortonD1 = morton3D(d1.x, d1.y, d1.z);
        const unsigned int mortonD2 = morton3D(d2.x, d2.y, d2.z);
        if (mortonD1 != mortonD2) return (mortonD1 < mortonD2);

        // Same codes: fall back to the exact values so that only truly equal
        // pairs are equivalent under this ordering.
        if (v1.x != v2.x) return (v1.x < v2.x);
        if (v1.y != v2.y) return (v1.y < v2.y);
        if (v1.z != v2.z) return (v1.z < v2.z);
        if (d1.x != d2.x) return (d1.x < d2.x);
        if (d1.y != d2.y) return (d1.y < d2.y);
        return (d1.z < d2.z);
    }
};
/********/
/* MAIN */
/********/
// Removes duplicate (v, d) float3 pairs from two parallel device vectors.
// The pairs are viewed through a zip iterator, sorted with
// customizedComparator so that equal pairs become adjacent, and then
// compacted in place with thrust::unique. Prints the number of surviving
// pairs followed by the pairs themselves.
int main(void)
{
    const int N = 6;

    thrust::device_vector<float3> v(N), d(N);

    v[0] = make_float3(.2f, .5f, .09f);   d[0] = make_float3(0.2f, 0.3f, 0.1f);
    v[1] = make_float3(.3f, .2f, .1f);    d[1] = make_float3(.2f, .5f, .09f);
    v[2] = make_float3(.2f, .5f, .09f);   d[2] = make_float3(0.2f, 0.3f, 0.1f);
    v[3] = make_float3(0.2f, 0.3f, 0.1f); d[3] = make_float3(.2f, .5f, .09f);
    v[4] = make_float3(0.2f, 0.3f, 0.1f); d[4] = make_float3(.1f, .1f, .1f);
    v[5] = make_float3(.2f, .5f, .09f);   d[5] = make_float3(0.2f, 0.3f, 0.1f);

    // Build the zipped range once instead of repeating the expression.
    auto first = thrust::make_zip_iterator(thrust::make_tuple(v.begin(), d.begin()));
    auto last  = first + N;

    // unique only removes adjacent duplicates, so sort first.
    thrust::sort(first, last, customizedComparator());
    auto new_end = thrust::unique(first, last, tupleEqual());

    const size_t Nnew = static_cast<size_t>(new_end - first);

    // size_t must be printed with %zu; %d is undefined behaviour here.
    printf("Nnew = %zu\n", Nnew);

    // Only the first Nnew entries of v and d are meaningful after unique.
    for (size_t k = 0; k < Nnew; k++) {
        const float3 vtemp = v[k];
        const float3 dtemp = d[k];
        printf("%zu %f %f %f %f %f %f\n", k, vtemp.x, vtemp.y, vtemp.z, dtemp.x, dtemp.y, dtemp.z);
    }

    return 0;
}