我有一个函数 f(x,y,z),其取值只有 1 和 0。我需要找出前 100 个取值等于 1 的坐标,以便将这些位置归约/更新为 0。
这在 C 等其他语言中很容易实现,但我已经尝试用 Halide 解决这个问题好几天了。是否有任何函数或算法可以在 Halide 生成器(generator)中解决它?
答案 0 :(得分:1)
这个问题相当于"如何在 Halide 中实现流压缩(stream compaction)?"。关于并行流压缩的文章很多,而要把它做好并非易事。有关讨论和参考文献,请参阅 Stack Overflow 上这个问题的回答:CUDA stream compaction algorithm
下面是在 Halide 中使用前缀和(prefix sum)快速实现的一个简单流压缩示例:
#include "Halide.h"
#include <iostream>
using namespace Halide;
// Writes the elements of a one-dimensional buffer to stdout in the form
// "{ a, b, c}" followed by a newline. An empty buffer prints "{ }".
static void print_1d(const Buffer<int32_t> &result) {
    const int count = result.dim(0).extent();
    std::cout << "{ ";
    for (int idx = 0; idx < count; ++idx) {
        // Every element except the first is preceded by a separator.
        if (idx > 0) {
            std::cout << ", ";
        }
        std::cout << result(idx);
    }
    std::cout << "}\n";
}
// Demonstrates a simple stream compaction in Halide: the indices of all input
// elements greater than 42 are gathered at the front of the output, and the
// remaining output slots hold -1. Prints the prefix counts and then the
// compacted indices.
int main(int argc, char **argv) {
    uint8_t vals[] = {0, 10, 99, 76, 5, 200, 88, 15};
    Buffer<uint8_t> in(vals);

    // Single source of truth for the problem size, so the reduction domains
    // and the realized outputs always agree with the input buffer.
    const int extent = in.dim(0).extent();

    Var x;

    // prefix_sum(i) = number of elements in in[0, i) that exceed the threshold,
    // i.e. the output slot that element i occupies if it is kept.
    Func prefix_sum;
    RDom range(1, extent - 1);
    prefix_sum(x) = static_cast<int32_t>(0);
    prefix_sum(range) = select(in(range - 1) > 42, prefix_sum(range - 1) + 1, prefix_sum(range - 1));

    // Scatter step: every slot starts as -1; a kept element writes its source
    // index into slot prefix_sum(i), a rejected element writes -1 into that
    // same slot. The clamp restricts the write location to [0, extent - 1].
    RDom in_range(0, extent);
    Func compacted_indices;
    compacted_indices(x) = -1;
    compacted_indices(clamp(prefix_sum(in_range), 0, extent - 1)) = select(in(in_range) > 42, in_range, -1);

    // Realize over the input's extent rather than a hard-coded length.
    Buffer<int32_t> sum = prefix_sum.realize({extent});
    Buffer<int32_t> indices = compacted_indices.realize({extent});

    print_1d(sum);
    print_1d(indices);
    return 0;
}