我正在更新直方图,该直方图使用具有16个区间的简单整数数组表示,如下所示。
const int binSize = 4096; // Width of one bin: the 16-bit input range (65536 values) split across 16 bins
int histogram[16]; // One counter per bin. NOTE(review): not explicitly zeroed in this snippet - confirm it is zero-initialized before counting
unsigned short inData[1024]; // This is my input data. Short is 16 bits
// Count each of the 1024 samples into the bin selected by integer division.
// Consecutive iterations may increment the same counter, so they are not independent of each other.
for(int i = 0; i < 1024; ++i)
{
++histogram[inData[i] / binSize]; // inData[i] / binSize maps 0..65535 onto bin index 0..15
}
我需要非常频繁地运行这段代码,因此它成了性能瓶颈:由于多个 bin 无法同时更新,DSP 的编译器无法将这个循环并行化。我该如何优化呢?
我在TI DSP C6000系列上运行此代码。
答案 0 :(得分:1)
下面举一个例子,说明评论中所指的做法:
#include <algorithm>
#include <array>
#include <cstddef>
#include <iterator>
#include <numeric>

#include <boost/range/adaptor/transformed.hpp>
/// A histogram with 16 equally wide bins, one int counter per bin.
using Histogram = std::array<int, 16>;

/// Converts a single 16-bit sample into a "one-hot" histogram: the counter of
/// the bin that contains the sample is 1, every other counter is 0.
///
/// @param num  The sample. Its bit pattern is interpreted as an unsigned
///             16-bit value, so samples in 32768..65535 that arrive here as
///             negative shorts still land in bins 8..15 instead of producing
///             a negative (out-of-bounds) index.
/// @return     A histogram containing exactly one count.
///
/// Assumes short is 16 bits wide, so that the unsigned value divided by 4096
/// is always in 0..15.
Histogram from_short(short num)
{
    // Value-initialize: a plain `Histogram result;` would leave the other
    // 15 counters indeterminate and corrupt any later summation.
    Histogram result{};
    result[static_cast<unsigned short>(num) / 4096] = 1;
    return result;
}
/// Returns the element-wise sum of two histograms: bin i of the result is
/// lhs[i] + rhs[i]. Neither argument is modified.
Histogram add(const Histogram & lhs, const Histogram & rhs)
{
    Histogram sum;
    // Every element of sum is written by the transform below before it is read.
    std::transform(lhs.begin(), lhs.end(), rhs.begin(), sum.begin(),
                   [](int a, int b) { return a + b; });
    return sum;
}
// Adapt the input so that each element is passed through from_short when iterated,
// i.e. every sample is seen as its own single-count histogram.
// NOTE(review): indata is not declared in this snippet - presumably the question's inData buffer; confirm.
auto singles = indata | boost::adaptors::transformed(from_short);
// Fold all per-sample histograms into one with add, starting from an all-zero Histogram{}.
// std::reduce is declared in <numeric> and may group/reorder the additions, which is
// acceptable here because element-wise integer addition is associative and commutative.
Histogram histogram = std::reduce(begin(singles), end(singles), Histogram{}, add);
另一种选择:
// Sort-based histogram: once the samples are ordered, every bin is one
// contiguous run, so its count is the distance between two bin boundaries.
// NOTE: this sorts indata in place, destroying the original sample order.
std::sort(std::begin(indata), std::end(indata));
auto previous = std::begin(indata); // start of the run belonging to the current bin
for (size_t i = 0; i < 15; ++i)
{
    // Upper edge of bin i; the first element >= this edge starts bin i + 1.
    const int upperEdge = static_cast<int>(4096 * (i + 1));
    // Search only the not-yet-counted tail [previous, end).
    auto current = std::lower_bound(previous, std::end(indata), upperEdge);
    histogram[i] = static_cast<int>(std::distance(previous, current));
    previous = current;
}
// Whatever remains belongs to the last bin, index 15 (there are 16 bins: 0..15).
histogram[15] = static_cast<int>(std::distance(previous, std::end(indata)));