我很难决定该使用哪种(或哪几种)算法来按下述条件查找对象:有两个类:`TileSet` 和 `Tile`。TileSet 有两个 int 属性:firstTileId 和 lastTileId;Tile 有一个 int 属性:id,如下所示:
/// A contiguous range of tile ids, from firstTileId to lastTileId (both inclusive).
struct TileSet {
    int firstTileId;
    int lastTileId;
};

/// A single tile, identified by its id.
struct Tile {
    int id;
};
应用程序中的 TileSet 不会超过 10 个(通常为 3–5 个),而 Tile 有 10,000 个以上。对于给定 id 的 Tile,快速确定它属于哪个 TileSet 非常重要。TileSet 被添加到向量之后,其 firstTileId 和 lastTileId 属性不会再改变,并且各 TileSet 的范围互不重叠,例如:{{1,25},{26,125},{126,781},{782,789} ...}。可以看到,这些范围之间没有空洞。另外,Tile 向量也是无序的。我目前的实现(大致的伪代码)是:
Vector t = 10.000+ tiles
Vector ts = tilesets with a size of a number of a power of 2 number bigger than 6, at least
for tileIndex = 0; tileIndex < t.size; tileIndex++, do:
    for tilesetIndex = 0; tilesetIndex < ts.size; tilesetIndex++, do:
        // A tile belongs to a tileset when firstTileId <= id <= lastTileId (both ends inclusive).
        if (ts[tilesetIndex].firstTileId <= t[tileIndex].id && t[tileIndex].id <= ts[tilesetIndex].lastTileId)
            // tile t[tileIndex] belongs to the tileset ts[tilesetIndex]! Done!
答案 0 :(得分:7)
在这个使用 Boost ICL 的例子中,我做了一些“任意”的选择,以生成漂亮的、互不相交(disjoint)的 TileSets:
using TileSets = icl::split_interval_set<int>;
struct TileSet : TileSets::interval_type::type {
TileSet(int b, int e) : TileSets::interval_type(closed(b, e)) {}
struct Tile : TileSets::interval_type::type {
Tile(int id) : TileSets::interval_type(closed(id, id)) {}
**Live On Coliru**
TileSets gen_tiles (size_t n = 100000);
TileSets gen_tilesets(size_t n = (2ull << 8) + 1);
#include <iostream>
// Demo: generate a few tiles and tilesets, print the tilesets, then report
// for every intersecting tile which tileset interval it landed in.
int main() {
    auto const tiles = gen_tiles (10);
    auto const ts = gen_tilesets(30);

    std::cout << ts << "\n----\n";

    // `tiles & ts` is the ICL intersection of the two interval sets.
    for (auto hit : tiles & ts) {
        // ts.find(hit) looks the hit up again only to print the enclosing tileset.
        std::cout << hit.lower() << " hits in tileset " << *ts.find(hit) << "\n";
    }
}
1561 hits in tileset (1560,1706)
1835 hits in tileset [1835,1997)
3746 hits in tileset (3664,3825]
4459 hits in tileset (4320,4506]
5969 hits in tileset (5925,6012]
5987 hits in tileset (5925,6012]
7320 hits in tileset [7220,7852]
7797 hits in tileset [7220,7852]
7966 hits in tileset (7852,8325)
9508 hits in tileset (9489,9657]
以默认规模(100000 个 tile、2 ^ 8 + 1 个 tileset)运行时,在我的机器上耗时 0.034 秒(注意:上面声明中 tileset 数量的默认值写作 `(2ull << 8) + 1`,即 513):
$ time ./test | tee >(echo "total lines: $(wc -l)") | tail
9987 hits in tileset (9984,9990]
9988 hits in tileset (9984,9990]
9989 hits in tileset (9984,9990]
9990 hits in tileset (9984,9990]
9991 hits in tileset (9990,9995]
9992 hits in tileset (9990,9995]
9993 hits in tileset (9990,9995]
9994 hits in tileset (9990,9995]
9995 hits in tileset (9990,9995]
total lines: 9988
real 0m0.034s
user 0m0.029s
sys 0m0.008s
在 Coliru 上(Live On Coliru)运行耗时 0.064 秒。这其中包含了输出所花的时间,并且代码还执行了一次冗余查找(`ts.find(hit)`)。
#include <boost/icl/interval_set.hpp>
#include <boost/icl/split_interval_set.hpp>
namespace icl = boost::icl;
using TileSets = icl::split_interval_set<int>;
struct TileSet : TileSets::interval_type::type {
TileSet(int b, int e) : TileSets::interval_type(closed(b, e)) {}
struct Tile : TileSets::interval_type::type {
Tile(int id) : TileSets::interval_type(id) {}
TileSets gen_tiles (size_t n = (1ull << 22));
TileSets gen_tilesets(size_t n = (1ull << 12));
#include <iostream>
#include <iomanip>
#include <boost/chrono/chrono_io.hpp>
/// Runs `f`, returns its result, and prints how long the call took.
///
/// @param task  label used in the timing message
/// @param f     callable invoked with no arguments
/// @return      whatever `f()` returns
template <typename F>
auto timed(char const* task, F&& f) {
    using namespace boost::chrono;

    // Scope guard: the destructor runs when `timed` returns, i.e. after f() has
    // finished, and reports the elapsed milliseconds since construction.
    struct _ {
        high_resolution_clock::time_point s;
        const char* task;
        ~_() { std::cout << " -- (" << task << " completed in " << duration_cast<milliseconds>(high_resolution_clock::now() - s) << ")\n"; }
    } timing { high_resolution_clock::now(), task };

    return f();
}
// Benchmark driver: times generation of both interval sets, then times the
// intersection (hits) and difference (misses) queries between them.
int main() {
    auto const tiles = timed("Generate tiles", [] { return gen_tiles(); });
    auto const ts = timed("Generate tile sets", [] { return gen_tilesets(); });

    //std::cout << ts << "\n----\n";

    std::cout << "Random tiles generated: " << tiles.iterative_size() << " across a domain of " << std::setprecision(2) << static_cast<double>(tiles.size()) << "\n";
    // Report the tilesets' own domain size (previously this line printed tiles.size() again).
    std::cout << "Tilesets to match against: " << ts.iterative_size() << " across a domain of " << std::setprecision(2) << static_cast<double>(ts.size()) << "\n";

    timed("Query intersection", [&] { std::cout << "Total number of hits: " << (tiles & ts).iterative_size() << "\n"; });
    timed("Query difference", [&] { std::cout << "Total number of misses: " << (tiles - ts).iterative_size() << "\n"; });

    //for (auto hit : tiles & ts) {
        //std::cout << hit.lower() << " hits in tileset " << *ts.find(hit) << "\n";
    //}
}
#include <random>
// Shared id source: a Mersenne Twister seeded with the constant 42 feeds a
// default-constructed uniform int distribution. The closure owns both and is
// `mutable` because every call advances the engine state.
static auto gen_tile_id =
    [engine = std::mt19937{42}, pick = std::uniform_int_distribution<int>{}]() mutable -> int {
        return pick(engine);
    };
/// Builds an interval set by inserting `n` tiles with ids drawn from gen_tile_id().
/// Each generated int id is converted to a Tile by the lambda's return type.
TileSets gen_tiles(size_t n) {
    TileSets r;
    std::generate_n(icl::inserter(r, r.end()), n, [] () -> Tile { return gen_tile_id(); });
    return r;
}
/// Builds an interval set by inserting `n` tilesets whose two endpoints are
/// drawn from gen_tile_id().
TileSets gen_tilesets(size_t n) {
    TileSets r;
    std::generate_n(icl::inserter(r, r.end()), n, [] () -> TileSet {
        auto b = gen_tile_id(), e = gen_tile_id();
        // Order the two draws so the lower bound never exceeds the upper bound.
        return { std::min(b,e), std::max(b,e) };
    });
    return r;
}
-- (Generate tiles completed in 3773 milliseconds)
-- (Generate tile sets completed in 152 milliseconds)
Random tiles generated: 4190133 across a domain of 4.2e+06
Tilesets to match against: 8191 across a domain of 4.2e+06
Total number of hits: 4187624
-- (Query intersection completed in 1068 milliseconds)
Total number of misses: 2509
-- (Query difference completed in 533 milliseconds)
答案 1 :(得分:2)
如果 tile id 是整数且不太大,则可以创建一个查找表:对于每个 id,只需记录该 id 所属 tileset 的编号。大致如下:
for set in tilesets
for id=set.first to set.last
setLookup[id] = set.number
如果您的tile id不是整数,或者可能太大而查找表变得不切实际,则第二种方法有效。然后事先对所有tileset进行排序,使其first
答案 2 :(得分:2)
对于这个问题,我会使用一种考虑了区间大小的优化二叉树搜索。如果 tile id 服从均匀分布,那么让区间较大的 TileSet 所需的比较次数尽可能少是有意义的。这个想法类似于哈夫曼编码算法:在构建二叉树时,使出现频率更高的符号在树中的路径更短。
[0,2), [2,9), [9,34), [34,39), [39,48), [48,148), [148,153), [153,154)
length = 154
逐一比较(如问题中所示):如果 Tile 属于第一个 TileSet,只需一次比较即可找到它;如果属于第二个 TileSet,则需要两次比较;如果属于第三个 TileSet,则需要三次比较,依此类推:
C1 = (2*1 + 7*2 + 25*3 + 5*4 + 9*5 + 100*6 + 5*7 + 1*8)/length = 799/154 = 5.19
              /  \
            /      \
          /          \
       /  \          /  \
      /    \        /    \
    /\      /\    /\      /\
   2  7   25  5  9  100  5  1
C2 = 3
              /  \
            /      \
         / \         \
       /\    \        / \
     /\  \    /\     /   /\
    2  7  25 5  9  100  5  1
C3 = (2*4+7*4+25*3+5*3+9*3+100*2+5*3+1*3)/154 = 2.41
树按以下方式构建:将TileSets分成两部分,使左侧部分和右侧部分的权重之和之间的差异最小化。 举个例子:
[2,7,25,5,9,100,5,1] => [2,7,25,5,9],[100,5,1]
答案 3 :(得分:1)
// Trivial solution: test every tile against every tileset's inclusive bounds.
for (int i=0; i<10000; ++i) {
    for (int j=0; j<8; j++) {
        if ((tiles[i] >= lowerBounds[j]) &&
            (tiles[i] <= upperBounds[j])) {
            ids[i] = j;
        }
    }
}
// Branch-free variant: for each tileset j, clamp ld to 1 when tiles[i] lies in
// the tileset and to 0 otherwise, then accumulate j*ld. ids2 must start zeroed.
for (int i=0; i<10000; ++i) {
    for (int j=0; j<8; ++j) {
        short int ld = range[j] - tiles[i] + lowerBounds2[j];
        ld = ld<0?0:ld;              // tile above the upper bound -> 0
        ld = ld>(range[j]-1)?0:ld;   // tile below the lower bound -> 0
        ld = ld>1?1:ld;              // inside the range -> exactly 1
        ids2[i] += j*ld;
    }
}
由于我们没有时间手写 AVX 内在函数(intrinsics)等,这里直接让 g++ 来优化代码;这样第二种方案在 i5-4200U 上要快约 10 倍:
g++ -std=c++11 -O3 -march=native
Trivial: 0.147607 ms
Optimized: 0.014068 ms
Trivial: 0.043876 ms
Optimized: 0.004328 ms
#include <iostream>
#include <random>
#include <chrono>
#include <cstring>
using namespace std;
using namespace std::chrono;
// Micro-benchmark comparing two ways of mapping 10,000 random tile ids to one
// of eight tilesets: a branching range test and a branch-free arithmetic form.
// Prints the best time of 100 runs for each, then checks that both agree.
int main() {
    // Inclusive [lower, upper] bounds of the eight tilesets.
    short int lowerBounds [8] = {0, 2, 9, 34, 39, 48, 148, 153};
    short int upperBounds [8] = {1, 8, 33, 38, 47, 147, 152, 154};
    // Tables for the branch-free form: range[j] == upper - lower + 2 and
    // lowerBounds2[j] == lower - 1, so range[j] - tile + lowerBounds2[j]
    // equals upper + 1 - tile, which is in [1, range[j]-1] exactly when the
    // tile lies inside tileset j.
    short int range [8] = {3, 8, 26, 6, 10, 101, 6, 3};
    short int lowerBounds2[8] = {-1, 1, 8, 33, 38, 47, 147, 152};
    short int tiles [10000];
    short int ids [10000] = {0};
    short int ids2 [10000] = {0};

    // 10,000 random tiles
    default_random_engine gen;
    uniform_int_distribution<short int> dist(0, 154);
    for (int i=0; i<10000; ++i) {
        tiles[i] = dist(gen);
    }

    // *** trivial solution
    double bestTime = 1.0;
    for (int r=0; r<100; r++) {
        auto t1 = high_resolution_clock::now();
        for (int i=0; i<10000; ++i) {
            for (int j=0; j<8; j++) {
                if ((tiles[i] >= lowerBounds[j]) &&
                    (tiles[i] <= upperBounds[j])) {
                    ids[i] = j;
                }
            }
        }
        auto t2 = high_resolution_clock::now();
        auto elapsed = duration_cast<duration<double>>(t2 - t1).count();
        if (elapsed < bestTime)
            bestTime = elapsed;
    }
    cout<<"Trivial: "<<bestTime*1000<<" ms"<<endl;

    // *** optimized solution
    bestTime = 1.0;
    for (int r=0; r<100; r++) {
        // ids should be zero for this method
        memset(ids2, 0, 10000*sizeof(short int));
        auto t1 = high_resolution_clock::now();
        for (int i=0; i<10000; ++i) {
            for (int j=0; j<8; ++j) {
                short int ld = range[j] - tiles[i] + lowerBounds2[j];
                ld = ld<0?0:ld;              // tile above the upper bound -> 0
                ld = ld>(range[j]-1)?0:ld;   // tile below the lower bound -> 0
                ld = ld>1?1:ld;              // inside the range -> exactly 1
                ids2[i] += j*ld;
            }
        }
        auto t2 = high_resolution_clock::now();
        auto elapsed = duration_cast<duration<double>>(t2 - t1).count();
        if (elapsed < bestTime)
            bestTime = elapsed;
    }
    cout<<"Optimized: "<<bestTime*1000<<" ms"<<endl;

    // validate
    for (int i=0; i<10000; i++) {
        if ((ids[i] - ids2[i]) != 0) {
            cout<<"The results didn't match!"<<endl;
            // One mismatch is enough: stop instead of repeating the message.
            return 1;
        }
    }
    return 0;
}
答案 4 :(得分:0)
/// Binary search for the tileset whose inclusive id range contains `value.id`.
///
/// `sets` must be sorted by firstTileId and its ranges must not overlap.
/// Only the half-open index range [start, end) is searched.
///
/// The container and tile types are template parameters so any indexable
/// container of elements with `firstTileId`/`lastTileId` and any tile type
/// with `id` can be used; existing call sites compile unchanged.
///
/// @param sets   sorted, non-overlapping tilesets
/// @param start  first index to consider
/// @param end    one past the last index to consider
/// @param value  tile to look up
/// @return index of the containing tileset, or -1 if no tileset contains the id
template <typename TileSetRange, typename TileT>
auto findTileSetIndex(const TileSetRange& sets,
                      size_t start, size_t end,
                      const TileT& value)
    -> signed int {
    // Iterative form: each step discards `mid` itself, so the range always
    // shrinks and the search terminates.
    while (start < end) {
        const size_t mid = start + (end - start) / 2;
        if (value.id < sets[mid].firstTileId) {
            end = mid;                    // continue in the left half
        } else if (value.id > sets[mid].lastTileId) {
            start = mid + 1;              // continue in the right half, excluding mid
        } else {
            return static_cast<signed int>(mid);  // firstTileId <= id <= lastTileId
        }
    }
    return -1;
}
// Look up the owning tileset of every tile in `t`.
for(auto& tile : t) {
    // Pass the current tile, not the whole container.
    auto tileSetIndex = findTileSetIndex(ts, 0, ts.size(), tile);
    // -1 means "not found"; index 0 is a valid tileset, so test with >= 0.
    if(tileSetIndex >= 0) {
        // tile belongs to ts[tileSetIndex]
    }
}