使用Boost查询点

时间:2015-06-26 20:32:10

标签: c++ boost boost-geometry

使用boost查询点时,我似乎得到了错误的结果。使用单独的算法(使用BST),我得到每个查询平均2000点的答案,但使用提升我得到10分的答案。这是我第一次使用Boost,所以有人可以帮助我解决我的问题。

关于代码:我放了1M个随机点(x,y在0和1之间)。然后我运行100个小随机区域的查询并计算匹配数。

#include <iostream>
#include <vector>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <iomanip>

#include <boost/geometry/geometry.hpp>
#include <boost/geometry/geometries/geometries.hpp> 
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/index/rtree.hpp>

using namespace std;
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
typedef bg::model::box<point> box;    

struct particle
{
  double x;
  double y;
};

int main(int argc, char *argv[])
{
  int N=1000000;
  clock_t start,stop;

  vector<particle> myvec(N);
  vector<particle>::iterator cii;
  //Set vector values
  for (cii = myvec.begin(); cii != myvec.end(); ++cii)
    {
    cii->x =1.0*rand()/RAND_MAX;
    cii->y =1.0*rand()/RAND_MAX;
    }

  //Build R-tree
  start=clock();
  bgi::rtree<point, bgi::quadratic<16> > rtree;
  for (cii=myvec.begin();cii!=myvec.end(); ++cii)
    { 
     double x = cii->x;
     double y = cii->y;
     point p(x,y);
     rtree.insert(p);
    }
  stop=clock();
  cout<<"Time for building R tree "<<(stop-start)/(double) CLOCKS_PER_SEC<<endl;

  //Build Query List
  vector<particle> querylist(100);
  for (cii = querylist.begin(); cii != querylist.end(); ++cii)
    {
    cii->x =1.0*rand()/RAND_MAX;
    cii->y =1.0*rand()/RAND_MAX;
    }

  //Query R-tree
  start=clock();
  for (cii = querylist.begin(); cii != querylist.end(); ++cii)
    {
    double x = cii->x;
    double y = cii->y;
    double lx = x - .001;
    double ux = x + .001;
    double ly = y - .001;
    double uy = y + .001;
    point p1(lx,ly), p2(ux,uy);
    box query_box(p1, p2);

    vector<point> queryresult;
    rtree.query(bgi::intersects(query_box), std::back_inserter(queryresult));

    std::cout << "The intersection has " << (queryresult.size()) << " elements:\n";
    }
  stop=clock();
  cout<<"Time for query "<<(stop-start)/(double) CLOCKS_PER_SEC<<endl;

  return 0;
}

1 个答案:

答案 0 :(得分:3)

您的样本在统计上不合理。

让我们使查询框大得多:

double const delta = .05; // so boxes are .1x.1, i.e. 1/100th of total area
// and later
for (auto& p : querylist) {
    box query_box(point {p.x - delta, p.y - delta}, point {p.x + delta, p.y + delta});

完全符合预期的点击次数:

<强> Live On Coliru

Time for building R tree 2182054657 nanoseconds
Time for query 52749607 nanoseconds
Average intersections: 10000.3
Standard deviation:    98.9173
Expected mean:         10000

代码

注释

  • 用Boost Random(rand()发布版)
  • 替换uniform_real<>
  • 将框固定为不与区域边界重叠(因此我们没有得到偏斜的结果)
  • 而不是每次都填充一个向量,只需测量查询结果的数量:

    auto n = std::distance(
            bgi::qbegin(rtree, bgi::intersects(query_box)),
            bgi::qend(rtree));
    
  • 而不是“传闻”报告一些“交叉点”计数,使用Boost Accumulator做一些正确的统计:

    std::cout << "Average intersections: " << ba::mean(stats) << "\n";
    std::cout << "Standard deviation:    " << sqrt(ba::variance(stats)) << "\n";
    
    std::cout << "Expected mean:         " << (N * (2*delta) * (2*delta)) << "\n";
    

完整列表

#include <iostream>
#include <vector>
#include <time.h>
#include <math.h>
#include <stdlib.h>
#include <iomanip>

#include <boost/geometry/geometry.hpp>
#include <boost/geometry/geometries/geometries.hpp>
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <boost/random.hpp>
#include <boost/chrono.hpp>

#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics.hpp>

namespace bg  = boost::geometry;
namespace bgi = boost::geometry::index;

double const delta = .05; // so boxes are .1x.1, i.e. 1/100th of total area

typedef bg::model::point<double, 2, bg::cs::cartesian> point;
typedef bg::model::box<point> box;

struct particle {
    double x;
    double y;
};

namespace ba  = boost::accumulators;
namespace bat = ba::tag;
using hrc = boost::chrono::high_resolution_clock;

int main() {
    boost::mt19937 prng(std::random_device{}());
    size_t N = 1000000ul;

    std::vector<particle> myvec;
    myvec.reserve(N);

    {
        boost::uniform_real<double> dist(0,1);

        // Set vector values
        for (size_t i = 0; i<N; ++i)
            myvec.push_back({dist(prng), dist(prng)});
    }

    // Build R-tree
    auto start = hrc::now();

    bgi::rtree<point, bgi::quadratic<16> > rtree;
    for (auto& p : myvec)
        rtree.insert(point {p.x, p.y});

    auto stop = hrc::now();
    std::cout << "Time for building R tree " << (stop - start) << std::endl;

    // Build Query List
    std::vector<particle> querylist;
    querylist.reserve(100);

    {
        boost::uniform_real<double> dist(0+delta,1-delta);

        for (size_t i = 0; i<100; ++i)
            querylist.push_back({dist(prng), dist(prng)});
    }

    ba::accumulator_set<double, ba::stats<bat::mean, bat::variance>> stats;

    // Query R-tree
    start = hrc::now();
    for (auto& p : querylist) {
        box query_box(point {p.x - delta, p.y - delta}, point {p.x + delta, p.y + delta});

#if 0
        std::vector<point> queryresult;
        rtree.query(bgi::intersects(query_box), std::back_inserter(queryresult));

        //std::cout << "The intersection has " << queryresult.size() << " elements:\n";
        stats(queryresult.size());
#else
        auto n = std::distance(
                bgi::qbegin(rtree, bgi::intersects(query_box)),
                bgi::qend(rtree));
        stats(n);
#endif
    }
    stop = hrc::now();
    std::cout << "Time for query " << (stop - start) << "\n";
    std::cout << "Average intersections: " << ba::mean(stats) << "\n";
    std::cout << "Standard deviation:    " << sqrt(ba::variance(stats)) << "\n";

    std::cout << "Expected mean:         " << (N * (2*delta) * (2*delta)) << "\n";
}

缩放

请注意结果比例:

double const delta = .005; // so boxes are .01x.01, i.e. 1/10000th of total area

Time for building R tree 2101529662 nanoseconds
Time for query 2982070 nanoseconds
Average intersections: 99.69
Standard deviation:    8.96069
Expected mean:         100

甚至

double const delta = .00005; // so boxes are .0001x.0001, i.e. 1/100000000th of total area

Time for building R tree 726361394 nanoseconds
Time for query 216690 nanoseconds
Average intersections: 0.02
Standard deviation:    0.14
Expected mean:         0.01