我尝试将 MongoDB 用作搜索引擎,结果发现这简直是一场灾难。我对 500 万个带地理位置的文档做了一个简单的查询,测试数据由下面的脚本生成:
// Reset the test environment: drop the current database, then recreate the
// "places" collection so the benchmark starts from an empty data set.
db.runCommand( { dropDatabase: 1 } )
db.createCollection("places");
// 2dsphere index on the "locs.loc" path, i.e. the GeoJSON object stored under
// "loc" in each element of the "locs" array of the inserted documents.
db.places.createIndex( { "locs.loc" : "2dsphere" } )
/**
 * Returns a random integer obtained by truncating Math.random() * n toward zero,
 * i.e. a value in [0, n) for a positive n.
 *
 * The truncation is done arithmetically instead of with parseInt(): parseInt()
 * converts its argument to a string first, so a tiny product such as 1e-7
 * (stringified as "1e-7") would be parsed as 1 instead of 0.
 *
 * @param {number} n - Exclusive upper bound of the generated integer.
 * @returns {number} The truncated random value.
 */
function randInt(n) {
  var scaled = Math.random() * n;
  // Truncate toward zero for either sign, matching what parseInt() did for
  // ordinary (non-exponent) values.
  return scaled < 0 ? Math.ceil(scaled) : Math.floor(scaled);
}
/**
 * Returns a random floating-point number: Math.random() scaled by n.
 *
 * @param {number} n - Scale factor applied to the random fraction.
 * @returns {number} The product of Math.random() and n.
 */
function randFloat(n) {
  var fraction = Math.random();
  return fraction * n;
}
// Builds one element of a document's "locs" array: an object whose "loc" field
// is a GeoJSON Point with coordinates [randFloat(180), randFloat(90)].
function randomLocEntry() {
  return {
    loc: {
      type: "Point",
      coordinates: [ randFloat(180), randFloat(90) ]
    }
  };
}

// Loads the test data in 10 ordered bulk operations of 1,000,000 inserts each.
// Every document carries exactly two entries in "locs" and no other fields.
// NOTE(review): 10 x 1,000,000 = 10,000,000 documents are inserted here, while
// the surrounding text talks about 5 million -- confirm the intended data size.
for (var batchNo = 0; batchNo < 10; batchNo++) {
  print("Building op " + batchNo);
  var batchOps = db.places.initializeOrderedBulkOp();
  var remaining = 1000000;
  while (remaining-- > 0) {
    batchOps.insert({ locs: [ randomLocEntry(), randomLocEntry() ] });
  }
  print("Executing op " + batchNo);
  batchOps.execute();
}
然后我运行了一个不匹配任何文档的查询:
// geoNear command against the "places" collection around a fixed GeoJSON point,
// with an additional filter on "category". The documents inserted by the load
// script only have a "locs" field, so this filter matches no document.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
// Extra filter applied on top of the geo search; no limit or maxDistance is given.
query: { category: "xyz" }
}
)
它花了将近 4 分钟才返回结果:
"waitedMS" : NumberLong(0),
"results" : [ ],
"stats" : {
"nscanned" : 10018218,
"objectsLoaded" : 15000000,
"maxDistance" : 0,
"time" : 219873
},
"ok" : 1
相比之下,同样的查询在 Sphinx 索引上(实际上 Sphinx 对这类查询根本不使用索引,只是遍历已经在内存中的所有记录并逐条过滤)只需 200 ms 就能返回。
我做错了什么?这台机器有 32GB 可用内存,而全部数据只占用 150 MB。有什么办法可以加快 MongoDB 的速度吗?还是说我们终究不能把 MongoDB 用作搜索引擎?
答案 0 :(得分:3)
测试环境:MongoDB 3.4rc,包含 200 万条记录。
我认为你的代码的问题出在 "query" 参数上,因为它相当于在没有相应索引的集合上又执行了一次查询。
更新(包含结果/统计信息):
// Reset the test environment: drop the current database, then recreate the
// "places" collection so the benchmark starts from an empty data set.
db.runCommand( { dropDatabase: 1 } )
db.createCollection("places");
// 2dsphere index placed directly on the "coordinates" array of each "locs.loc"
// object (not on the enclosing "loc" object).
db.places.createIndex( { "locs.loc.coordinates" : "2dsphere" } )
/**
 * Returns a random integer obtained by truncating Math.random() * n toward zero,
 * i.e. a value in [0, n) for a positive n.
 *
 * The truncation is done arithmetically instead of with parseInt(): parseInt()
 * converts its argument to a string first, so a tiny product such as 1e-7
 * (stringified as "1e-7") would be parsed as 1 instead of 0.
 *
 * @param {number} n - Exclusive upper bound of the generated integer.
 * @returns {number} The truncated random value.
 */
function randInt(n) {
  var scaled = Math.random() * n;
  // Truncate toward zero for either sign, matching what parseInt() did for
  // ordinary (non-exponent) values.
  return scaled < 0 ? Math.ceil(scaled) : Math.floor(scaled);
}
/**
 * Returns a random floating-point number: Math.random() scaled by n.
 *
 * @param {number} n - Scale factor applied to the random fraction.
 * @returns {number} The product of Math.random() and n.
 */
function randFloat(n) {
  var fraction = Math.random();
  return fraction * n;
}
// Loads the test data in 10 ordered bulk operations of 1,000,000 inserts each.
// Every document carries two entries in "locs", each holding a GeoJSON Point
// with coordinates [randFloat(180), randFloat(90)].
for(var j=0; j<10; j++) {
print("Building op "+j);
var bulkop=db.places.initializeOrderedBulkOp() ;
for (var i = 0; i < 1000000; ++i) {
bulkop.insert(
{
locs: [
{
loc : {
type: "Point",
coordinates: [ randFloat(180), randFloat(90) ]
}
},
{
loc : {
// "type" was missing on this second entry; without it the object is not a
// valid GeoJSON Point and differs from the first entry in the same document.
type: "Point",
coordinates: [ randFloat(180), randFloat(90) ]
}
}
]
}
)
};
print("Executing op "+j);
// Send the accumulated batch to the server before building the next one.
bulkop.execute();
}
这是查询:
// First query: plain geoNear on "places" around a fixed GeoJSON point, with no
// additional filter.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true
}
)
// Second query: the same geoNear, plus an extra filter on the "category" field.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
query: { category: "private" }
}
)
创建包含 "category" 的复合索引:{ "locs.loc.coordinates": "2dsphere", category: 1 }
更新:通过添加 "maxDistance",查询耗时可以从 6863 ms 降到 396 ms:
// geoNear on "places" with a "category" filter, bounded by maxDistance so the
// search stops at a fixed radius around the query point.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
query: {category: "private"},
// NOTE(review): presumably meters, since "near" is a GeoJSON point -- confirm
// against the geoNear documentation.
maxDistance: 1000000
}
)
使用 maxDistance: 1000000 时的统计信息:
"stats" : {
"nscanned" : NumberInt(107820),
"objectsLoaded" : NumberInt(1),
"avgDistance" : 938598.1782650856,
"maxDistance" : 938598.1782650856,
"time" : NumberInt(396)
}
不使用 "maxDistance" 时:
// Same geoNear with the "category" filter but without maxDistance, used as the
// baseline for the timing comparison.
db.runCommand(
{
geoNear: "places",
near: { type: "Point", coordinates: [ 73.9667, 40.78 ] },
spherical: true,
query: {category: "private"}
}
)
"stats" : {
"nscanned" : NumberInt(2023916),
"objectsLoaded" : NumberInt(6),
"avgDistance" : 3013587.205365039,
"maxDistance" : 4263919.742779636,
"time" : NumberInt(6863)
}
来源: https://www.mongodb.com/blog/post/geospatial-performance-improvements-in-mongodb-3-2
此外,你的文档使用了"坐标数组"(每个文档包含多个位置),我认为这没有必要,因为一个对象(通常)只有 1 个地理位置点。
另一种优化方法是改用 "geoWithin",因为它不会按距离排序(也许你本来就想按其他条件排序,例如"得票最多的餐厅")。具体取决于使用场景。