我有一个名为 geo 的集合,其中包含以下索引:
> db.geo.getIndexes();
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "geospatial.geo"
},
{
"v" : 1,
"key" : {
"checkin_id" : -1
},
"name" : "checkin_id_-1",
"ns" : "geospatial.geo"
},
{
"v" : 1,
"key" : {
"loc" : "2dsphere",
"created_at" : -1,
"user_id" : 1
},
"name" : "loc_2dsphere_created_at_-1_user_id_1",
"ns" : "geospatial.geo",
"2dsphereIndexVersion" : 2
},
{
"v" : 1,
"key" : {
"created_at" : -1
},
"name" : "created_at_-1",
"ns" : "geospatial.geo",
"expireAfterSeconds" : 31557600
},
{
"v" : 1,
"key" : {
"loc" : "2dsphere",
"created_at" : 1
},
"name" : "loc_2dsphere_created_at_1",
"ns" : "geospatial.geo",
"background" : true,
"2dsphereIndexVersion" : 2
}
]
但是在运行以下任一查询时:
db.geo.find(
{
loc:
{ $near :
{
$geometry: { type: "Point", coordinates: [ -73.9667, 40.78 ] },
$minDistance: 1000,
$maxDistance: 5000
}
},
created_at: { $gte : new ISODate("2017-06-23T00:00:00Z") }
}
)
或者
db.geo.distinct(
"user_id",
{
loc:
{ $near :
{
$geometry: { type: "Point", coordinates: [ -73.9667, 40.78 ] },
$maxDistance: 16093.4
}
},
created_at: { $gte : new ISODate("2017-06-23T00:00:00Z") }
}
).length
查询只会超时,始终无法执行完成。我是不是缺少了某个索引?有什么想法吗?这个集合中有 7400 万以上(74M+)的文档。
示例文档:
{
"_id" : ObjectId("59740de9e5bfa822388b4567"),
"checkin_id" : XXXXX,
"user_id" : XXXX,
"created_at" : ISODate("2017-07-23T02:44:36.000Z"),
"loc" : {
"type" : "Point",
"coordinates" : [
-88.2165,
42.3718
]
}
}
更新:
以下是 executionStats 的结果:
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 57740,
"nscannedObjects" : 77737,
"nscanned" : 200823,
"nscannedObjectsAllPlans" : 123823,
"nscannedAllPlans" : 300660,
"scanAndOrder" : false,
"indexOnly" : false,
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 1514,
"indexBounds" : {
},
"allPlans" : [
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 57740,
"nscannedObjects" : 77737,
"nscanned" : 200823,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
}
},
{
"cursor" : "S2NearCursor",
"isMultiKey" : false,
"n" : 101,
"nscannedObjects" : 46086,
"nscanned" : 99837,
"scanAndOrder" : false,
"indexOnly" : false,
"nChunkSkips" : 0,
"indexBounds" : {
}
}
],
"server" : "ut-geobrain-01:27017",
"filterSet" : false,
"stats" : {
"type" : "GEO_NEAR_2DSPHERE",
"works" : 258567,
"yields" : 0,
"unyields" : 0,
"invalidates" : 0,
"advanced" : 57740,
"needTime" : 0,
"needFetch" : 0,
"isEOF" : 1,
"children" : [ ]
}
}
答案 0 :(得分:0)
通过from urllib.request import urlopen
from bs4 import BeautifulSoup
html = urlopen("http://en.wikipedia.org/wiki/Kevin_Bacon")
bsObj = BeautifulSoup(html, "html.parser")
for links in bsObj.findAll("a"):
if 'href' in links.attrs:
print (links.attrs['href'])
maxTimeMS