我有一个包含 1455413 个文档的集合(sentencesDB2.locations)。我需要处理其中的每个文档,并在另一个数据库的集合(locationFeaturesAnalysisDB.features2)中为其创建一个新文档。
由于某种原因,处理过程在第 761447 个文档之后就停止了;当我尝试跳过这些文档时,程序提示该文档已经处理过(对应的新文档已经在数据库中)。我已经尝试了我所知道的所有与查询超时或游标超时相关的设置。
我做错了什么?我的代码:
/**
 * Shared client for {@code Consts.serverAddress}, built with {@code maxWaitTime} set to
 * {@code Integer.MAX_VALUE} and socket keep-alive switched on.
 */
public static MongoClient mongo = new MongoClient(
        Consts.serverAddress,
        new MongoClientOptions.Builder()
                .maxWaitTime(Integer.MAX_VALUE)
                .socketKeepAlive(true)
                .build());

/** The {@code sentences} collection of database {@code sentencesDB2}. */
public static DBCollection sentencesCollection =
        mongo.getDB("sentencesDB2").getCollection("sentences");

/** The {@code nouns} collection of database {@code sentencesDB2}. */
public static DBCollection nounsCollection =
        mongo.getDB("sentencesDB2").getCollection("nouns");

/** The {@code locations} collection of database {@code sentencesDB2}; iterated by {@code main}. */
public static DBCollection locationsCollection =
        mongo.getDB("sentencesDB2").getCollection("locations");

/** The {@code dependencies} collection of database {@code sentencesDB2}. */
public static DBCollection dependenciesCollection =
        mongo.getDB("sentencesDB2").getCollection("dependencies");

/** The {@code features2} collection of database {@code locationFeaturesAnalysisDB}; written by {@code main}. */
public static DBCollection featuresCollection =
        mongo.getDB("locationFeaturesAnalysisDB").getCollection("features2");
public static void main (String [] args)
{
long featuresSize = featuresCollection.count();
DBCursor locations = locationsCollection.find().addOption(Bytes.QUERYOPTION_NOTIMEOUT);
System.out.println(locations.size());
for(long i = 0; locations.hasNext(); i ++)
{
DBObject location = locations.next();
double distanceFromHotel = (Double) location.get("distanceFromHotel");
Object nounId = location.get("nounId");
System.out.println("treating noun :" + nounId);
DBObject noun = nounsCollection.find(new BasicDBObject("_id", nounId)).next();
Object sentenceId = noun.get("sentenceID");
DBObject sentence = sentencesCollection.find(new BasicDBObject("_id", sentenceId)).next();
DBCursor features = featuresCollection.find(new BasicDBObject("nounId", nounId)).addOption(Bytes.QUERYOPTION_NOTIMEOUT);
if(!features.hasNext())
{
int nounStartIndex = (Integer) noun.get("startIndex");
int nounEndIndex = (Integer) noun.get("endIndex");
BasicDBObject analysis = analyzeInstance(noun, sentence)
.append("nearby", (distanceFromHotel <= 2))
.append("nounId", nounId)
.append("sentenceId", sentenceId)
.append("locationId", location.get("_id"))
.append("sentence", sentence.get("sentence"))
.append("nounString", noun.get("nounString"))
.append("nounStartIndex", nounStartIndex)
.append("nounEndIndex", nounEndIndex)
.append("distanceFromHotel", distanceFromHotel);
featuresCollection.insert(analysis);
}
else
{
System.out.println("is in db");
}
features.close();
}
locations.close();
谢谢。