I am trying to process a large amount of data, but after only 150 sentences I get an error: java heap space (out of memory). My problem is that the project takes a long time to build and run, so after every fix I try I have to wait about 2 hours to see the result, which is why I want a definite approach before trying again. (I think the problem is that I am using a collection and, on each increment, I assign the new one over the old one, so the garbage collector does not remove the old one. That is just my guess; I am not sure.)
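To make clear what I mean by reassigning the collection each increment, here is a hypothetical sketch with placeholder names (not my real classes); my actual loop follows after it:

import java.util.ArrayList;
import java.util.List;

public class ReassignExample {
    public static void main(String[] args) {
        List<String> results = new ArrayList<>();
        for (int i = 0; i < 150; i++) {
            // build a fresh copy each increment and assign it over the old reference;
            // my worry is that the old copies are somehow kept alive instead of collected
            List<String> next = new ArrayList<>(results);
            next.add("sentence " + i);
            results = next;
        }
        System.out.println(results.size());
    }
}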
for (int i = 0; i < 150; i++) {
    id = read.get(i).GetId();
    // when the pathid changes, close the current file and open the next one
    if (!pathid.equalsIgnoreCase(id.split(":")[0])) {
        writer.flush();
        writer.close();
        pathid = id.split(":")[0];
        path = "alingments//" + pathid + ".txt";
        file = new File(path);
        if (!file.exists()) {
            file.createNewFile();
        }
        writer = new FileWriter(file);
    }

    String En = read.get(i).getEn();
    EnSentence preTagged = new EnSentence(En);

    // dealing with the Arabic sentence
    String Ar = read.get(i).getAr();
    ArSentence preMorh = new ArSentence(Ar);

    // pre-processing
    preProces.replaceNumSuff(preTagged);
    preProces.replaceArabMonth(preMorh);
    preProces.replaceAbbreviation(preTagged);

    EnSentence TagSentence = tagger.TagSentence(preTagged);
    // English sentence ready to match
    EnSentence en_with_Syn = wnet.Lemmatize(TagSentence, true, true);

    ProbMatrix prob;
    ArSentence sentence = morph.getSentence(preMorh);
    prob = new ProbMatrix(id, en_with_Syn, sentence);

    Matcher dictionary = new Matcher(prob);
    dictionary.match(en_with_Syn, sentence);
    dictionary.getProb().getAlingments(10);
    dictionary.getProb().getNearestNiebourAlignment(false);
    dictionary.getProb().getNearestNiebourAlignment(true);

    String alingments = dictionary.getProb().displayAlignments();
    writer.append(alingments);
}
writer.close();
}
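In case the file handling matters: the per-pathid writer logic above boils down to something like the following sketch, rewritten in isolation with try-with-resources (a simplified, hypothetical helper, not my actual code; only the "alingments" folder and the id format are taken from my loop). I am not sure whether leaving writers open contributes to the memory problem, so this is just to show what I could change.

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

public class AlignmentWriterSketch {
    // hypothetical helper: append one block of alignments to the file for its pathid
    static void appendAlignments(String id, String alignments) throws IOException {
        String pathid = id.split(":")[0];
        File file = new File("alingments//" + pathid + ".txt");
        if (!file.exists()) {
            file.createNewFile();
        }
        // second argument true = append mode; try-with-resources closes the writer
        // even if append() throws, so no FileWriter is left open
        try (FileWriter writer = new FileWriter(file, true)) {
            writer.append(alignments);
        }
    }
}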