浏览群集时OrientDB会变慢

时间:2014-08-24 10:16:49

标签: java orientdb

我想要实现的是在句子中保存单词对,如果单词已经存在,我试图保存一个单词列表。 为了保存配对,因为我的数据集文件非常大,可能有数百万,我选择了orientdb。我不知道我是否正确接近它,但orientdb非常慢。经过8个小时的运行,它只成了12000个句子。 据我所知,主要的减速是在浏览集群。 附上我的代码,如果蚂蚁可以对我的方法提出任何指示,请。

    public static void main(String[] args) {
        // TODO Auto-generated method stub
        Main m = new Main();
        m.openDatabase();
        m.readFile("train_v2.txt");
        m.closeDatabase();
    }
}

class Main {
    ODatabaseDocumentTx db;
    Map<String, Object> index;
    List<Object> list = null;
    String pairing[];
    ODocument doc;

    Main() {

    }

    public void closeDatabase() {
        if (!db.isClosed()) {
            db.close();
        }
    }

    void openDatabase() {
        db = new ODatabaseDocumentTx("local:/databases/model").open("admin",
                "admin");
        doc = new ODocument("final");
    }

    public void readFile(String filename) {
        InputStream ins = null; // raw byte-stream
        Reader r = null; // cooked reader
        int i = 1;
        BufferedReader br = null; // buffered for readLine()
        try {
            String s;
            ins = new FileInputStream(filename);
            r = new InputStreamReader(ins, "UTF-8"); // leave charset out
                                                        // for
                                                        // default
            br = new BufferedReader(r);
            while ((s = br.readLine()) != null) {
                System.out.println("" + i);
                createTermPair(s.replaceAll("[^\\w ]", "").trim());
                i++;
            }
        } catch (Exception e) {
            System.err.println(e.getMessage()); // handle exception
        } finally {
            closeDatabase();
            if (br != null) {
                try {
                    br.close();
                } catch (Throwable t) { /* ensure close happens */
                }
            }
            if (r != null) {
                try {
                    r.close();
                } catch (Throwable t) { /* ensure close happens */
                }
            }
            if (ins != null) {
                try {
                    ins.close();
                } catch (Throwable t) { /* ensure close happens */
                }
            }
        }
    }

    private void createTermPair(String phrase) {
        phrase = phrase + " .";
        String[] word = phrase.split(" ");

        for (int i = 0; i < word.length - 1; i++) {
            if (!word[i].trim().equalsIgnoreCase("")
                    && !word[i + 1].trim().equalsIgnoreCase("")) {
                String wordFirst = word[i].toLowerCase().trim();
                String wordSecond = word[i + 1].toLowerCase().trim();
                String pair = wordFirst + " " + wordSecond;
                checkForPairAndWrite(pair);
            }
        }
    }

    private void checkForPairAndWrite(String pair) {
        try {
            pairing = pair.trim().split(" ");

            if (!pairing[1].equalsIgnoreCase(" ")) {
                index = new HashMap<String, Object>();

                for (ODocument docr : db.browseCluster("final")) {
                    list = docr.field(pairing[0]);
                }
                if (list == null) {
                    list = new ArrayList<>();
                }
                list.add("" + pairing[1]);

                if (list.size() >= 1)
                    index.put(pairing[0], list);
                doc.fields(index);
                doc.save();
            }// for (int i = 0; i < list.size(); i++) {
                // System.out.println("" + list.get(i));
                // }
        } catch (Exception e) {
        }
        return;

    }
}

0 个答案:

没有答案