插入节点时neo4j性能问题

时间:2012-07-05 15:18:09

标签: neo4j

插入2000个节点消耗10000毫秒。这是我正在尝试的代码:

package org.demo.neo4j;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.Transaction;

/**
 * Bulk-insert benchmark for an embedded Neo4j database.
 *
 * <p>Submits 1000 insert tasks to a 4-thread pool. Each task looks up (or
 * creates) a per-task "reference" node hanging off the database reference node
 * and then attaches 100000 child nodes to it, committing one transaction per
 * batch of {@value #BATCH_SIZE} nodes and printing the time each batch took.
 */
public class Main {

    private static final Logger LOGGER = Logger.getLogger(Main.class.getName());

    /** Number of nodes created inside a single transaction. */
    private static final int BATCH_SIZE = 2000;

    /** Relationship type linking a reference node to each node created under it. */
    private static final RelationshipType LINK = DynamicRelationshipType.withName("LINK");

    private static final ExecutorService pool = Executors.newFixedThreadPool(4);

    /** Caps the number of submitted-but-unfinished tasks so the pool queue stays short. */
    private static final Semaphore semaphore = new Semaphore(4);

    // NOTE(review): Noe4jUtils is defined elsewhere; presumably it returns a
    // shared, already-started database instance -- confirm.
    private final GraphDatabaseService neo = Noe4jUtils.getInstance();

    public static void main(String[] args) {
        Main main = new Main();
        try {
            main.insert();
        } finally {
            // The pool's worker threads are non-daemon: without shutdown() the
            // JVM never exits. Tasks that were already submitted still run.
            pool.shutdown();
        }
    }

    /**
     * Submits the insert tasks, blocking whenever four tasks are already in
     * flight. If the submitting thread is interrupted while waiting, the
     * interrupt flag is restored and no further tasks are submitted.
     */
    private void insert() {
        for (int i = 0; i < 1000; i++) {
            try {
                semaphore.acquire();
            } catch (InterruptedException ex) {
                LOGGER.log(Level.SEVERE, null, ex);
                // Preserve the interrupt for callers and stop submitting work.
                Thread.currentThread().interrupt();
                return;
            }
            // The permit is released by InsertTask.run() when the task ends.
            pool.execute(new InsertTask("REF-" + i, 100000));
        }
    }

    /**
     * Creates {@code num} nodes under the reference node reached from the
     * database reference node via an outgoing relationship named
     * {@code refName}, creating that reference node first if it is missing.
     *
     * @param refName name of the relationship type identifying the reference node
     * @param num     total number of nodes to create; nothing is created when
     *                it is not positive
     */
    private void insertRef(String refName, int num) {
        Node rootNode = neo.getReferenceNode();
        RelationshipType rt = DynamicRelationshipType.withName(refName);
        Relationship existing = rootNode.getSingleRelationship(rt, Direction.OUTGOING);
        Node refNode = (existing == null)
                ? createRef(rootNode, rt)
                : existing.getOtherNode(rootNode);

        int remaining = num;
        while (remaining > 0) {
            // The last batch may be smaller than BATCH_SIZE.
            int batch = Math.min(BATCH_SIZE, remaining);
            long elapsed = System.currentTimeMillis();
            Transaction tx = neo.beginTx();
            try {
                for (int j = 0; j < batch; j++) {
                    createNode(refNode);
                }
                tx.success();
                System.out.println("insert " + batch + " node.");
            } finally {
                // The reported time includes tx.finish().
                tx.finish();
                elapsed = System.currentTimeMillis() - elapsed;
                System.out.println("consume " + elapsed + " ms.");
            }
            remaining -= batch;
        }
    }

    /**
     * Creates a new node and links {@code node} to it with relationship type
     * {@code rt}, in a transaction of its own.
     *
     * @param node the node the new relationship starts from
     * @param rt   type of the relationship to create
     * @return the newly created node
     */
    private Node createRef(Node node, RelationshipType rt) {
        Transaction tx = neo.beginTx();
        try {
            Node created = node.getGraphDatabase().createNode();
            node.createRelationshipTo(created, rt);
            tx.success();
            return created;
        } finally {
            tx.finish();
        }
    }

    /**
     * Creates a new node, links {@code node} to it with a {@code LINK}
     * relationship and sets 100 integer properties ({@code key0}..{@code key99})
     * on it. Opens no transaction itself; {@link #insertRef} calls it inside one.
     *
     * @param node the parent node the new node is attached to
     * @return the newly created node
     */
    private Node createNode(Node node) {
        Node created = node.getGraphDatabase().createNode();
        node.createRelationshipTo(created, LINK);
        for (int i = 0; i < 100; i++) {
            created.setProperty("key" + i, i);
        }
        // Return the new node; the original returned the parent by mistake.
        return created;
    }

    /**
     * Runs one {@link #insertRef} call and releases the submission permit
     * afterwards, whether or not the insert succeeded.
     */
    class InsertTask implements Runnable {

        private final String refName;
        private final int num;

        public InsertTask(String refName, int num) {
            this.refName = refName;
            this.num = num;
        }

        @Override
        public void run() {
            try {
                insertRef(refName, num);
            } finally {
                semaphore.release();
            }
        }
    }
}

2 个答案:

答案 0 :(得分:2)

创建这么多小事务的目的是什么?Neo4j 中的每个事务都会强制将逻辑日志写入磁盘,因此基本上所有时间都花在等待磁盘刷新上。所以多线程帮助不大,反而可能更慢。请尝试在每个事务中合并更多操作,并且只使用一个线程。

顺便问一下你的用例是什么?

答案 1 :(得分:-1)

你是在Linux下的ext4文件系统上运行吗?

如果您愿意牺牲一点事务安全性来换取 10-15 倍的写入性能提升,请尝试使用 barrier=0 挂载选项,如下文所述:http://structr.org/blog/neo4j-performance-on-ext4