Neo4j SPARQL查询停止工作

时间:2013-10-29 16:07:24

标签: java neo4j sparql n-triples

我写了两个用于将 *.nt 文件转换为 Neo4j 数据库的 Java 类。第一个类使用 SPARQL-Plugin 的 loadTriples() 函数:
https://github.com/neo4j-contrib/sparqlplugin/blob/master/src/test/java/org/neo4j/server/plugin/sparql/BerlinDatasetTest.java 这是第一个类的源代码:

package src;

import com.tinkerpop.blueprints.TransactionalGraph;
import com.tinkerpop.blueprints.impls.neo4j.Neo4jGraph;
import com.tinkerpop.blueprints.oupls.sail.GraphSail;
import com.tinkerpop.blueprints.util.wrappers.batch.BatchGraph;
import com.tinkerpop.blueprints.util.wrappers.batch.VertexIDType;
import java.io.File;
import java.net.URI;
import java.net.URL;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.repository.sail.SailRepositoryConnection;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.helpers.BasicParserSettings;
import org.openrdf.sail.Sail;


/**
 * Loads an N-Triples file into an embedded Neo4j store by going through the
 * Blueprints {@code GraphSail} and a Sesame {@code SailRepository} connection.
 */
public class QUADParser2 {

    /** Directory of the Neo4j store that receives the imported triples. */
    private static final String DB_PATH = "db/graphdb_qp2";

    /** Buffer size handed to {@code BatchGraph}; it must be positive. */
    private static final int BATCH_BUFFER_SIZE = 1;

    /** The N-Triples file to import. */
    private final File f;


    /**
     * @param input_file the N-Triples file that {@link #parseFile()} will load
     */
    public QUADParser2(File input_file) {
        this.f = input_file;
    }


    /**
     * Opens the store at {@code DB_PATH}, loads the input file into it and
     * shuts everything down again.
     *
     * <p>Failures while loading are printed to {@code System.out} and not
     * rethrown (as before). The sail and the graph are now shut down on every
     * path, including when initialisation or loading fails.
     *
     * @throws Exception if the sail cannot be created, initialised or shut down
     */
    public void parseFile() throws Exception {
        Neo4jGraph neo4jGraph = new Neo4jGraph(DB_PATH);
        // NOTE(review): the batch wrapper is only used for shutdown below; the
        // triples are written through the sail built on neo4jGraph directly.
        BatchGraph<TransactionalGraph> neo =
                new BatchGraph<TransactionalGraph>(neo4jGraph, VertexIDType.NUMBER, BATCH_BUFFER_SIZE);
        try {
            Sail sail = new GraphSail(neo4jGraph);
            sail.initialize();
            try {
                loadTriples(sail);
            } finally {
                sail.shutDown();
            }
        } finally {
            neo.shutdown();
        }
    }


    /**
     * Adds the content of the input file to the given sail as N-Triples and
     * commits. The connection is closed whether or not the load succeeds;
     * any exception is printed to {@code System.out} and swallowed.
     */
    private void loadTriples(Sail sail) {
        try {
            SailRepositoryConnection connection = new SailRepository(sail).getConnection();
            try {
                URL url = f.toURI().toURL();
                System.out.println("Loading " + url + ": ");

                // Invalid datatype values are reported but do not abort the load.
                connection.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
                connection.add(url, null, RDFFormat.NTRIPLES);
                connection.commit();
            } finally {
                connection.close();
            }
            // Only reported when the load, commit and close all succeeded.
            System.out.print("Done.");
        } catch (Exception e1) {
            e1.printStackTrace(System.out);
        }
    }


}

第二个类仅使用 Neo4j 库将 *.nt 文件转换为 Neo4j 数据库。这是它的源代码:

package src;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Iterator;
import org.neo4j.graphdb.DynamicRelationshipType;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.tooling.GlobalGraphOperations;


/**
 * Converts an N-Triples file into an embedded Neo4j database using only the
 * Neo4j API. Every subject and object becomes a node, every predicate a
 * relationship between the two.
 *
 * <p>Nodes are de-duplicated through a temporary {@code custom_key} property
 * that {@link #cleanNodes()} removes once the import is complete.
 */
public class QUADParser41 {
    GraphDatabaseService graphDb;

    /** The N-Triples file to import. */
    private File f;

    /** Whether the Blueprints meta node has already been created for this run. */
    private boolean init = false;
    /**
     * Value written to the "c" property (and as prefix of "cp") of every relationship.
     * Alternative value kept for reference:
     * "http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances"
     */
    private String G_NAME = "N";
    /** Directory of the Neo4j store; overwritten by the constructor argument. */
    private String DB_PATH = "db/graphdb_qp41";


    /**
     * @param input_db_path directory of the Neo4j store to create or open
     * @param input_file    the N-Triples file that {@link #parseFile()} will read
     */
    public QUADParser41(String input_db_path, File input_file) {
        this.DB_PATH = input_db_path;
        this.f = input_file;
    }


    /**
     * Splits one N-Triples statement into subject, predicate and object.
     *
     * <p>The statement terminator, every {@code <} and every {@code "} are
     * removed, then the text is split on {@code "> "}. Surrounding whitespace
     * is ignored, so a line ending in {@code ". "} is handled the same way as
     * one ending in {@code "."}.
     *
     * @param line one line of the input file
     * @return an array whose first three elements are subject, predicate and
     *         object (the object is trimmed)
     * @throws IllegalArgumentException if the line does not yield three parts
     */
    public String[] getTriple(String line) {
        String statement = line.trim();
        // Drop the terminating '.' only when it is really there.
        if (statement.endsWith(".")) {
            statement = statement.substring(0, statement.length() - 1);
        }
        // Remove '<' and '"'; the remaining '>' characters act as separators.
        statement = statement.replace("<", "").replace("\"", "");

        String[] output = statement.split("> ");
        if (output.length < 3) {
            throw new IllegalArgumentException("Not a valid triple: " + line);
        }

        output[2] = output[2].trim();

        return output;
    }




    /**
     * Reads the input file, imports its triples in groups of roughly 10,000
     * and finally strips the helper data via {@link #cleanNodes()}.
     *
     * <p>Blank lines and {@code #} comment lines are skipped. Any failure is
     * printed to {@code System.out}; the reader is closed and the database is
     * shut down on every path.
     */
    public void parseFile() {
        this.init = false;

        graphDb = new GraphDatabaseFactory().newEmbeddedDatabase(this.DB_PATH);

        ArrayList<String[]> triples = new ArrayList<String[]>();
        BufferedReader bfr = null;

        try {
            bfr = new BufferedReader(new FileReader(f));

            String current;
            while ((current = bfr.readLine()) != null) {
                if (isIgnorable(current)) {
                    continue;
                }

                // collect triples
                triples.add(getTriple(current));

                // group triples to avoid memory issues
                if (triples.size() > 10000) {
                    System.out.println("adding up to 10k nodes");
                    addNodes(triples);
                    triples.clear();
                }
            }
            // add remaining triples
            addNodes(triples);
            triples.clear();

            // remove custom_key property from all nodes
            // remove node with id 0
            cleanNodes();

        } catch (Exception e) {
            e.printStackTrace(System.out);
        } finally {
            closeQuietly(bfr);
            try {
                this.graphDb.shutdown();
            } catch (RuntimeException e) {
                e.printStackTrace(System.out);
            }
        }

    }


    /** Returns true for lines that carry no statement: blank lines and '#' comments. */
    private boolean isIgnorable(String line) {
        String trimmed = line.trim();
        return trimmed.length() == 0 || trimmed.startsWith("#");
    }


    /** Closes the reader if it was opened; a failure to close is only printed. */
    private void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (java.io.IOException e) {
            e.printStackTrace(System.out);
        }
    }



    /**
     * Removes the custom_key property from all nodes
     * and deletes the node with id 0.
     */
    public void cleanNodes() {
        int c = 0;

        System.out.println("cleaning nodes");
        Transaction tx = graphDb.beginTx();
        try {
            // Obtained inside the try so the transaction is finished even if this call fails.
            Iterator<Node> nodes = GlobalGraphOperations.at(graphDb).getAllNodes().iterator();
            while (nodes.hasNext()) {
                Node n = nodes.next();
                if (n.getId() == 0) {
                    n.delete();
                } else if (n.hasProperty("custom_key")) {
                    // count only nodes whose property was actually removed
                    if (n.removeProperty("custom_key") != null) {
                        c++;
                    }
                }
            }
            tx.success();
        } finally {
            tx.finish();
        }

        System.out.println("Cleaning is done, cleaned " + c + " nodes");

    }


    /**
     * Looks up the node whose custom_key property equals the given key.
     *
     * <p>NOTE(review): this walks every node in the database on each call, so
     * the import cost grows with the square of the node count.
     *
     * @param custom_key the key to search for
     * @return the first matching node, or null if there is none
     */
    public Node findNode(String custom_key) {
        Iterator<Node> nodes = GlobalGraphOperations.at(graphDb).getAllNodes().iterator();
        while (nodes.hasNext()) {
            Node n = nodes.next();
            if (n.hasProperty("custom_key") && n.getProperty("custom_key").equals(custom_key)) {
                return n;
            }
        }
        return null;
    }



    /**
     * Writes the given triples to the database inside a single transaction.
     *
     * <p>On the first call of a run a meta node is created first. For each
     * triple the subject and object nodes are looked up (or created) and a
     * relationship named after the predicate is added, carrying the
     * properties "cp", "c" and "p".
     *
     * @param triples arrays of subject, predicate, object as produced by
     *                {@link #getTriple(String)}
     */
    public void addNodes(ArrayList<String[]> triples) {
        ArrayList<Relationship> MASTER_RELS = new ArrayList<Relationship>();

        Transaction transaction = graphDb.beginTx();

        try {
            // blueprints meta node, once per run
            if (!init) {
                Node bp_meta = graphDb.createNode();
                bp_meta.setProperty("value", "urn:com.tinkerpop.blueprints.pgm.oupls.sail:namespaces");
                init = true;
                System.out.println("* Added meta node");
            }
            // add actual content
            for (String[] triple : triples) {
                Node s = findOrCreateNode(triple[0]);
                Node o = findOrCreateNode(triple[2]);

                // predicate is the relationship name
                DynamicRelationshipType drt = DynamicRelationshipType.withName(triple[1]);

                Relationship p = s.createRelationshipTo(o, drt);
                p.setProperty("cp", G_NAME + " U " + triple[1]);
                p.setProperty("c", G_NAME);
                p.setProperty("p", "U " + triple[1]);
                // NOTE(review): p was created just above, so this presumably never matches - confirm.
                if (MASTER_RELS.indexOf(p) >= 0) {
                    System.out.println("double relationship!");
                } else {
                    MASTER_RELS.add(p);
                }

            }
            // end transaction
            transaction.success();
        } finally {
            transaction.finish();
            System.out.println("done with adding nodes");
            System.out.println("processing next 10k nodes");
        }
    }


    /**
     * Returns the node for a triple element, creating it with all properties
     * from {@link #getPropertyList(String)} when no node with the same
     * custom_key exists yet.
     */
    private Node findOrCreateNode(String entity) {
        ArrayList<String[]> nprops = getPropertyList(entity);
        // custom_key is always the last entry of the property list
        Node node = findNode(nprops.get(nprops.size() - 1)[1]);
        if (node == null) {
            node = graphDb.createNode();
            for (String[] prop : nprops) {
                node.setProperty(prop[0], prop[1]);
            }
        }
        return node;
    }



    /**
     * Create property list for given triple element.
     *
     * <p>Elements containing "http://" are treated as typed literals when they
     * also contain "^^" and as URIs otherwise; other elements containing "@"
     * are treated as language-tagged literals; everything else is a simple
     * literal. The last entry is always custom_key, the concatenation of all
     * preceding values. A missing part after "^^" or "@" yields an empty value.
     *
     * @param entity An element of a triple
     * @return List of (key,value) pairs. Those are
     * the properties which should be created for this node
     */
    public ArrayList<String[]> getPropertyList(String entity) {
        ArrayList<String[]> plist = new ArrayList<String[]>();

        if (entity.contains("http://")) {
            if (entity.contains("^^")) {
                // typed literal: x^^y
                String[] parts = entity.split("\\^\\^");
                plist.add(pair("value", partOrEmpty(parts, 0)));
                plist.add(pair("type", partOrEmpty(parts, 1)));
                plist.add(pair("kind", "literal"));
            } else {
                // uri
                plist.add(pair("value", entity));
                plist.add(pair("kind", "uri"));
            }
        } else if (entity.contains("@")) {
            // literal with language tag: x@y
            String[] parts = entity.split("@");
            plist.add(pair("value", partOrEmpty(parts, 0)));
            plist.add(pair("lang", partOrEmpty(parts, 1)));
            plist.add(pair("kind", "literal"));
        } else {
            // simple literal like "xyz"
            plist.add(pair("value", entity));
            plist.add(pair("kind", "literal"));
        }

        // custom_key = all values collected so far, in order
        StringBuilder key = new StringBuilder();
        for (String[] prop : plist) {
            key.append(prop[1]);
        }
        plist.add(pair("custom_key", key.toString()));

        return plist;
    }


    /** Builds one (key,value) entry of a property list. */
    private static String[] pair(String key, String value) {
        return new String[] { key, value };
    }


    /** Returns parts[index], or the empty string when the split produced fewer parts. */
    private static String partOrEmpty(String[] parts, int index) {
        return index < parts.length ? parts[index] : "";
    }


}

第二个类应该创建与第一个类完全相同的数据库。我写了一个测试类来比较所有节点及其属性;这个类告诉我两个数据库没有任何区别。这是(相当小的)*.nt 测试数据(我称之为 q6_test.nt):

<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product1> <http://www.w3.org/2000/01/rdf-schema#label> "Car" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product1> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product2> <http://www.w3.org/2000/01/rdf-schema#label> "Orange" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product3> <http://www.w3.org/2000/01/rdf-schema#label> "Cherry" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product3> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product4> <http://www.w3.org/2000/01/rdf-schema#label> "Cookie" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product4> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product5> <http://www.w3.org/2000/01/rdf-schema#label> "Bike" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product5> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product6> <http://www.w3.org/2000/01/rdf-schema#label> "Pen" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product6> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product7> <http://www.w3.org/2000/01/rdf-schema#label> "Paper" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product7> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product8> <http://www.w3.org/2000/01/rdf-schema#label> "Book" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product8> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product9> <http://www.w3.org/2000/01/rdf-schema#label> "Shoe" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product9> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product10> <http://www.w3.org/2000/01/rdf-schema#label> "Shirt" .
<http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product10> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product> .

我正在尝试对数据集运行以下查询

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>  
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
PREFIX bsbm: <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/>
SELECT ?product ?label WHERE {
    ?product rdfs:label ?label 
    ?product rdf:type bsbm:Product 
    FILTER regex(?label, "r")}

我的问题:如果我在第一个类创建的数据库上运行此查询,无论运行多少次都能得到结果;但如果在第二个类创建的数据库上运行此查询,只有最初的几次能得到结果(尤其是当我在每次运行之间等待一两分钟时)。此外,如果我在查询中交换以下两行,查询在第二个类创建的数据库上始终有效:

?product rdfs:label ?label 
?product rdf:type bsbm:Product


(但如果可能,我希望在不修改查询的情况下解决此问题。)以下是我在数据集上测试查询的步骤:
1)运行 Java 类
2)删除 neo4j/data/graph.db 文件夹的所有内容
3)从 neo4j/data/ 文件夹中删除 keystore 和 rrd 文件
4)运行 Neo4j(等待它启动完成)
5)停止 Neo4j
6)删除 neo4j/data/graph.db 文件夹的所有内容
7)将我的 Java 类创建的数据库文件夹的所有内容复制到 neo4j/data/graph.db 文件夹
8)启动 Neo4j
9)运行查询
(我可能不必执行所有这些步骤,但我想更加确定自己是在全新的数据库上操作。)
我的系统:
Neo4j版本:community-1.9.4(Windows,从zip存档安装)
附加说明:我已将 Gremlin 和 SPARQL 插件的 Blueprints 库更新到我能找到的最新版本(2.5.0 版)
操作系统:Windows 7(Service Pack 1)
Java:JDK 1.7

1 个答案:

答案 0 :(得分:0)

使用最新版本的Neo4j,Blueprints和Sesame解决了这个问题。

Neo4j 2.0.1
http://www.neo4j.org/

Blueprints 2.5.0 / 3.0.0
https://github.com/tinkerpop/blueprints
https://oss.sonatype.org/content/repositories/snapshots/com/tinkerpop/blueprints/

Sesame 2.7
http://www.openrdf.org/