我想在 Java 代码中使用 Apache Jena 库将 TSV 文件转换为 RDF 文件。我找到了一个将 CSV 转换为 RDF 的示例,但它对我帮助不大。链接为:http://www.essi.upc.edu/dtim/blog/post/enter-the-world-of-semantics-using-jena-to-convert-your-data-to-rdf
能给我一些思路吗?非常感谢!我应该如何修改下面给出的代码?
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.Scanner;
import org.apache.commons.io.FileUtils;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import org.apache.jena.propertytable.graph.GraphCSV;
import org.apache.jena.propertytable.lang.CSV2RDF;
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.util.FileManager;
import org.apache.jena.vocabulary.RDF;
public static void convertCSVToRDF (String file,
String inputFilename, String outputFilename,String outputType) {
//Just a few lines below to convert the data from CSV to an RDF graph,
see how easy?!
CSV2RDF.init();//Initialise the CSV conversion engine in Jena
GraphCSV newGraph = new GraphCSV(inputFilename);
Model model = ModelFactory.createModelForGraph(newGraph);
//Manually insert class triples for each instance in the CSV file
String sparqlQueryString = "select distinct ?s where {?s ?p ?o}";
Query query = QueryFactory.create(sparqlQueryString);
QueryExecution qexec = QueryExecutionFactory.create(sparqlQueryString, model);
ResultSet s = qexec.execSelect();
Model m2 = ModelFactory.createDefaultModel();
while(s.hasNext()) {
QuerySolution so = s.nextSolution();
Triple t = new Triple(so.getResource("s").asNode(),RDF.type.asNode(),
NodeFactory.createBlankNode(file));
Statement stmt = ResourceFactory.createStatement(so.getResource("s"), RDF.type,
ResourceFactory.createResource(file));
m2.add(stmt);
}
Model m3 = ModelFactory.createUnion(model, m2); //create a new RDF graph which "unions"
//the old graph with the new graph containing
//the new rows
//Now serialize the RDF graph to an output file using the outputType input variable
you specify. It should be “N-Triple” in our case.
try {
FileWriter out = new FileWriter(outputFilename);
m3.write(out,outputType);
} catch (Exception e) {
System.out.println("Error in the file output process!");
e.printStackTrace();
}
//Delete specific triples of a specific predicate called ¨row¨
File output = new File(outputFilename);
File tempFile = new File("C:/Users/user1/SampleFile/temp.nt");
BufferedReader reader = null;
BufferedWriter writer = null;
try {
reader = new BufferedReader(new FileReader(output));
writer = new BufferedWriter(new FileWriter(tempFile));
String currentLine;
//Delete triples from the old file by skipping it while reading the input N-Triple
file from the last step, otherwise write the triple to a new temp file!
while ((currentLine = reader.readLine()) != null) {
if (currentLine.contains("http://w3c/future-csv-vocab/row")) {
continue;
} else {
writer.write(currentLine);
writer.newLine();
}
}
writer.close();
reader.close();
PrintWriter printer = new PrintWriter(output);
printer.print("");
printer.close();
//copy content from temp file to final output file, overwriting it.
FileUtils.copyFile(tempFile, output);
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
答案 0 :(得分:0)
Jena 中没有将 TSV 转换为 RDF 的内置方法,因为 TSV(和 CSV 一样)并不是 RDF 的一种格式,而是用来表示任何可以以表格形式呈现的数据的格式。也许正是由于这个原因,CSV 支持已经从 Jena 中移除(jena-csv 的最后一个版本是 3.9.0)。
但是 TSV(tab separated values,制表符分隔值)是一种非常简单的格式。将 TSV 数据转换为 RDF 是一项相当简单的任务(大约 10 分钟的编码)。您可以随意处理 TSV 数据,例如,可以把整个 TSV 作为一个巨大的字面量添加进去。
不过,下面的代码展示了一种比较合理的做法:每一行都是一个带有数据属性断言的 OWL 个体(individual)。
String tsv = "Sepal length\tSepal width\tPetal length\tPetal width\tSpecies\n" +
"5.1\t3.5\t1.4\t0.2\tI. setosa\n" +
"4.9\t3.0\t1.4\t0.2\tI. setosa";
Charset ch = StandardCharsets.UTF_8;
String separator = "\t";
String ns = "http://ex#";
UnaryOperator<String> nameToURI = s -> ns + s.toLowerCase().replace(" ", "_");
Model m = ModelFactory.createDefaultModel()
.setNsPrefixes(PrefixMapping.Standard)
.setNsPrefix("ex", ns);
Resource clazz = m.createResource(ns + "MyClass", OWL.Class);
try (InputStream is = new ByteArrayInputStream(tsv.getBytes(ch));
Reader r = new InputStreamReader(is, ch);
BufferedReader br = new BufferedReader(r)) {
String first = br.lines().findFirst().orElseThrow(IllegalArgumentException::new);
List<Property> props = Arrays.stream(first.split(separator))
.map(s -> m.createResource(nameToURI.apply(s), OWL.DatatypeProperty)
.addProperty(RDFS.label, s).as(Property.class))
.collect(Collectors.toList());
br.lines().forEach(line -> {
String[] data = line.split(separator);
if (data.length != props.size()) throw new IllegalArgumentException();
Resource individual = m.createResource(clazz);
for (int i = 0; i < data.length; i++) {
individual.addProperty(props.get(i), data[i]);
}
});
}
m.write(System.out, "ttl");
输出:
@prefix ex: <http://ex#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
ex:MyClass a owl:Class .
ex:sepal_width a owl:DatatypeProperty ;
rdfs:label "Sepal width" .
ex:species a owl:DatatypeProperty ;
rdfs:label "Species" .
ex:sepal_length a owl:DatatypeProperty ;
rdfs:label "Sepal length" .
ex:petal_length a owl:DatatypeProperty ;
rdfs:label "Petal length" .
ex:petal_width a owl:DatatypeProperty ;
rdfs:label "Petal width" .
[ a ex:MyClass ;
ex:petal_length "1.4" ;
ex:petal_width "0.2" ;
ex:sepal_length "5.1" ;
ex:sepal_width "3.5" ;
ex:species "I. setosa"
] .
[ a ex:MyClass ;
ex:petal_length "1.4" ;
ex:petal_width "0.2" ;
ex:sepal_length "4.9" ;
ex:sepal_width "3.0" ;
ex:species "I. setosa"
] .