尝试了解OrientDB ETL配置json文件。
假设有一个CSV文件(内容见下文):
如何将每个顶点的类设置为该行“class”列的值?
我在OrientDB ETL文档中花了很多时间试图解决这个问题。我尝试了 let、block 和 code 组件的许多不同组合,也尝试过不同的变量名称,例如 className、$className 和 ${classname}。
当前结果:
code 组件能够正确打印“className”的值,因此我知道该变量已被正确设置。但 vertex 组件未能正确引用该变量,因此每个顶点的类都被设置为 null。我在localhost上有一个名为“deleteme”的新创建的数据库(PLOCAL GRAPH)。
我有一个顶点CSV文件(nodes.csv),如下所示:
id,name,class
1,Jack,Foo
2,Jill,Bar
3,Gephri,Baz
一个ETL配置文件(test.json),如下所示:
{
"config": {
"log": "DEBUG"
},
"source": {"file": {"path": "nodes.csv"}},
"extractor": {"csv": {}},
"transformers": [
{"block": {"let": {"name": "$className",
"value": "$input.class"}}},
{"code": {"language": "Javascript",
"code": "print(className + '\\n'); input;"}},
{"vertex": {"class": "$className"}}
],
"loader": {
"orientdb": {
"dbURL": "remote:localhost:2424/deleteme",
"dbUser": "admin",
"dbPassword": "admin",
"dbType": "graph",
"tx": false,
"wal": false,
"batchCommit": 1000,
"classes": [
{"name": "Foo", "extends": "V"},
{"name": "Bar", "extends": "V"},
{"name": "Baz", "extends": "V"}
]
}
}
}
当我运行ETL作业时,我的输出看起来像这样:
aj@host:~/bin/orientdb-community-2.1.13/bin$ ./oetl.sh test.json
OrientDB etl v.2.1.13 (build 2.1.x@r9bc1a54a4a62c4de555fc5360357f446f8d2bc84; 2016-03-14 17:00:05+0000) www.orientdb.com
BEGIN ETL PROCESSOR
[file] INFO Reading from file nodes.csv with encoding UTF-8
[orientdb] DEBUG - OrientDBLoader: created vertex class 'Foo' extends 'V'
[orientdb] DEBUG orientdb: found 0 vertices in class 'null'
+ extracted 0 rows (0 rows/sec) - 0 rows -> loaded 0 vertices (0 vertices/sec) Total time: 1001ms [0 warnings, 0 errors]
[orientdb] DEBUG - OrientDBLoader: created vertex class 'Bar' extends 'V'
[orientdb] DEBUG orientdb: found 0 vertices in class 'null'
[orientdb] DEBUG - OrientDBLoader: created vertex class 'Baz' extends 'V'
[orientdb] DEBUG orientdb: found 0 vertices in class 'null'
[csv] DEBUG document={id:1,class:Foo,name:Jack}
[1:block] DEBUG Transformer input: {id:1,class:Foo,name:Jack}
[1:block] DEBUG Transformer output: {id:1,class:Foo,name:Jack}
[1:code] DEBUG Transformer input: {id:1,class:Foo,name:Jack}
Foo
[1:code] DEBUG executed code=OCommandExecutorScript [text=print(className); input;], result={id:1,class:Foo,name:Jack}
[1:code] DEBUG Transformer output: {id:1,class:Foo,name:Jack}
[1:vertex] DEBUG Transformer input: {id:1,class:Foo,name:Jack}
[1:vertex] DEBUG Transformer output: v(null)[#3:0]
[csv] DEBUG document={id:2,class:Bar,name:Jill}
[2:block] DEBUG Transformer input: {id:2,class:Bar,name:Jill}
[2:block] DEBUG Transformer output: {id:2,class:Bar,name:Jill}
[2:code] DEBUG Transformer input: {id:2,class:Bar,name:Jill}
Bar
[2:code] DEBUG executed code=OCommandExecutorScript [text=print(className); input;], result={id:2,class:Bar,name:Jill}
[2:code] DEBUG Transformer output: {id:2,class:Bar,name:Jill}
[2:vertex] DEBUG Transformer input: {id:2,class:Bar,name:Jill}
[2:vertex] DEBUG Transformer output: v(null)[#3:1]
[csv] DEBUG document={id:3,class:Baz,name:Gephri}
[3:block] DEBUG Transformer input: {id:3,class:Baz,name:Gephri}
[3:block] DEBUG Transformer output: {id:3,class:Baz,name:Gephri}
[3:code] DEBUG Transformer input: {id:3,class:Baz,name:Gephri}
Baz
[3:code] DEBUG executed code=OCommandExecutorScript [text=print(className); input;], result={id:3,class:Baz,name:Gephri}
[3:code] DEBUG Transformer output: {id:3,class:Baz,name:Gephri}
[3:vertex] DEBUG Transformer input: {id:3,class:Baz,name:Gephri}
[3:vertex] DEBUG Transformer output: v(null)[#3:2]
END ETL PROCESSOR
+ extracted 3 rows (4 rows/sec) - 3 rows -> loaded 3 vertices (4 vertices/sec) Total time: 1684ms [0 warnings, 0 errors]
另外,日志中的 DEBUG orientdb: found 0 vertices in class 'null' 是什么意思?
答案 0 :(得分:1)
试试这个。我也和它搏斗了一段时间,但下面的设置对我有用。
请注意,在 vertex 变换器之前(通过 field 变换器)设置 @class,会使用正确的类初始化顶点。
"transformers": [
{"block": {"let": {"name": "$className",
"value": "$input.class"}}},
{"code": {"language": "Javascript",
"code": "print(className + '\\n'); input;"}},
{ "field": {
"fieldName": "@class",
"expression": "$className"
}
},
{"vertex": {}}
]
答案 1 :(得分:0)
要获得结果,您可以先使用“ETL”将数据从csv导入名为“Generic”的类(CLASS)。然后通过JS函数“separateClass()”,根据从csv导入的属性“class”创建新的类,并将Generic类中的顶点放到对应的新类中。
档案json:
{
"source": { "file": {"path": "data.csv"}},
"extractor": { "row": {}},
"begin": [
{ "let": { "name": "$className", "value": "Generic"} }
],
"transformers": [
{"csv": {
"separator": ",",
"nullValue": "NULL",
"columnsOnFirstLine": true,
"columns": [
"id:Integer",
"name:String",
"class:String"
]
}
},
{"vertex": {"class": "$className", "skipDuplicates": true}}
],
"loader": {
"orientdb": {
"dbURL": "remote:localhost/test",
"dbType": "graph"
}
}
}
从etl导入数据后,在javascript中创建函数
// Body of an OrientDB server-side JavaScript function ("separateClass").
// Reads every vertex of the staging class Generic, inserts a copy (id, name)
// into the vertex class named by the record's "class" property -- creating
// that class on first use -- and finally empties Generic.
// Kept in ES5 syntax (var, string concatenation) because it runs in the
// database's embedded script engine, not in Node -- NOTE(review): confirm
// which engine/ES level your OrientDB version provides before modernizing.
var g = orient.getGraphNoTx();
var queryResult = g.command("sql", "SELECT FROM Generic");
// Expected vertex fields: id, name, class

// Class names are concatenated into SQL text, so only plain identifiers are
// accepted; anything else aborts the run before Generic is truncated.
var CLASS_NAME_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;
var INTEGER_PATTERN = /^-?\d+$/;
// Classes already verified (or created) during this run, to avoid querying
// the schema once per row.
var knownClasses = {};

// Renders a value as an SQL string literal (backslash-escaping \ and '),
// or as the SQL keyword null when the value is missing.
function toSqlString(value) {
  if (value === null || value === undefined) {
    return "null";
  }
  // String(...) first: the property may be a Java string, whose replace()
  // does not take a JavaScript regular expression.
  var text = String(value);
  return "'" + text.replace(/\\/g, "\\\\").replace(/'/g, "\\'") + "'";
}

// Renders a value as an SQL integer literal, or null when missing.
// Throws when the value is present but not an integer.
function toSqlInteger(value, rowIndex) {
  if (value === null || value === undefined) {
    return "null";
  }
  var text = String(value);
  if (!INTEGER_PATTERN.test(text)) {
    throw new Error("Row " + rowIndex + " of Generic has a non-integer id: '" + text + "'");
  }
  return text;
}

if (!queryResult.length) {
  print("Empty");
} else {
  // For each staged vertex: make sure its target class exists, then insert.
  for (var i = 0; i < queryResult.length; i++) {
    var row = queryResult[i];
    var rawClass = row.getProperty("class");
    var className = (rawClass === null || rawClass === undefined) ? "" : String(rawClass);
    if (!CLASS_NAME_PATTERN.test(className)) {
      throw new Error("Row " + i + " of Generic has a missing or invalid class name: '" + className + "'");
    }

    // Check whether className is already defined. The schema is queried
    // (not the vertices) so that an existing-but-empty class is detected too.
    // NOTE(review): the match on name is exact; confirm whether differently
    // cased names for the same class can occur in your data.
    if (!Object.prototype.hasOwnProperty.call(knownClasses, className)) {
      var existing = g.command("sql",
        "select from (select expand(classes) from metadata:schema) where name = '" + className + "'");
      if (!existing.length) {
        g.command("sql", "CREATE CLASS " + className + " extends V");
        g.command("sql", " CREATE PROPERTY " + className + ".id INTEGER");
        g.command("sql", " CREATE PROPERTY " + className + ".name STRING");
        g.commit();
      }
      knownClasses[className] = true;
    }

    var id = toSqlInteger(row.getProperty("id"), i);
    var name = toSqlString(row.getProperty("name"));
    g.command("sql", "INSERT INTO " + className + " (id, name) VALUES (" + id + "," + name + ")");
    g.commit();
  }
  // Remove the staged copies now that every row has been moved.
  g.command("sql", "truncate class Generic unsafe");
}
结果应该如图所示。