将巨大的csv文件导入neo4j

时间:2015-06-23 07:49:44

标签: csv neo4j cypher bulk

我知道有 Import tool,但在我的场景中,我必须逐行读取 CSV,并将每一行拆分为节点和关系。使用带索引和定期提交(periodic commit)的 LOAD CSV 查询导入 200 万行数据,耗时超过 12 小时。有没有办法让我使用上述工具,而不必先把 CSV 预处理成节点文件和关系文件?

以下是我使用的示例查询

// Indexes the asker created before running the import.
// NOTE(review): the import query in this question merges on :Person {mrn: ...},
// so the :Patient(mrno) index does not match that label/property pair.
CREATE INDEX ON :Patient(mrno);
CREATE INDEX ON :Location(city);
CREATE INDEX ON :Department(id);

// Asker's original single-pass import: each CSV row is used to merge/create
// Person, Admission, Department and Location nodes plus two relationships,
// with a periodic commit every 1000 rows.
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "file:///home/geralt/Desktop/Temp_Admission.csv" AS line
WITH line,
// NOTE(review): each `(CASE ... END, <default>)` below is a parenthesised pair,
// which does not look like a valid Cypher expression — possibly the remains of
// a coalesce(...) call; confirm.
(CASE  WHEN line.MRNo='' OR line.MRNo='null'  THEN "BLEH" ELSE line.MRNo END, "NA") AS mrn,
(CASE  WHEN line.ID_Admit='' OR line.ID_Admit='NULL'  THEN -1 ELSE line.ID_Admit END,0) AS ID_Admit,
(CASE  WHEN line.DeptCode_Admit='' OR line.DeptCode_Admit='NULL'  THEN -1 ELSE line.DeptCode_Admit END,0) AS DeptCode_Admit,
(CASE  WHEN line.City='' OR line.City='NULL'  THEN "BLEH" ELSE line.City END,"NA") AS city

// ON MATCH SET and ON CREATE SET assign the same three properties, so they are
// written for every row whether or not the node already existed.
MERGE (p:Person { mrn: mrn}) ON MATCH SET p.DOB=line.DateOfBirth,p.gender=line.GenderDescription,p.prefix=line.PrefixDescription ON CREATE SET p.DOB=line.DateOfBirth,p.gender=line.GenderDescription,p.prefix=line.PrefixDescription
// CREATE (not MERGE): a new :Admission node is produced for every row.
CREATE (a:Admission{HospitalName:line.Hospital,id:toInt(ID_Admit),unitId:line.UnitID_Admit,IPDNo:line.IPDNO,DateOfAdmission:line.Date_Admit})
// Only ON MATCH SET is given for the next two merges, so the extra properties
// are not set at the moment a node is first created.
MERGE(d:Department{id:toInt(DeptCode_Admit)}) ON MATCH SET d.name=line.DeptName_Admit
MERGE(l:Location{city:city}) ON MATCH SET l.country=line.Country,l.state=line.State


// Relationship patterns are written without parentheses around the node
// variables; `d` (Department) is not connected to anything in this query.
merge  p-[:Admitted]->a 
MERGE a-[:Located]->l

1 个答案:

答案 0 :(得分:3)

分多次运行来完成应该非常简单(您甚至可以用多个浏览器或 neo4j-shell 会话并行执行)。

  1. 删除 ON MATCH SET
  2. 您拼错了 mrno(写成了 mrn)
  3. 您缺少 :Person(mrno)、:Admission(id) 的索引
  4. 您的 CASE 语句写法有误
  5. 您在本应使用 ON CREATE SET 的地方使用了 ON MATCH SET
  6. 您可以在 WITH 中仅对需要导入的字段使用 DISTINCT,从而进一步优化导入,请参阅 Department 部分

这是修正后的完整、可分多次运行的导入脚本:

    // Schema objects for the import; run these before any LOAD CSV statement.
    // One index per property that the import statements look nodes up by.
    // NOTE(review): none of the import statements in this script use the
    // :Patient label — confirm this index is still wanted.
    CREATE INDEX ON :Patient(mrno);
    CREATE INDEX ON :Person(mrno);
    CREATE INDEX ON :Location(city);
    CREATE INDEX ON :Department(id);
    
    // Admission ids must be unique across all :Admission nodes.
    CREATE CONSTRAINT ON (adm:Admission) ASSERT adm.id IS UNIQUE;
    
    // Person pass: merge one :Person node per distinct mrno value in the CSV.
    // An empty or 'null' MRNo is replaced by the placeholder "NA".
    // DOB, gender and prefix are written only when the node is first created.
    // NOTE(review): the other statements compare against upper-case 'NULL' and
    // Cypher string equality is case-sensitive — confirm which spelling the CSV uses.
    // The stray EXPLAIN keyword was removed: it is only valid as a prefix to the
    // whole query and would show the plan instead of importing anything.
    USING PERIODIC COMMIT 100000
    LOAD CSV WITH HEADERS FROM "file:///home/geralt/Desktop/Temp_Admission.csv" AS line
    WITH line,
         CASE WHEN line.MRNo = '' OR line.MRNo = 'null' THEN "NA" ELSE line.MRNo END AS mrno
    MERGE (p:Person {mrno: mrno})
      ON CREATE SET p.DOB = line.DateOfBirth,
                    p.gender = line.GenderDescription,
                    p.prefix = line.PrefixDescription;
    
    
    // Admission pass: create one :Admission node for every CSV row.
    // An empty or 'NULL' ID_Admit becomes the sentinel -1; any other value is
    // converted with toInt().
    // NOTE(review): several rows mapping to -1 would presumably collide with a
    // uniqueness constraint on :Admission(id) — confirm against the data.
    USING PERIODIC COMMIT 10000
    LOAD CSV WITH HEADERS FROM "file:///home/geralt/Desktop/Temp_Admission.csv" AS row
    WITH row,
         CASE
           WHEN row.ID_Admit IN ['', 'NULL'] THEN -1
           ELSE toInt(row.ID_Admit)
         END AS admissionId
    CREATE (adm:Admission {
      HospitalName:    row.Hospital,
      id:              admissionId,
      unitId:          row.UnitID_Admit,
      IPDNo:           row.IPDNO,
      DateOfAdmission: row.Date_Admit
    });
    
    // Department pass: merge one :Department node per department code.
    // DISTINCT reduces the rows to their unique (name, code) pairs before merging.
    // An empty or 'NULL' DeptCode_Admit becomes -1; the name is stored only when
    // the node is first created.
    USING PERIODIC COMMIT 10000
    LOAD CSV WITH HEADERS FROM "file:///home/geralt/Desktop/Temp_Admission.csv" AS row
    WITH DISTINCT
         row.DeptName_Admit AS deptName,
         CASE
           WHEN row.DeptCode_Admit IN ['', 'NULL'] THEN -1
           ELSE toInt(row.DeptCode_Admit)
         END AS deptCode
    MERGE (dept:Department {id: deptCode})
      ON CREATE SET dept.name = deptName;
    
    
    // Location pass: merge one :Location node per city value.
    // An empty or 'NULL' City is replaced by the placeholder "NA"; country and
    // state are stored only when the node is first created.
    USING PERIODIC COMMIT 10000
    LOAD CSV WITH HEADERS FROM "file:///home/geralt/Desktop/Temp_Admission.csv" AS row
    WITH row,
         CASE
           WHEN row.City IN ['', 'NULL'] THEN "NA"
           ELSE row.City
         END AS cityName
    MERGE (loc:Location {city: cityName})
      ON CREATE SET loc.country = row.Country,
                    loc.state = row.State;
    
    
    // Relationship pass: connect each person to the admission on the same CSV row.
    // The keys are derived with the same CASE rules used when the nodes were loaded,
    // then both end nodes are looked up with MATCH before the relationship is merged.
    USING PERIODIC COMMIT 10000
    LOAD CSV WITH HEADERS FROM "file:///home/geralt/Desktop/Temp_Admission.csv" AS row
    WITH
         CASE
           WHEN row.MRNo IN ['', 'null'] THEN "NA"
           ELSE row.MRNo
         END AS personKey,
         CASE
           WHEN row.ID_Admit IN ['', 'NULL'] THEN -1
           ELSE toInt(row.ID_Admit)
         END AS admissionId
    MATCH (person:Person {mrno: personKey})
    MATCH (adm:Admission {id: admissionId})
    MERGE (person)-[:Admitted]->(adm);
    
    // Relationship pass: connect each admission to the location on the same CSV row.
    // An empty or 'NULL' ID_Admit maps to -1 and an empty or 'NULL' City maps to "NA",
    // matching the keys used when the nodes were loaded.
    // Both end nodes are looked up with MATCH, so a row whose nodes are missing
    // produces no relationship.
    // The stray EXPLAIN keyword was removed: it is only valid as a prefix to the
    // whole query and would show the plan instead of creating relationships.
    USING PERIODIC COMMIT 10000
    LOAD CSV WITH HEADERS FROM "file:///home/geralt/Desktop/Temp_Admission.csv" AS line
    WITH
         CASE WHEN line.ID_Admit = '' OR line.ID_Admit = 'NULL' THEN -1 ELSE toInt(line.ID_Admit) END AS ID_Admit,
         CASE WHEN line.City = '' OR line.City = 'NULL' THEN "NA" ELSE line.City END AS city
    MATCH (a:Admission {id: ID_Admit})
    MATCH (l:Location {city: city})
    MERGE (a)-[:Located]->(l);