Neo4j Cypher查询结构和性能优化

时间:2017-05-06 20:03:23

标签: neo4j cypher

我创建了一个Cypher查询动态构建器。对于复杂的情况,此构建器会生成相当大的查询,例如:

MATCH (parentD)-[:CONTAINS]->(childD:Decision)-[ru:CREATED_BY]->(u:User) 
WHERE id(parentD) = {decisionId} 
MATCH (childD)<-[:SET_FOR]-(filterValue415431:Value)-[:SET_ON]->(filterCharacteristic415431:Characteristic) 
WHERE id(filterCharacteristic415431) = 415431 
WITH filterValue415431, childD, ru, u 
WHERE  ({filterValue4154311} IN filterValue415431.value ) 
 OR ({filterValue4154312} IN filterValue415431.value ) 
 OR ({filterValue4154313} IN filterValue415431.value ) 
 OR ({filterValue4154314} IN filterValue415431.value ) 
 OR ({filterValue4154315} IN filterValue415431.value )  
MATCH (childD)<-[:SET_FOR]-(filterValue415441:Value)-[:SET_ON]->(filterCharacteristic415441:Characteristic) 
WHERE id(filterCharacteristic415441) = 415441 
WITH filterValue415441, childD, ru, u 
WHERE  ({filterValue4154416} IN filterValue415441.value ) 
 OR ({filterValue4154417} IN filterValue415441.value ) 
 OR ({filterValue4154418} IN filterValue415441.value ) 
 OR ({filterValue4154419} IN filterValue415441.value ) 
 OR ({filterValue41544110} IN filterValue415441.value ) 
 OR ({filterValue41544111} IN filterValue415441.value ) 
 OR ({filterValue41544112} IN filterValue415441.value ) 
 OR ({filterValue41544113} IN filterValue415441.value ) 
 OR ({filterValue41544114} IN filterValue415441.value ) 
 OR ({filterValue41544115} IN filterValue415441.value ) 
 OR ({filterValue41544116} IN filterValue415441.value ) 
 OR ({filterValue41544117} IN filterValue415441.value )  
MATCH (childD)<-[:SET_FOR]-(filterValue416273:Value)-[:SET_ON]->(filterCharacteristic416273:Characteristic) 
WHERE id(filterCharacteristic416273) = 416273 
WITH filterValue416273, childD, ru, u 
WHERE  (filterValue416273.value >= {filterValue41627318}) 
 AND (filterValue416273.value <= {filterValue41627319})  
MATCH (childD)<-[:SET_FOR]-(filterValue417410:Value)-[:SET_ON]->(filterCharacteristic417410:Characteristic) 
WHERE id(filterCharacteristic417410) = 417410 
WITH filterValue417410, childD, ru, u 
MATCH (childD)<-[:SET_FOR]-(filterValue416423:Value)-[:SET_ON]->(filterCharacteristic416423:Characteristic) 
WHERE id(filterCharacteristic416423) = 416423 
WITH filterValue416423, childD, ru, u 
WHERE  ({filterValue41642320} IN filterValue416423.value ) 
 OR ({filterValue41642321} IN filterValue416423.value ) 
 OR ({filterValue41642322} IN filterValue416423.value ) 
 OR ({filterValue41642323} IN filterValue416423.value )  
MATCH (childD)<-[:SET_FOR]-(filterValue415673:Value)-[:SET_ON]->(filterCharacteristic415673:Characteristic) 
WHERE id(filterCharacteristic415673) = 415673 
WITH filterValue415673, childD, ru, u 
WHERE  ({filterValue41567324} IN filterValue415673.value ) 
 OR ({filterValue41567325} IN filterValue415673.value ) 
 OR ({filterValue41567326} IN filterValue415673.value ) 
 OR ({filterValue41567327} IN filterValue415673.value ) 
 OR ({filterValue41567328} IN filterValue415673.value ) 
 OR ({filterValue41567329} IN filterValue415673.value ) 
 OR ({filterValue41567330} IN filterValue415673.value ) 
 OR ({filterValue41567331} IN filterValue415673.value ) 
 OR ({filterValue41567332} IN filterValue415673.value ) 
 OR ({filterValue41567333} IN filterValue415673.value ) 
 OR ({filterValue41567334} IN filterValue415673.value ) 
 OR ({filterValue41567335} IN filterValue415673.value ) 
 OR ({filterValue41567336} IN filterValue415673.value ) 
 OR ({filterValue41567337} IN filterValue415673.value ) 
 OR ({filterValue41567338} IN filterValue415673.value ) 
 OR ({filterValue41567339} IN filterValue415673.value ) 
OPTIONAL MATCH (childD)<-[:VOTED_FOR]-(vg:VoteGroup)-[:VOTED_ON]->(c:Criterion) 
WHERE id(c) IN {criteriaIds} 
WITH childD, ru, u, vg.avgVotesWeight as weight, vg.totalVotes as totalVotes 
WITH ru, u, childD , toFloat(sum(weight)) as weight, toInt(sum(totalVotes)) as totalVotes  
ORDER BY  weight DESC 
SKIP 0 LIMIT 10 
RETURN ru, u, childD AS decision, weight, totalVotes, 
 [ (parentD)<-[:DEFINED_BY]-(entity)<-[:COMMENTED_ON]-(comg:CommentGroup)-[:COMMENTED_FOR]->(childD) | 
  {entityId: id(entity),  types: labels(entity), totalComments: toInt(comg.totalComments)} ] AS commentGroups, 
 [ (parentD)<-[:DEFINED_BY]-(c1:Criterion)<-[:VOTED_ON]-(vg1:VoteGroup)-[:VOTED_FOR]->(childD) | 
  {criterionId: id(c1),  weight: vg1.avgVotesWeight, totalVotes: toInt(vg1.totalVotes)} ] AS weightedCriteria, 
 [ (parentD)<-[:DEFINED_BY]-(ch1:Characteristic)<-[:SET_ON]-(v1:Value)-[:SET_FOR]->(childD) | 
  {characteristicId: id(ch1),  value: v1.value, valueType: ch1.valueType, visualMode: ch1.visualMode} ] AS valuedCharacteristics

现在我对表演不满意。例如,对此查询的调用需要~500ms

您能否请一看,看看是否有机会改进此查询?

已更新

这是一个几乎相同的查询,但有不同的参数:

MATCH (parentD)-[:CONTAINS]->(childD:Decision)-[ru:CREATED_BY]->(u:User) 
WHERE id(parentD) = 415406 
MATCH (childD)<-[:SET_FOR]-(filterValue416423:Value)-[:SET_ON]->(filterCharacteristic416423:Characteristic) 
WHERE id(filterCharacteristic416423) = 416423 
WITH filterValue416423, childD, ru, u 
WHERE ('Adobe RGB' IN filterValue416423.value ) OR ('ECI RGB' IN filterValue416423.value )  
MATCH (childD)<-[:SET_FOR]-(filterValue416273:Value)-[:SET_ON]->(filterCharacteristic416273:Characteristic) 
WHERE id(filterCharacteristic416273) = 416273 WITH filterValue416273, childD, ru, u 
WHERE  (filterValue416273.value >= 4) AND (filterValue416273.value <= 53)  
MATCH (childD)<-[:SET_FOR]-(filterValue415431:Value)-[:SET_ON]->(filterCharacteristic415431:Characteristic) 
WHERE id(filterCharacteristic415431) = 415431 WITH filterValue415431, childD, ru, u 
WHERE  ('Compact' IN filterValue415431.value ) 
  OR ('Compact SLR' IN filterValue415431.value ) 
  OR ('Large SLR' IN filterValue415431.value ) 
  OR ('Rangefinder-style mirrorless' IN filterValue415431.value ) 
  OR ('SLR-like (bridge)' IN filterValue415431.value )  
MATCH (childD)<-[:SET_FOR]-(filterValue415441:Value)-[:SET_ON]->(filterCharacteristic415441:Characteristic) 
WHERE id(filterCharacteristic415441) = 415441 WITH filterValue415441, childD, ru, u 
WHERE  ('Brass' IN filterValue415441.value ) 
  OR ('Carbon fiber' IN filterValue415441.value )  
  OPTIONAL MATCH (childD)<-[:VOTED_FOR]-(vg:VoteGroup)-[:VOTED_ON]->(c:Criterion) 
WHERE id(c) IN [415414, 415415, 415412, 415426, 415411]  
WITH childD, ru, u, vg.avgVotesWeight as weight, vg.totalVotes as totalVotes 
WITH ru, u, childD , toFloat(sum(weight)) as weight, toInt(sum(totalVotes)) as totalVotes  
ORDER BY  weight DESC 
SKIP 0 LIMIT 10 
RETURN ru, u, childD AS decision, weight, totalVotes, 
[ (parentD)<-[:DEFINED_BY]-(entity)<-[:COMMENTED_ON]-(comg:CommentGroup)-[:COMMENTED_FOR]->(childD) | 
  {entityId: id(entity),  types: labels(entity), totalComments: toInt(comg.totalComments)} ] AS commentGroups, 
[ (parentD)<-[:DEFINED_BY]-(c1:Criterion)<-[:VOTED_ON]-(vg1:VoteGroup)-[:VOTED_FOR]->(childD) | 
  {criterionId: id(c1),  weight: vg1.avgVotesWeight, totalVotes: toInt(vg1.totalVotes)} ] AS weightedCriteria, 
[ (parentD)<-[:DEFINED_BY]-(ch1:Characteristic)<-[:SET_ON]-(v1:Value)-[:SET_FOR]->(childD) | 
  {characteristicId: id(ch1),  value: v1.value, valueType: ch1.valueType, visualMode: ch1.visualMode} ] AS valuedCharacteristics

Cypher版本:CYPHER 3.1,规划师:COST,运行时间:解释。在390毫秒内总共达到646192 db。

enter image description here

已更新

这是:schema

的输出
Indexes
   ON :Characteristic(lowerName) ONLINE
   ON :CharacteristicGroup(lowerName) ONLINE
   ON :Criterion(lowerName) ONLINE
   ON :CriterionGroup(lowerName) ONLINE
   ON :Decision(lowerName) ONLINE
   ON :FlagType(name) ONLINE (for uniqueness constraint)
   ON :HistoryValue(originalValue) ONLINE
   ON :Permission(code) ONLINE (for uniqueness constraint)
   ON :Role(name) ONLINE (for uniqueness constraint)
   ON :User(email) ONLINE (for uniqueness constraint)
   ON :User(username) ONLINE (for uniqueness constraint)
   ON :Value(value) ONLINE

Constraints
   ON ( flagtype:FlagType ) ASSERT flagtype.name IS UNIQUE
   ON ( permission:Permission ) ASSERT permission.code IS UNIQUE
   ON ( role:Role ) ASSERT role.name IS UNIQUE
   ON ( user:User ) ASSERT user.email IS UNIQUE
   ON ( user:User ) ASSERT user.username IS UNIQUE

已更新

我已根据以下答案优化了查询:

MATCH (parentD)-[:CONTAINS]->(childD:Decision)
WHERE id(parentD) = 415406 
MATCH (childD)<-[:SET_FOR]-(filterValue416423)-[:SET_ON]->(filterCharacteristic416423) 
WHERE id(filterCharacteristic416423) = 416423 
WITH DISTINCT filterValue416423, childD 
WHERE ('Adobe RGB' IN filterValue416423.value ) OR ('ECI RGB' IN filterValue416423.value )  
MATCH (childD)<-[:SET_FOR]-(filterValue416273)-[:SET_ON]->(filterCharacteristic416273) 
WHERE id(filterCharacteristic416273) = 416273 
WITH DISTINCT childD, filterValue416273  
WHERE  (filterValue416273.value >= 4) AND (filterValue416273.value <= 53)  
MATCH (childD)<-[:SET_FOR]-(filterValue415431)-[:SET_ON]->(filterCharacteristic415431) 
WHERE id(filterCharacteristic415431) = 415431 
WITH DISTINCT childD, filterValue415431 
WHERE  ('Compact' IN filterValue415431.value ) 
  OR ('Compact SLR' IN filterValue415431.value ) 
  OR ('Large SLR' IN filterValue415431.value ) 
  OR ('Rangefinder-style mirrorless' IN filterValue415431.value ) 
  OR ('SLR-like (bridge)' IN filterValue415431.value )  
MATCH (childD)<-[:SET_FOR]-(filterValue415441)-[:SET_ON]->(filterCharacteristic415441) 
WHERE id(filterCharacteristic415441) = 415441 
WITH DISTINCT childD, filterValue415441 
WHERE  ('Brass' IN filterValue415441.value ) 
  OR ('Carbon fiber' IN filterValue415441.value )  
  OPTIONAL MATCH (childD)<-[:VOTED_FOR]-(vg:VoteGroup)-[:VOTED_ON]->(c:Criterion) 
WHERE id(c) IN [415414, 415415, 415412, 415426, 415411]  

WITH DISTINCT * MATCH (childD)-[ru:CREATED_BY]->(u:User)  
WITH DISTINCT childD, ru, u, vg.avgVotesWeight as weight, vg.totalVotes as totalVotes 
WITH DISTINCT ru, u, childD , toFloat(sum(weight)) as weight, toInt(sum(totalVotes)) as totalVotes  
ORDER BY  weight DESC 
SKIP 0 LIMIT 10 
RETURN ru, u, childD AS decision, weight, totalVotes, 
[ (parentD)<-[:DEFINED_BY]-(entity)<-[:COMMENTED_ON]-(comg:CommentGroup)-[:COMMENTED_FOR]->(childD) | 
  {entityId: id(entity),  types: labels(entity), totalComments: toInt(comg.totalComments)} ] AS commentGroups, 
[ (parentD)<-[:DEFINED_BY]-(c1:Criterion)<-[:VOTED_ON]-(vg1:VoteGroup)-[:VOTED_FOR]->(childD) | 
  {criterionId: id(c1),  weight: vg1.avgVotesWeight, totalVotes: toInt(vg1.totalVotes)} ] AS weightedCriteria, 
[ (parentD)<-[:DEFINED_BY]-(ch1:Characteristic)<-[:SET_ON]-(v1)-[:SET_FOR]->(childD) | 
  {characteristicId: id(ch1),  value: v1.value, valueType: ch1.valueType, visualMode: ch1.visualMode} ] AS valuedCharacteristics

PROFILE输出:

enter image description here

使用DISTINCT childD时,查询的工作速度非常慢,但距离完美

还要好得多

再试一次

PROFILE MATCH (parentD)-[:CONTAINS]->(childD:Decision)
WHERE id(parentD) = 415406 
MATCH (childD)<-[:SET_FOR]-(filterValue416423)-[:SET_ON]->(filterCharacteristic416423)
USING JOIN ON childD
WHERE id(filterCharacteristic416423) = 416423
AND ('Adobe RGB' IN filterValue416423.value ) OR ('ECI RGB' IN filterValue416423.value )
WITH DISTINCT childD
MATCH (childD)<-[:SET_FOR]-(filterValue416273)-[:SET_ON]->(filterCharacteristic416273) 
USING JOIN ON childD
WHERE id(filterCharacteristic416273) = 416273 AND (filterValue416273.value >= 4) AND (filterValue416273.value <= 53)
WITH DISTINCT childD  
MATCH (childD)<-[:SET_FOR]-(filterValue415431)-[:SET_ON]->(filterCharacteristic415431) 
USING JOIN ON childD
WHERE id(filterCharacteristic415431) = 415431
AND ('Compact' IN filterValue415431.value ) 
  OR ('Compact SLR' IN filterValue415431.value ) 
  OR ('Large SLR' IN filterValue415431.value ) 
  OR ('Rangefinder-style mirrorless' IN filterValue415431.value ) 
  OR ('SLR-like (bridge)' IN filterValue415431.value )
WITH DISTINCT childD
MATCH (childD)<-[:SET_FOR]-(filterValue415441)-[:SET_ON]->(filterCharacteristic415441) 
USING JOIN ON childD
WHERE id(filterCharacteristic415441) = 415441
AND ('Brass' IN filterValue415441.value ) 
  OR ('Carbon fiber' IN filterValue415441.value )  

OPTIONAL MATCH (childD)<-[:VOTED_FOR]-(vg:VoteGroup)-[:VOTED_ON]->(c:Criterion) 
WHERE id(c) IN [415414, 415415, 415412, 415426, 415411]  

WITH DISTINCT * MATCH (childD)-[ru:CREATED_BY]->(u:User)  
WITH DISTINCT childD, ru, u, vg.avgVotesWeight as weight, vg.totalVotes as totalVotes 
WITH DISTINCT ru, u, childD , toFloat(sum(weight)) as weight, toInt(sum(totalVotes)) as totalVotes  
ORDER BY  weight DESC 
SKIP 0 LIMIT 10 
RETURN childD

enter image description here

1 个答案:

答案 0 :(得分:2)

你的查询的主要问题是你基本上做了很多检查,行是狂野的。因此,这里有一些提示可以减少每个MATCH生成的行数。

1)除非你需要重复,否则使用WITH DISTINCT而不是WITH。 WITH可以创建重复的行(因为您只剪切了一列),并且您处理的每个重复行都是浪费时间和额外的数据库命中。 (即,您删除的每个过滤器列都会添加重复的行)

2):Value.value重载。它没有语义含义,并且该值甚至不能保证为任何类型。这意味着每一个:价值检查必须走出去触摸一堆:与您搜索的内容无关的值节点。因此,当附加的数量:值节点增加时,找到正确的节点就越昂贵(如果可以将其编入索引,这会更便宜,因此它可以找到正确的:值,并查看它连接到的是什么如果您无法更改您正在使用的架构,并且通过架构,我的意思是如何设置数据/关系,这无济于事。

3)只检查您需要检查的内容。说(a:A) - [:TO] - &gt;(b:B)似乎更有效,但如果所有[:TO]都来自:A到:B,Neo4j现在必须验证第一个节点是:A,第二个节点是:B。 Cypher不知道什么是隐含的,所以它必须进行检查,但是这些冗余检查中的每一个都必须熄灭并且每行都按DB。所以最好说(a) - [:TO] - &gt;(b)。

4)限制变量范围。在这里,您在开头匹配 - [ru:CREATED_BY] - &gt;(u:用户),但不要使用它直到结尾,没有过滤器。这会使每个决定的行数乘以 - [ru:CREATED_BY] - &gt;(u:用户),并且必须在进一步的匹配中检查ALL。除非 - [ru:CREATED_BY] - &gt;(u:用户)以某种方式极大地限制了匹配的决策(或每个决策只能有一个),最后匹配此支持信息。

5)订购过滤器从最强到最弱(如果可以的话)。尽可能早地减少行数。

6)尽量减少行数。拉出的每一行使得查询中的以下步骤必须更加困难,因此最小化查询中的行。如果您使用OR来组合不相关但相似的列查询(例如条件A或具有条件B的组织的所有组织),并且两个查询的工作只会使另一半的事情变得更加昂贵,那么使用UNION可能会更好结合更小,更快的查询的结果(UNION可以并行运行直到合并结果)。请注意,[1,2,3]中的WHERE org.id等简单查询仍然比UNION快,因为这项工作都可以在一次查找中完成。

除了工会之外,如果您正在收集未经过筛选的节点,您可以使用collect(列)来减少重复&#39;最多为1行,而不是UNWIND(列)作为查询末尾的列,以获取您的行! (此处引用变量名称列)

7)在1个节点上做了很多过滤器? Cypher有USING提示!提示USING JOIN ON column告诉Cypher,使用更多的起始叶子并加入它们可能会更有效地进行此匹配。因此,在每次匹配时使用USING JOIN ON childD将告诉Cypher并行执行所有过滤器,并使用所有过滤器的重叠行。请注意,USING只是告诉Cypher&#34;相信我,如果我们尝试这样做,这应该会更快&#34;如果你错了,这实际上可以使查询更糟。 (尽管使用大型查询更加平行,但USING JOIN应该很有用)

<强>更新
首先,注意node.id =&#34;常数&#34; AND node.value =&#34;常数&#34; OR node.id =&#34; constant2&#34; AND node.value =&#34; constant2&#34; vs node.value = map [node.id]。第一个查询能够对节点查找进行节点过滤,而后者必须过滤已经查找过的所有节点。如果没有先前对该查找进行过滤,则意味着地图必须拉入所有节点。虽然地图提供了一定程度的(可论证的)简单性/灵活性,但它是过滤节点的效率最低的方法之一。

其次,您现在查询的最大问题是:值超级重载,您无法通过ID查找。 :值应为关系,或具有索引ID字段,以便您不必触摸ALL&lt; - [:SET_FOR] - 和 - [:SET_ON] - &gt;。使用Join提示我认为至少会使SET_FOR具有更高的优先级,这似乎是两者中效率更高的。

这是我尝试更有效地重写PROFILE查询。 (V1)

MATCH (parentD)-[:CONTAINS]->(childD:Decision)
WHERE id(parentD) = 415406 
MATCH (childD)<-[:SET_FOR]-(filterValue416423)-[:SET_ON]->(filterCharacteristic416423)
USING JOIN ON childD
WHERE id(filterCharacteristic416423) = 416423
WHERE ('Adobe RGB' IN filterValue416423.value ) OR ('ECI RGB' IN filterValue416423.value )
WITH DISTINCT childD
MATCH (childD)<-[:SET_FOR]-(filterValue416273)-[:SET_ON]->(filterCharacteristic416273) 
USING JOIN ON childD
WHERE id(filterCharacteristic416273) = 416273 AND (filterValue416273.value >= 4) AND (filterValue416273.value <= 53)
WITH DISTINCT childD  
MATCH (childD)<-[:SET_FOR]-(filterValue415431)-[:SET_ON]->(filterCharacteristic415431) 
USING JOIN ON childD
WHERE id(filterCharacteristic415431) = 415431
WHERE ('Compact' IN filterValue415431.value ) 
  OR ('Compact SLR' IN filterValue415431.value ) 
  OR ('Large SLR' IN filterValue415431.value ) 
  OR ('Rangefinder-style mirrorless' IN filterValue415431.value ) 
  OR ('SLR-like (bridge)' IN filterValue415431.value )
WITH DISTINCT childD
MATCH (childD)<-[:SET_FOR]-(filterValue415441)-[:SET_ON]->(filterCharacteristic415441) 
USING JOIN ON childD
WHERE id(filterCharacteristic415441) = 415441
WHERE  ('Brass' IN filterValue415441.value ) 
  OR ('Carbon fiber' IN filterValue415441.value )  

OPTIONAL MATCH (childD)<-[:VOTED_FOR]-(vg:VoteGroup)-[:VOTED_ON]->(c:Criterion) 
WHERE id(c) IN [415414, 415415, 415412, 415426, 415411]  

WITH DISTINCT * MATCH (childD)-[ru:CREATED_BY]->(u:User)  
WITH DISTINCT childD, ru, u, vg.avgVotesWeight as weight, vg.totalVotes as totalVotes 
WITH DISTINCT ru, u, childD , toFloat(sum(weight)) as weight, toInt(sum(totalVotes)) as totalVotes  
ORDER BY  weight DESC 
SKIP 0 LIMIT 10 
RETURN ru, u, childD AS decision, weight, totalVotes, 
[ (parentD)<-[:DEFINED_BY]-(entity)<-[:COMMENTED_ON]-(comg:CommentGroup)-[:COMMENTED_FOR]->(childD) | 
  {entityId: id(entity),  types: labels(entity), totalComments: toInt(comg.totalComments)} ] AS commentGroups, 
[ (parentD)<-[:DEFINED_BY]-(c1:Criterion)<-[:VOTED_ON]-(vg1:VoteGroup)-[:VOTED_FOR]->(childD) | 
  {criterionId: id(c1),  weight: vg1.avgVotesWeight, totalVotes: toInt(vg1.totalVotes)} ] AS weightedCriteria, 
[ (parentD)<-[:DEFINED_BY]-(ch1:Characteristic)<-[:SET_ON]-(v1)-[:SET_FOR]->(childD) | 
  {characteristicId: id(ch1),  value: v1.value, valueType: ch1.valueType, visualMode: ch1.visualMode} ] AS valuedCharacteristics