我正在研究一个项目(确切地说是RefactorErl),我们在其中存储erlang程序的语法树和语义图,以对它们进行重构以及其他类型的分析,转换和查询。它们以前存储在不同的键值存储库,关系数据库和基于文档的数据库中,并根据它们进行了基准测试,但是我决定尝试实现neo4j后端,因为它似乎适合诸如“列出所有函数的变量”之类的查询之类的路径。位于名为xyz
“的模块中,并且具有类似的操作。
我写了一个neo4j包装器库(neo4j_driver),因为我发现较旧的包装器库使用的API现在已被弃用,并且几乎可以使用适当的过滤器和所有东西,但其中一个我无法解决的问题是缺乏在查询中匹配多个名称下的相同节点的能力。这似乎是必要的,因为有时不可避免地要通过关系进行向后搜索。例如,当程序(例如,因为需要重构)试图搜索与给定X
(通过给定参数fromId
来标识)相同功能的所有变量时, ),它将其翻译为Cypher并给出如下内容:
MATCH (from:var)<-[edge1:vardef]-(node1)-[edge2:vardef]->(node2) WHERE id(from) = $fromId RETURN labels(node2), id(node2) ORDER BY edge1.orderId, edge2.orderId"
(orderId
是文件中元素的词汇顺序。)
问题在于,这仅给出了X
本身以外的包含X
的函数内部的变量,因为它需要在名称下进行匹配“ from” 和“ node2” 同时显示。有没有一种方法可以构造避免这种问题的查询?我知道我可以尝试避免顺序命名节点并在节点内部使用花括号进行过滤,但是那样我只能进行完全匹配,而不能进行范围匹配。
ps .:我编写的用于翻译路径查询的当前代码如下所示,请告诉我它是否太可怕了,因为我觉得可能有更好的方法可以做到这一点:
filter_to_where_clause_element(NodeName, EdgeName, empty_filter) -> "";
filter_to_where_clause_element(NodeName, EdgeName, Filter) ->
case Filter of
{'not', InnerFilter} ->
"NOT (" ++ filter_to_where_clause_element(NodeName, EdgeName, InnerFilter) ++ ")";
{LeftFilter, 'and', RightFilter} ->
"((" ++ filter_to_where_clause_element(NodeName, EdgeName, LeftFilter) ++
") AND (" ++ filter_to_where_clause_element(NodeName, EdgeName, RightFilter) ++ "))";
{LeftFilter, 'or', RightFilter} ->
"((" ++ filter_to_where_clause_element(NodeName, EdgeName, LeftFilter) ++
") OR (" ++ filter_to_where_clause_element(NodeName, EdgeName, RightFilter) ++ "))";
{Property, '==', Value} ->
NodeName ++ "." ++ to_str(Property) ++ " = '" ++ to_str(Value) ++ "'";
{Property, '/=', Value} ->
NodeName ++ "." ++ to_str(Property) ++ " <> '" ++ to_str(Value) ++ "'";
{Property, '<', Value} ->
NodeName ++ "." ++ to_str(Property) ++ " < '" ++ to_str(Value) ++ "'";
{Property, '>', Value} ->
NodeName ++ "." ++ to_str(Property) ++ " > '" ++ to_str(Value) ++ "'";
{Property, '<=', Value} ->
NodeName ++ "." ++ to_str(Property) ++ " <= '" ++ to_str(Value) ++ "'";
{Property, '>=', Value} ->
NodeName ++ "." ++ to_str(Property) ++ " >= '" ++ to_str(Value) ++ "'";
% last ->
% EdgeName ++ ".orderId = " ++ to_str(get_last_id());
Id when is_integer(Id) ->
EdgeName ++ ".orderId = " ++ to_str(Id);
Other -> own_debug("Unknown filter", [Filter]), ""
end.
path_element_to_query_part({Id, {Tag, Direction, Filter}}) ->
NodeName = "node" ++ to_str(Id),
EdgeName = "edge" ++ to_str(Id),
Edge = "[" ++ EdgeName ++ ":" ++ to_str(Tag) ++ "]",
MatchClauseElement =
case Direction of
back -> "<-" ++ Edge ++ "-";
fwd -> "-" ++ Edge ++ "->"
end ++ "(" ++ NodeName ++ ")",
WhereClauseElement = filter_to_where_clause_element(NodeName, EdgeName, Filter),
OrderByClauseElement = EdgeName ++ ".orderId",
{ MatchClauseElement, WhereClauseElement, OrderByClauseElement }.
path(Node, Path) ->
{?NODETAG, Class, Id} = Node,
NormalizedIndexedPath = lists:zip(
lists:seq(1, length(Path)),
lists:map(fun(PE) -> normalize_path_element(PE) end, Path)
),
QueryPathElements = lists:map(
fun(IndexedPathElement) ->
path_element_to_query_part(IndexedPathElement)
end,
NormalizedIndexedPath
),
{ MatchClauseElements, WhereClauseElements, OrderByClauseElements } = {
lists:map(
fun(QueryPathElement) -> element(1, QueryPathElement) end,
QueryPathElements
),
lists:map(
fun(QueryPathElement) -> element(2, QueryPathElement) end,
QueryPathElements
),
lists:map(
fun(QueryPathElement) -> element(3, QueryPathElement) end,
QueryPathElements
)
},
JoinedMatchClause = string:join(MatchClauseElements, ""),
LastNodeName = "node" ++ to_str(length(Path)),
JoinedWhereClause = string:join(
lists:filter(
fun(WhereClauseElement) -> WhereClauseElement /= "" end,
["id(from) = $fromId" | WhereClauseElements]
),
" AND "
),
JoinedOrderByClause = string:join(OrderByClauseElements, ", "),
Statements = [
[
{ statement,
"MATCH (from:" ++ to_str(Class) ++ ")" ++
JoinedMatchClause ++
" WHERE " ++ JoinedWhereClause ++
" RETURN labels(" ++ LastNodeName ++ "), id(" ++ LastNodeName ++ ")" ++
" ORDER BY " ++ JoinedOrderByClause
},
{ parameters, [
{ "fromId", Id }
] }
]
],
Result = run_cypher_query(Statements),
MatchedNodes = lists:map(fun (Row) ->
[[TargetClass], TargetId] = Row,
{?NODETAG, parse(TargetClass), TargetId}
end, get_rows(Result)),
{ ok, MatchedNodes }.