我需要在 SQL Server 中计算图中某个节点的各个邻居之间的边数,我有 GraphNodes 和 GraphEdges 两张表。
表的结构可在上一个问题(previous question)中找到。
与上一个问题相比,这里关注的方面有所不同:
要完成这项任务,我必须执行以下步骤:
1. 从 GraphNodes 中选取一个节点 V;
2. 得到 V 的 DISTINCT 邻居列表(例如,存放在 SQL 的 TABLE 变量中);
3. 在 GraphEdges 中统计这些邻居之间的边。
我为单个节点尝试的查询工作正常,即
-- Count the distinct directed edges whose source and target are both
-- neighbors of node 512. A neighbor is any node that shares an edge with
-- 512, in either direction.
WITH neighbors AS (
    -- UNION removes duplicates, so a node adjacent to 512 in both
    -- directions is listed once.
    SELECT e.Target_Node AS node_id
    FROM GraphEdges AS e
    WHERE e.Source_Node = 512
    UNION
    SELECT e.Source_Node
    FROM GraphEdges AS e
    WHERE e.Target_Node = 512
)
SELECT
    GN.id,
    COUNT(DISTINCT CONCAT(GE.Source_Node, '-', GE.Target_Node)) AS NeighborLinks
FROM GraphNodes AS GN
CROSS JOIN GraphEdges AS GE
WHERE GN.id = 512
  -- Keep only edges with both endpoints inside the neighbor set.
  AND GE.Source_Node IN (SELECT n.node_id FROM neighbors AS n)
  AND GE.Target_Node IN (SELECT n.node_id FROM neighbors AS n)
GROUP BY GN.id;
我以 GraphNodes 中 id = 512 的节点作为示例。此查询的输出为:
+-------+-----------------+
| id | NeighborLinks |
+-------+-----------------+
| 512 | 6 |
+-------+-----------------+
在 WHERE 子句中使用 UNION ALL 的原因是:该 id(即 512)在 Source_Node 和 Target_Node 两列中都会出现,因此必须从这两列中选出 DISTINCT 的邻居。
此外,对 GE.Source_Node 和 GE.Target_Node 使用了同一个邻居列表,因为只需要检查 V(即 512)的邻居之间的链接。
问题是:如何(例如使用 TABLE 变量或其他任何方法)对一长串 id 值执行该计算,而不是只针对 512?
我尝试了下面这个基于表变量的方案,但在查询中使用表变量时出现了错误:
尝试1
-- Attempt 1: try to process every node at once through table variables.
-- NOTE(review): this batch is not valid T-SQL as written; see the notes on
-- the final SELECT below.
-- @ID holds the node ids; @S_Neighbor / @T_Neighbor hold neighbor ids taken
-- from the source side and the target side of edges touching those nodes.
DECLARE @ID TABLE(id INT)
DECLARE @S_Neighbor TABLE (id INT)
DECLARE @T_Neighbor TABLE (id INT)
-- Load every node id from GraphNodes.
INSERT INTO @ID SELECT id FROM GraphNodes
-- Distinct source endpoints of edges whose target is any node in @ID.
-- The rows do not record which node each neighbor belongs to, so the
-- per-node neighbor lists are merged into one set.
INSERT INTO @S_Neighbor SELECT DISTINCT Source_Node
FROM GraphEdges
WHERE Target_Node IN (SELECT id FROM @ID)
--UNION ALL
-- Distinct target endpoints of edges whose source is any node in @ID
-- (same loss of the per-node association as above).
INSERT INTO @T_Neighbor SELECT DISTINCT Target_Node
FROM GraphEdges
WHERE Source_Node IN (SELECT id FROM @ID)
-- Intended result: one row per node with the number of distinct edges
-- between its neighbors.
SELECT GN.id,COUNT(DISTINCT(CONCAT(GE.Source_Node,'-', GE.Target_Node))) AS Mutual_Links
FROM GraphEdges GE
-- NOTE(review): @ID is a table variable, not a scalar value, so it cannot be
-- compared with "=" here.
JOIN GraphNodes GN ON GN.id = @ID
-- NOTE(review): "IN @T_Neighbor" / "IN @S_Neighbor" below are not valid; IN
-- needs a parenthesized list or subquery, e.g. (SELECT id FROM @T_Neighbor),
-- as the INSERT statements above do with @ID.
WHERE Source_Node IN (SELECT DISTINCT Target_Node
FROM GraphEdges
WHERE Source_Node IN @T_Neighbor
UNION ALL
SELECT DISTINCT Source_Node
FROM GraphEdges
WHERE Target_Node IN @S_Neighbor)
AND Target_Node IN (SELECT DISTINCT Target_Node
FROM GraphEdges
WHERE Source_Node IN @S_Neighbor
UNION ALL
SELECT DISTINCT Source_Node
FROM GraphEdges
WHERE Target_Node IN @T_Neighbor)
GROUP BY GN.id
我也试过这个:
尝试2
-- Attempt 2: run the single-node query once for every id from 1 to @MAX_ID.
-- Each iteration executes its own SELECT, so every id produces a separate
-- result set instead of one combined table.
DECLARE @ID_COUNTER INT = 1;
DECLARE @MAX_ID INT = 148410;

WHILE @ID_COUNTER <= @MAX_ID
BEGIN
    -- Distinct directed edges whose endpoints are both neighbors of the
    -- current node (@ID_COUNTER).
    SELECT
        GN.id,
        COUNT(DISTINCT CONCAT(GE.Source_Node, '-', GE.Target_Node)) AS Mutual_Links
    FROM GraphNodes AS GN
    CROSS JOIN GraphEdges AS GE
    WHERE GN.id = @ID_COUNTER
      AND GE.Source_Node IN (
            SELECT e.Target_Node FROM GraphEdges AS e WHERE e.Source_Node = @ID_COUNTER
            UNION
            SELECT e.Source_Node FROM GraphEdges AS e WHERE e.Target_Node = @ID_COUNTER
          )
      AND GE.Target_Node IN (
            SELECT e.Target_Node FROM GraphEdges AS e WHERE e.Source_Node = @ID_COUNTER
            UNION
            SELECT e.Source_Node FROM GraphEdges AS e WHERE e.Target_Node = @ID_COUNTER
          )
    GROUP BY GN.id;

    -- Advance to the next candidate id.
    SET @ID_COUNTER = @ID_COUNTER + 1;
END;
我用 @MAX_ID = 3 进行了测试,返回结果耗时 56 秒,而 @MAX_ID 实际应为 148410。
虽然返回的 NeighborLinks 值是正确的,但输出显示在三个单独的结果窗口中,如下所示:
id NeighborLinks
1 53
id NeighborLinks
2 318
id NeighborLinks
3 297
答案 0 :(得分:1)
我认为您正在寻找类似下面代码段的内容。
我为图的边创建了一个临时表(#graph_edges)。需要查找其(不同)邻居数量的节点存放在临时表 #nodes 中。
-- Edge list of the graph.
-- NOTE: this snippet never inserts rows into #graph_edges; load the edges
-- before running the statements below.
CREATE TABLE #graph_edges (
    source_node INT NOT NULL,
    target_node INT NOT NULL
);

-- Nodes to report on.
CREATE TABLE #nodes (
    id INT NOT NULL PRIMARY KEY
);

-- Report on every distinct node id that appears in the edge list.
-- To look up specific nodes only, insert just those ids instead,
-- e.g. VALUES (512), (513), (514).
INSERT INTO #nodes (id)
SELECT source_node FROM #graph_edges
UNION
SELECT target_node FROM #graph_edges;

-- One row per (node, neighbor) pair, regardless of edge direction.
-- UNION removes duplicates, so a neighbor reachable in both directions is
-- counted once.
WITH node_neighbors AS (
    SELECT
        n.id,
        ge.target_node AS neighbor_id
    FROM #nodes AS n
    INNER JOIN #graph_edges AS ge
        ON ge.source_node = n.id
    UNION
    SELECT
        n.id,
        ge.source_node
    FROM #nodes AS n
    INNER JOIN #graph_edges AS ge
        ON ge.target_node = n.id
)
-- neighbor_links is the number of distinct neighbors per node; it is not the
-- number of edges between those neighbors.
SELECT
    id,
    COUNT(*) AS neighbor_links
FROM node_neighbors
GROUP BY id
ORDER BY id;

-- Clean up the temporary tables.
DROP TABLE #nodes;
DROP TABLE #graph_edges;