我正在构建一个“匹配”配置文件的应用程序。 例如,这是我的schema ::
的简化版本 用户
标识
名字
姓氏
用户配置
标识
用户ID
SomeOtherFields
UserProfileFields
标识
UserProfileId
重点
价值
UserProfile用于保存一些标准信息(出生日期等...)
UserProfileFields基本上是一个键及其值的列表,以便建立一个看起来像这样的字典(再次,为了这个问题的目的简化)
UserProfileID | Key | Value
123 | food | Pizza
123 | food | Indian
4453 | drink | Coke
44850 | drink | Orange Juice
88493 | food | Pizza
448382 | food | Chinese
所以,从上面我们可以看到,配置文件123与食物上的88493相匹配 - 它们都有食物|披萨
有没有办法有效地查询此表以获取“匹配”列表
我会设想每天运行一次,结果存储在一个单独的表中
例如:
匹配
MatchID | ProfileID
1 | 123
1 | 88493
我正在猜测
的内容SELECT * FROM UserProfileFields
GROUP BY Key
查询类型......但不确定这会有多少效率,例如100万行?
答案 0 :(得分:2)
这应该照顾你。
-- ============================================================================
-- BEGIN: SETUP TEST DATA
-- ============================================================================
CREATE TABLE UserProfileFields (
UserProfileID int
,[Key] varchar(5)
,Value varchar(12)
);
INSERT UserProfileFields (UserProfileID, [Key], Value)
SELECT A.*
FROM (
SELECT * FROM UserProfileFields WHERE 1=2
UNION ALL SELECT 123, 'food', 'Pizza'
UNION ALL SELECT 123, 'food', 'Indian'
UNION ALL SELECT 4453, 'drink', 'Coke'
UNION ALL SELECT 44850, 'drink', 'Orange Juice'
UNION ALL SELECT 88493, 'food', 'Pizza'
UNION ALL SELECT 448382, 'food', 'Chinese'
UNION ALL SELECT 88493, 'drink', 'Coke'
UNION ALL SELECT 88493, 'drink', 'Orange Juice'
) A;
--/*
-- Turn 8 records into 1,048,576
DECLARE @Count int; SELECT @Count = 0;
WHILE @Count < 17
BEGIN
INSERT UserProfileFields
SELECT * FROM UserProfileFields
SELECT @Count = (@Count + 1)
END
--*/
-- SELECT COUNT(*) FROM UserProfileFields WITH (NOLOCK)
-- ============================================================================
-- END: SETUP TEST DATA
-- ============================================================================
-- ============================================================================
-- BEGIN: Solution if Key, Value, and UserProfileID do NOT make up a unique key
-- ============================================================================
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#DistinctValues', 'U') IS NOT NULL DROP TABLE #DistinctValues;
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;
SELECT [Key], UserProfileID, Value
INTO #DistinctValues
FROM UserProfileFields WITH (NOLOCK)
GROUP BY [Key], UserProfileID, Value;
SELECT A.[Key], A.Value, A.UserProfileID
INTO #Matches
FROM #DistinctValues A
JOIN #DistinctValues B
ON A.[Key] = B.[Key]
AND A.Value = B.Value
AND A.UserProfileID <> B.UserProfileID;
SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
,A.UserProfileID
,A.[Key]
,A.Value
FROM #Matches A;
-- ============================================================================
-- END: Solution if Key, Value, and UserProfileID do NOT make up a unique key
-- ============================================================================
-- ============================================================================
-- BEGIN: Solution if Key, Value, and UserProfileID make up a unique key
-- ============================================================================
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;
SELECT A.[Key], A.Value, A.UserProfileID
INTO #Matches
FROM UserProfileFields A WITH (NOLOCK)
JOIN UserProfileFields B WITH (NOLOCK)
ON A.[Key] = B.[Key]
AND A.Value = B.Value
AND A.UserProfileID <> B.UserProfileID;
SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
,A.UserProfileID
,A.[Key]
,A.Value
FROM #Matches A;
-- ============================================================================
-- END: Solution if Key, Value, and UserProfileID make up a unique key
-- ============================================================================
答案 1 :(得分:1)
WITH Matches
AS
(
SELECT a.UserProfileID,
a.[Key],
a.Value,
DENSE_RANK() OVER(ORDER BY a.[Key]) MatchID
FROM UserProfileFields a
INNER JOIN
(
SELECT [Key], Value
FROM UserProfileFields
GROUP BY [Key], Value
HAVING COUNT(DISTINCT UserProfileID) > 1
) b ON a.[Key] = b.[Key] AND
a.Value = b.Value
)
SELECT MatchID, UserProfileID
FROM Matches
答案 2 :(得分:1)
使用EXISTS()运算符选项并覆盖索引。这有助于避免过多的数据排序。
CREATE INDEX ix_Key_Value_UserProfileFields ON dbo.UserProfileFields([Key], Value) INCLUDE(UserProfileID)
SELECT DENSE_RANK() OVER(ORDER BY t.[Key], t.Value) AS MatchID, t.UserProfileID
FROM dbo.UserProfileFields t
WHERE EXISTS (
SELECT 1
FROM dbo.UserProfileFields t2
WHERE t.[Key] = t2.[Key]
AND t.Value = t2.Value
HAVING COUNT(*) > 1
)
SQLFiddle上的演示