我在 MSSQL 中有以下查询:
-- Rank every customer other than 915 by how many of customer 915's rated
-- movies they have also rated, and keep the 50 with the highest overlap.
-- There is no secondary sort key, so ties at the cutoff are broken arbitrarily.
SELECT TOP 50
    r.CustomerID
FROM Ratings AS r
WHERE r.CustomerID <> 915
  AND r.MovieID IN (
        -- Movies rated by the reference customer.
        SELECT DISTINCT seed.MovieID
        FROM Ratings AS seed
        WHERE seed.CustomerID = 915
      )
GROUP BY r.CustomerID
ORDER BY COUNT(*) DESC;
这个查询运行得非常快。但当我像下面这样把它作为子查询使用时,就变得很慢:
-- Ratings of movie 1 given by the 50 customers whose rated movies overlap
-- most with those of customer 915.
SELECT *
FROM Ratings AS target
WHERE target.MovieID = 1
  AND target.CustomerID IN (
        -- Top 50 customers (excluding 915) by number of movies shared with 915.
        SELECT TOP 50
            r.CustomerID
        FROM Ratings AS r
        WHERE r.CustomerID <> 915
          AND r.MovieID IN (
                -- Movies rated by the reference customer.
                SELECT DISTINCT seed.MovieID
                FROM Ratings AS seed
                WHERE seed.CustomerID = 915
              )
        GROUP BY r.CustomerID
        ORDER BY COUNT(*) DESC
      );
有人知道为什么这么慢,以及如何加快速度吗?我的主键是 (MovieID, CustomerID),并且我在 CustomerID 上添加了一个索引。

答案 0:(得分:5)
您需要在 (CustomerID, MovieID) 上(按此顺序)创建一个额外的 UNIQUE 索引,以改进此查询。
有关性能方面的详细信息,请参阅我博客中的文章:
由于您的子查询返回的是一组唯一(UNIQUE)的值,因此该查询可以改写为 JOIN:
-- JOIN-based form: ratings of movie 1 given by the 50 customers whose rated
-- movies overlap most with those of customer 915.
WITH seed_movies AS (
    -- Movies rated by the reference customer.
    SELECT MovieID
    FROM Ratings
    WHERE CustomerID = 915
),
top_customers AS (
    -- Other customers ranked by the number of seed movies they also rated.
    -- No secondary sort key: ties at the cutoff are broken arbitrarily.
    SELECT TOP 50
        r.CustomerID
    FROM seed_movies AS sm
    INNER JOIN Ratings AS r
        ON r.MovieID = sm.MovieID
    WHERE r.CustomerID <> 915
    GROUP BY r.CustomerID
    ORDER BY COUNT(*) DESC
)
SELECT r2.*
FROM top_customers AS tc
INNER JOIN Ratings AS r2
    ON r2.CustomerID = tc.CustomerID
WHERE r2.MovieID = 1;
如果要在给 Movie 1 评过分的客户中选出 TOP 50,请使用:
-- Variant that picks the top 50 only among customers who have rated movie 1,
-- then returns those customers' ratings of movie 1.
WITH seed_movies AS (
    -- Movies rated by the reference customer.
    SELECT MovieID
    FROM Ratings
    WHERE CustomerID = 915
),
top_customers AS (
    -- Other customers ranked by the number of seed movies they also rated.
    -- No secondary sort key: ties at the cutoff are broken arbitrarily.
    SELECT TOP 50
        r.CustomerID
    FROM seed_movies AS sm
    INNER JOIN Ratings AS r
        ON r.MovieID = sm.MovieID
    WHERE r.CustomerID <> 915
      -- Only consider customers that have a rating row for movie 1.
      AND EXISTS (
            SELECT 1
            FROM Ratings AS re
            WHERE re.MovieID = 1
              AND re.CustomerID = r.CustomerID
          )
    GROUP BY r.CustomerID
    ORDER BY COUNT(*) DESC
)
SELECT r2.*
FROM top_customers AS tc
INNER JOIN Ratings AS r2
    ON r2.CustomerID = tc.CustomerID
WHERE r2.MovieID = 1;