需要更便宜的查询

时间:2015-01-27 21:58:18

标签: sql sql-server tsql

我有三张表:person、email 和 personemail。personemail 表包含指向 person 和 email 的外键,因此一个人可以关联多个电子邮件地址。email 表还有一个名为 primaryemail 的字段,其值为 1 或 0。主电子邮件标记用于在报告、发票等中取用电子邮件地址。

UI 中曾存在一个逻辑缺陷,允许用户不为客户设置主电子邮件地址。我已经修复了这个逻辑缺陷,但还需要一个脚本,为尚未设置主电子邮件地址的客户强制设置一个。我们决定把 emailid(email 表的主键)最小的那条记录设为主电子邮件地址。下面是已编写的脚本,它可以工作,但运行开销非常大,并且可能在运行期间造成锁,阻塞最终用户。该软件部署在多个时区,因此即使我们在使用量最低的时段运行,也需要它尽可能快地完成。

这是当前的脚本。它使用了临时表和 WHILE 循环,所以显然还有很大的改进空间。我的 SQL 水平还有待提高,所以把它发在这里征求建议。

-- Purpose: for every person that has no primary email, mark the email with the
-- lowest EmailID linked to that person as primary (primaryEmail = 1).
-- Approach: two temp tables to find the affected persons, then a row-by-row WHILE loop.

-- #TEMP: one row per (person, non-primary email) link; FLAG marks rows the loop has processed.
CREATE TABLE #TEMP(PERSONID INT, PRIMARYEMAIL INT,FLAG INT)
CREATE INDEX IDX_TEMP_PERSONID ON #TEMP(PERSONID)

-- #TEMP2: one row per (person, primary email) link.
CREATE TABLE #TEMP2(PERSONID INT,PRIMARYEMAIL INT)
CREATE INDEX IDX_TEMP2_PERSONID ON #TEMP2(PERSONID)

--Grab all the person id's that have at least one email addresses that is not primary in the db, also set  a flag for the while loop
-- NOTE: the WHERE filter on E.primaryEmail discards the NULL-extended rows, so this
-- LEFT OUTER JOIN behaves like an INNER JOIN. A person with several non-primary
-- emails produces several rows here.
INSERT INTO #TEMP
SELECT PE.PersonID, E.primaryEmail ,0 
FROM Account.tbPersonEmail PE WITH (NOLOCK)
LEFT OUTER JOIN Account.tbEmail E ON E.EmailID=PE.EmailID 
WHERE E.primaryEmail=0


--Grab all person ID's that have at least one email address that is primary.
INSERT INTO #TEMP2
SELECT PE.PersonID, E.primaryEmail 
FROM Account.tbPersonEmail PE WITH (NOLOCK)
LEFT OUTER JOIN Account.tbEmail E ON E.EmailID=PE.EmailID
WHERE E.primaryEmail=1


--SELECT * FROM #TEMP2

--Remove any customers that already have a primary email set.
DELETE FROM #TEMP WHERE PERSONID IN (SELECT DISTINCT PERSONID FROM #TEMP2)


--Debug line to see how many customers are affected.
--SELECT * FROM #TEMP


--Perform a while loop to update the min email ID to primary.
-- @INTFLAG is the number of ROWS in #TEMP, not the number of distinct persons.
DECLARE @INTFLAG INT
DECLARE @PERSONID INT 
SET @INTFLAG = (SELECT COUNT(*) FROM #TEMP)

--SELECT @INTFLAG

WHILE (@INTFLAG > 0)

BEGIN

-- Pick any person that has not been processed yet; yields NULL once every row is flagged.
SET @PERSONID =(SELECT  TOP(1) PERSONID FROM #TEMP WHERE FLAG=0)

-- Promote the lowest EmailID linked to this person. One UPDATE against the live
-- table is issued per loop iteration.
UPDATE Account.tbEmail SET primaryEmail=1 WHERE EmailID=(SELECT MIN(EMAILID) FROM Account.tbPersonEmail where PersonID=@PERSONID)


--Update the flag on the #temp table to grab the next ID
-- This flags ALL rows of the person at once, while the counter below drops by one
-- only. Persons with several rows therefore cause extra iterations in which
-- @PERSONID is NULL and both UPDATEs match nothing.
UPDATE #TEMP SET FLAG=1 WHERE PERSONID=@PERSONID

--Reduce the intflag variable that the loop is running off of.
SET @INTFLAG=@INTFLAG-1





END

-- Clean up the temp tables.
DROP TABLE #TEMP
DROP TABLE #TEMP2

4 个答案:

答案 0 :(得分:1)

单个查询,为每个人设置第一封电子邮件的primaryEmail = 1,但已有主电子邮件的人除外:

-- Set primaryEmail = 1 on the lowest EmailID of every person that currently has
-- no primary email. Single set-based statement; no temp tables or loops.
--
-- T-SQL does not allow aliasing the UPDATE target inline ("UPDATE tbl E SET ...");
-- the alias has to be introduced in a FROM clause and then used as the target.
UPDATE E
SET E.primaryEmail = 1
FROM Account.tbEmail AS E
WHERE E.EmailID IN (
    -- lowest email id for each person ...
    SELECT MIN(PE.EmailID)
    FROM Account.tbPersonEmail AS PE
    -- ... excluding persons who already have a primary email.
    -- NOT EXISTS is used instead of NOT IN so that a NULL PersonID in the
    -- subquery cannot silently turn the whole predicate into "no rows".
    WHERE NOT EXISTS (
        SELECT 1
        FROM Account.tbPersonEmail AS PE1
        INNER JOIN Account.tbEmail AS E1
            ON E1.EmailID = PE1.EmailID
        WHERE PE1.PersonID = PE.PersonID
          AND E1.primaryEmail = 1
    )
    GROUP BY PE.PersonID
);

答案 1 :(得分:0)

创建临时表是一种非常昂贵的方法,并且使用循环是一个坏主意,因为它们很慢,因为它们无法进行优化。典型的方法是使用子查询。首先,尝试这样做:

-- Build #TEMP in one pass: one row per (person, non-primary email) link, restricted
-- to persons that have no primary email at all. This replaces the separate #TEMP2
-- table and the DELETE step of the original script.
CREATE TABLE #TEMP(PERSONID INT, PRIMARYEMAIL INT, FLAG INT)
CREATE INDEX IDX_TEMP_PERSONID ON #TEMP(PERSONID)

INSERT INTO #TEMP (PERSONID, PRIMARYEMAIL, FLAG)
SELECT PE.PersonID, E.primaryEmail, 0
FROM Account.tbPersonEmail PE WITH (NOLOCK)
-- The filter on E.primaryEmail below discards unmatched rows anyway, so an
-- INNER JOIN states the actual semantics.
INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
WHERE E.primaryEmail = 0
  AND NOT EXISTS (
        -- Does this person already own a primary email?
        -- The join must be on E2 (the subquery's own alias). Joining on the outer
        -- alias E leaves E2 unconstrained and excludes every person as soon as any
        -- primary email exists anywhere in the table.
        SELECT 1
        FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
        INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
        WHERE PE2.PersonID = PE.PersonID
          AND E2.primaryEmail = 1
  )

然后运行你的while循环。这应该有点帮助。您可以通过查看#TEMP是否与先前版本匹配来测试这是否正确。

要进一步优化,您可能需要将整个更新过程重写为单个查询。您还可以查看此内容:How can I optimize this SQL query (Using Indexes)?

答案 2 :(得分:0)

设置主电子邮件的逻辑并不好。而且在 varchar 列上使用聚合函数或排名函数会更糟糕。我们还需要了解其他列的情况。

我喜欢 @David 的建议,但不喜欢他的脚本。请先对我的脚本进行充分测试再使用,然后回复告知结果。

-- Rank each person's non-primary emails by EmailID and promote the first one,
-- but only for persons that do not already have a primary email.
;WITH CTE AS
(
    SELECT PE.PersonID,
           E.primaryEmail,
           E.EmailID,
           -- PARTITION BY restarts the numbering for every person, so rn = 1 is that
           -- person's lowest EmailID. Without it only one row in the whole table
           -- would get rn = 1.
           ROW_NUMBER() OVER (PARTITION BY PE.PersonID ORDER BY PE.EmailID) AS rn
    FROM Account.tbPersonEmail PE WITH (NOLOCK)
    -- The WHERE filter on E discards unmatched rows, so INNER JOIN is the real semantics.
    INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
    WHERE E.primaryEmail = 0
      -- Skip persons that already have a primary email; otherwise they would end up
      -- with two primary addresses.
      AND NOT EXISTS (
            SELECT 1
            FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
            INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
            WHERE PE2.PersonID = PE.PersonID
              AND E2.primaryEmail = 1
      )
)
-- Update through the alias so the target is unambiguously the joined instance of tbEmail.
UPDATE A
SET primaryEmail = 1
FROM Account.tbEmail A
INNER JOIN CTE B ON A.EmailID = B.EmailID
WHERE B.rn = 1

答案 3 :(得分:0)

最后我采用了这个方案。

-- For every person without a primary email, flag that person's lowest non-primary
-- EmailID as primary. Same logic as a nested-subquery form, expressed as named steps.
;WITH PersonsWithPrimary AS (
    -- persons that already own at least one primary email
    SELECT PE2.PersonID
    FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
    INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
    WHERE E2.primaryEmail = 1
),
CandidateLinks AS (
    -- non-primary email links of everyone else
    SELECT PE.PersonID, PE.EmailID
    FROM Account.tbPersonEmail PE WITH (NOLOCK)
    INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
    WHERE E.primaryEmail = 0
      AND PE.PersonID NOT IN (SELECT PersonID FROM PersonsWithPrimary)
),
FirstEmailPerPerson AS (
    -- lowest candidate EmailID per person
    SELECT PersonID, MIN(EmailID) AS EmailID
    FROM CandidateLinks
    GROUP BY PersonID
)
UPDATE Account.tbEmail
SET primaryEmail = 1
WHERE EmailID IN (SELECT EmailID FROM FirstEmailPerPerson)

-- Promote, for each person lacking a primary email, the smallest non-primary EmailID.
UPDATE Account.tbEmail
SET primaryEmail = 1
WHERE EmailID IN (
    SELECT FirstEmail.EmailID
    FROM (
        -- smallest candidate EmailID per person
        SELECT Candidate.PersonID,
               MIN(Candidate.EmailID) AS EmailID
        FROM (
            -- non-primary email links of persons without any primary email
            SELECT Link.PersonID, Mail.primaryEmail, Link.EmailID
            FROM Account.tbPersonEmail Link WITH (NOLOCK)
            LEFT OUTER JOIN Account.tbEmail Mail ON Mail.EmailID = Link.EmailID
            WHERE Mail.primaryEmail = 0
              AND Link.PersonID NOT IN (
                    SELECT PrimaryLink.PersonID
                    FROM Account.tbPersonEmail PrimaryLink WITH (NOLOCK)
                    LEFT OUTER JOIN Account.tbEmail PrimaryMail ON PrimaryMail.EmailID = PrimaryLink.EmailID
                    WHERE PrimaryMail.primaryEmail = 1
              )
        ) AS Candidate
        GROUP BY Candidate.PersonID
    ) AS FirstEmail
)