我正在实现将CSV数据自动导入数据库的功能。目前我的代码会生成一个名为#temp1
的表,如下所示:
CompetitionID DateKickOff TimeKickOff TeamIDHome TeamIDAway ScoreHome ScoreAway
--------------- -------------------------- ------------------- ----------- ------------ ----------- ------------
2 2013-03-08 00:00:00.000 14:02:00.0000000 21 43 0 4
此时我需要检查数据库中是否已存在相同的DateKickOff
和TeamIDHome
值组合,以避免导入重复数据。如果这个组合已经存在于表Match
的某一行中,那么我需要跳过以下代码:
-- Copy the staged match row(s) from #temp1 into Match.
-- Columns are named on both sides so the insert does not depend on the
-- physical column order of Match or on #temp1 gaining extra columns later.
-- MatchID is omitted: per the description it is the primary key that
-- SQL Server increments on insert.
INSERT INTO Match (
    CompetitionID,
    DateKickOff,
    TimeKickOff,
    TeamIDHome,
    TeamIDAway,
    ScoreHome,
    ScoreAway
)
SELECT
    CompetitionID,
    DateKickOff,
    TimeKickOff,
    TeamIDHome,
    TeamIDAway,
    ScoreHome,
    ScoreAway
FROM #temp1
-- Extend the staged odds rows with a MatchID column, then stamp every row
-- with the highest MatchID currently stored in Match.
-- NOTE(review): this picks up the latest MatchID even when no new match was
-- inserted just before; confirm it only runs after a successful insert.
ALTER TABLE #CSVTest_Data
    ADD MatchID INT
-- No WHERE clause on purpose: every staged row receives the same MatchID.
UPDATE #CSVTest_Data
SET MatchID = (SELECT MAX(MatchID) FROM Match)
-- Load the staged odds rows into Data. The staging names FirstTimeTaken,
-- LatestTimeTaken and Market map onto OddsFirstTimeTaken, OddsLastTimeTaken
-- and MarketName; the remaining columns keep their names.
INSERT INTO Data (
    MatchID, OddsFirstTimeTaken, OddsLastTimeTaken, MarketName,
    Outcome, Odds, NumberOfBets, VolumeMatched, InPlay
)
SELECT
    src.MatchID,
    src.FirstTimeTaken,
    src.LatestTimeTaken,
    src.Market,
    src.Outcome,
    src.Odds,
    src.NumberOfBets,
    src.VolumeMatched,
    src.InPlay
FROM #CSVTest_Data AS src
显然,如果数据不是重复条目,就需要执行上面的代码。非常感谢任何这方面的帮助。
编辑:澄清一下,数据的比较需要在“INSERT INTO Match”这段代码执行之前进行。如果数据不重复,SQL Server会在Match表中自动递增主键MatchID。然后,在写入Data表之前,我会取得这个新的MatchID值并将其写入我的第二个临时表。如果没有新的条目添加到Match表,那么就不应向Data表写入任何数据。
答案 0 :(得分:3)
您可以使用EXCEPT
关键字:
-- Insert only the staged odds rows that are not already present in Data.
-- EXCEPT returns the distinct rows of the first query that do not appear in
-- the second, so exact duplicates inside #CSVTest_Data are collapsed too.
-- EXCEPT pairs columns by position, so the Data side must use Data's own
-- column names (OddsFirstTimeTaken, OddsLastTimeTaken, MarketName); the CTE
-- exposes the names from the first (staging) query.
-- NOTE(review): MatchID is not populated by this insert; confirm that
-- Data.MatchID may be left unset here.
WITH NewData AS (
    SELECT
        FirstTimeTaken,
        LatestTimeTaken,
        Market,
        Outcome,
        Odds,
        NumberOfBets,
        VolumeMatched,
        InPlay
    FROM #CSVTest_Data          -- incoming rows
    EXCEPT
    SELECT
        OddsFirstTimeTaken,
        OddsLastTimeTaken,
        MarketName,
        Outcome,
        Odds,
        NumberOfBets,
        VolumeMatched,
        InPlay
    FROM Data                   -- rows already stored
)
INSERT INTO Data (
    OddsFirstTimeTaken,
    OddsLastTimeTaken,
    MarketName,
    Outcome,
    Odds,
    NumberOfBets,
    VolumeMatched,
    InPlay
)
SELECT
    FirstTimeTaken,
    LatestTimeTaken,
    Market,
    Outcome,
    Odds,
    NumberOfBets,
    VolumeMatched,
    InPlay
FROM NewData
编辑:
如果表中有标识(IDENTITY)主键列,则无需在INSERT语句中列出它们,SQL Server会自动处理这些列的值。
答案 1 :(得分:0)
试试这个,
-- Insert the staged odds rows for which Data holds no row with the same
-- DateKickOff and TeamIDHome (anti-join expressed with NOT EXISTS).
-- NOTE(review): this assumes both #CSVTest_Data and Data carry DateKickOff
-- and TeamIDHome; those columns are only shown on #temp1 / Match. Confirm
-- against the real schema.
INSERT INTO Data (
    MatchID, OddsFirstTimeTaken, OddsLastTimeTaken, MarketName,
    Outcome, Odds, NumberOfBets, VolumeMatched, InPlay
)
SELECT
    staged.MatchID,
    staged.FirstTimeTaken,
    staged.LatestTimeTaken,
    staged.Market,
    staged.Outcome,
    staged.Odds,
    staged.NumberOfBets,
    staged.VolumeMatched,
    staged.InPlay
FROM #CSVTest_Data AS staged
WHERE NOT EXISTS (
    SELECT 1
    FROM Data AS existing
    WHERE existing.DateKickOff = staged.DateKickOff
      AND existing.TeamIDHome = staged.TeamIDHome
)
答案 2 :(得分:0)
这可以通过对每个表格的重复数据进行单独检查来实现:
-- Insert staged matches from #temp1 unless Match already holds a row with the
-- same DateKickOff and TeamIDHome, which is the duplicate key the import
-- must respect. A whole-row comparison would treat a re-import whose score
-- or any other column differs as a brand-new match.
-- Columns are listed explicitly so the insert does not rely on column order;
-- MatchID is omitted because SQL Server generates it.
-- NOTE(review): unlike EXCEPT, this does not collapse identical rows that
-- appear more than once inside #temp1; confirm the staging table holds one
-- row per match.
INSERT INTO Match (
    CompetitionID,
    DateKickOff,
    TimeKickOff,
    TeamIDHome,
    TeamIDAway,
    ScoreHome,
    ScoreAway
)
SELECT
    t.CompetitionID,
    t.DateKickOff,
    t.TimeKickOff,
    t.TeamIDHome,
    t.TeamIDAway,
    t.ScoreHome,
    t.ScoreAway
FROM #temp1 AS t
WHERE NOT EXISTS (
    SELECT 1
    FROM Match AS m
    WHERE m.DateKickOff = t.DateKickOff
      AND m.TeamIDHome = t.TeamIDHome
)
-- Drop staged odds rows that already exist in Data, comparing all eight
-- payload columns (staging names on the left, Data names on the right).
-- Plain "=" is used, so a row with NULL in any compared column never matches
-- and therefore stays in the staging table.
DELETE staged
FROM #CSVTest_Data AS staged
WHERE EXISTS (
    SELECT 1
    FROM Data AS existing
    WHERE staged.FirstTimeTaken = existing.OddsFirstTimeTaken
      AND staged.LatestTimeTaken = existing.OddsLastTimeTaken
      AND staged.Market = existing.MarketName
      AND staged.Outcome = existing.Outcome
      AND staged.Odds = existing.Odds
      AND staged.NumberOfBets = existing.NumberOfBets
      AND staged.VolumeMatched = existing.VolumeMatched
      AND staged.InPlay = existing.InPlay
)
-- Add a MatchID column to the staged odds rows and fill it with the highest
-- MatchID currently stored in BetfairFootballDB..Match.
-- NOTE(review): the latest MatchID is used even when the preceding insert
-- added no match; confirm that case cannot attach odds to the wrong match.
ALTER TABLE #CSVTest_Data
    ADD MatchID INT
-- No WHERE clause on purpose: every remaining staged row gets the same MatchID.
UPDATE #CSVTest_Data
SET MatchID = (SELECT MAX(MatchID) FROM BetfairFootballDB..Match)
-- Write the remaining staged odds rows into BetfairFootballDB..Data,
-- mapping FirstTimeTaken / LatestTimeTaken / Market onto the Odds* and
-- MarketName columns.
INSERT INTO BetfairFootballDB..Data (
    MatchID,
    OddsFirstTimeTaken,
    OddsLastTimeTaken,
    MarketName,
    Outcome,
    Odds,
    NumberOfBets,
    VolumeMatched,
    InPlay
)
SELECT
    src.MatchID,
    src.FirstTimeTaken,
    src.LatestTimeTaken,
    src.Market,
    src.Outcome,
    src.Odds,
    src.NumberOfBets,
    src.VolumeMatched,
    src.InPlay
FROM #CSVTest_Data AS src