使用大表上的循环批量更新

时间:2013-06-05 21:11:14

标签: sql-server tsql sql-update

我有两个数据有重叠的表,其中一张表的数据约占另一张表的 90%。我需要找出表中那 10% 的唯一记录,并将其移动到它的父表中。这两个表都有 4 亿多行、300 多列。我正在尝试的方法是添加一个标志字段,用来唯一标识需要传输的记录,但我需要更新这个字段,并且在实现逻辑上遇到了困难。下面是我到目前为止拼凑出来的代码,它会导致死循环。两个表中都没有空值。

-- Batched update that is meant to set Existsflg = 1 on the Table1 rows that have
-- no match in table2, at most 10,000,000 rows per pass, until a pass updates 0 rows.
-- NOTE(review): as written this loop does not terminate; see the notes on the
-- UPDATE statement and on the IF/ELSE below.
Declare @counter int 
Declare @RowsEffected int 
Declare @RowsCnt int 
Declare @Err int
-- @counter is the loop sentinel (the loop runs while it is > 0);
-- @RowsEffected accumulates the number of rows updated across passes.
SELECT @COUNTER = 1
SELECT @RowsEffected = 0

while (@counter > 0)
begin
-- Limit the statements that follow to 10,000,000 affected rows each; this stays
-- in effect until the SET ROWCOUNT 0 after the loop.
-- NOTE(review): Microsoft documents SET ROWCOUNT as deprecated for
-- INSERT/UPDATE/DELETE and recommends UPDATE TOP (n) instead.
set Rowcount 10000000

-- NOTE(review): the EXISTS subquery never references the row being updated, so it
-- is a single yes/no answer for the whole statement: whenever the EXCEPT returns
-- at least one row, EVERY row of Table1 qualifies. The WHERE clause also has no
-- "Existsflg is not yet 1" condition, so rows flagged by an earlier pass qualify
-- again. Each pass therefore keeps reporting a non-zero @@ROWCOUNT and the exit
-- condition below is never reached.
update Table1
set Existsflg = 1
where exists (
Select Fields
from Table1
Except
Select Fields
from table2 )

-- Capture both values in one statement, immediately after the UPDATE, because
-- each of them is reset by the next statement that runs.
Select @RowsCnt = @@ROWCOUNT , @Err = @@ERROR
-- An error is only reported with PRINT; the loop is not stopped by it.
If @Err <> 0
begin
Print 'Problem Updating the records'
end
-- Without BEGIN...END, each branch of the IF/ELSE owns exactly one statement:
-- the IF owns the "@COUNTER = 0" assignment and the ELSE owns the running-total
-- assignment. The PRINT and WAITFOR below run on every pass.
IF @RowsCnt = 0
SELECT @COUNTER = 0 
ELSE
SELECT @RowsEffected = @RowsEffected + @RowsCnt 
PRINT 'The total number of rows effected :'+convert(varchar,@RowsEffected)     
-- Pause 10 seconds between passes.
WAITFOR DELAY '00:00:10'        
END 
-- Remove the row limit again for the rest of the session.
SET ROWCOUNT 0
Go

谢谢!

2 个答案:

答案 0 :(得分:0)

下面是我以前处理类似问题时的做法。

我没有使用RowCount,我使用Select TOP(N)和“while exists”

我的“来源”dbo.Employee表在另一台服务器上。

GO
USE [$(DestinationDatabaseName)]
GO

/*
    READ ME !!!

    Replace
        $(SourceServer).$(SourceDatabaseName)
    with the server and database name of the SOURCE data, for example:
        [OtherServer].[OtherDatabase]
*/

-- SubFolder: SQLReplicateReplacer
PRINT '[uspEmployeeReplicateReplacer]'
GO

-- Drop any existing copy of the procedure so that the CREATE PROCEDURE
-- later in this script can run.
IF EXISTS (
    SELECT 1
    FROM sys.objects
    WHERE object_id = OBJECT_ID(N'[dbo].[uspEmployeeReplicateReplacer]')
      AND type IN (N'P', N'PC')
)
    DROP PROCEDURE [dbo].[uspEmployeeReplicateReplacer]
GO


/*


declare @numberRowsAffected  int
declare @ErrorNumber int

exec [dbo].[uspEmployeeReplicateReplacer] @numberRowsAffected output , @ErrorNumber  output

print @numberRowsAffected 
print @ErrorNumber 
print ''

*/



CREATE PROCEDURE [dbo].[uspEmployeeReplicateReplacer] (
    @numberRowsAffected int output,  -- total number of rows inserted into dbo.Employee by this call
    @ErrorNumber int output          -- 0 on success, otherwise the @@ERROR value of the failing insert
)
AS
/*
    Copies, in batches, every row of the source Employee table
    ([$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee) that has no row
    with the same EmployeeUUID in the local dbo.Employee table.

    Each pass stages up to @ManualReplicationRowCount missing rows in
    #Employeeupdate and then inserts them into dbo.Employee. The loop ends when
    no missing rows remain, when an insert reports an error, or after
    @MaximumLoopCounter passes (a safety cap).

    Changes compared with the original version:
      * @numberRowsAffected is now the total over ALL batches; previously the
        per-batch @@ROWCOUNT overwrote the counter, so only the size of the
        last batch was reported.
      * An error raised by the staging insert is now recorded and stops the
        loop; previously it was never inspected.
      * The staging table is emptied with TRUNCATE TABLE instead of an
        unfiltered DELETE.
      * Source and destination tables are aliased (src / dest) in the
        correlated subqueries.
*/

SET NOCOUNT ON

select @ErrorNumber = 0

declare @ErrorTracker int       -- @@ERROR of the most recent insert
declare @batchRowCount int      -- @@ROWCOUNT of the most recent insert into dbo.Employee
declare @insertRowCount int     -- running total of inserted rows
declare @updateRowCount int     -- only changed by the optional (commented-out) update below
select @insertRowCount = 0
select @updateRowCount = 0

IF OBJECT_ID('tempdb..#Employeeupdate') IS NOT NULL
begin
    drop table #Employeeupdate
end

-- Staging table for one batch of missing rows.
CREATE TABLE #Employeeupdate (
    EmployeeKeyID int IDENTITY (1,1),
    EmployeeUUID uniqueidentifier,
    EmployeeLabel varchar(64),
    EmployeeDescription varchar(128)
)

declare @ManualReplicationRowCount int
/* I put this value in a stored procedure, so I could change it in one place */
/* EXEC dbo.uspInternalSettingGetManualReplicationRowCount @ManualReplicationRowCount output */
select @ManualReplicationRowCount = 1000   -- rows per batch

declare @MaximumLoopCounter int
select @MaximumLoopCounter = 10000         -- upper bound on the number of passes

-- Keep going while the pass budget is not used up and at least one source row
-- is still missing from the destination.
while (@MaximumLoopCounter > 0) and exists
(
    select top (1) null
    from [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee src with (nolock)
    where not exists
    (
        select null
        from dbo.Employee dest with (nolock)   -- destinationTable
        where
            /*
            Template for other tables: match on whatever makes a row unique, e.g.
            dest.SOMEUNIQUECOLUMN1 = src.SOMEUNIQUECOLUMN1
            and
            dest.SOMEUNIQUECOLUMN2 = src.SOMEUNIQUECOLUMN2
            */
            dest.EmployeeUUID = src.EmployeeUUID
    )
)
BEGIN

    select @MaximumLoopCounter = @MaximumLoopCounter - 1

    -- Empty the staging table left over from the previous pass.
    TRUNCATE TABLE #Employeeupdate

    -- Stage the next batch of rows that the destination does not have yet.
    -- TOP has no ORDER BY: any not-yet-copied rows will do.
    insert into #Employeeupdate
    (
        EmployeeUUID,
        EmployeeLabel,
        EmployeeDescription
    )
    select top (@ManualReplicationRowCount)
        src.EmployeeUUID,
        src.EmployeeLabel,
        src.EmployeeDescription
    from [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee src with (nolock)
    where not exists
    (
        select null
        from dbo.Employee dest with (nolock)   -- destinationTable
        where dest.EmployeeUUID = src.EmployeeUUID
    )

    -- @@ERROR is reset by the next statement, so read it immediately.
    select @ErrorTracker = @@ERROR

    if @ErrorTracker <> 0
    begin
        select @ErrorNumber = @ErrorTracker
        break   -- Bail Out !!!
    end

    -- NOCOUNT is switched off around the real insert so the caller sees the
    -- "rows affected" message for each batch, as in the original.
    SET NOCOUNT OFF

    insert into dbo.Employee
    (
        EmployeeUUID,
        EmployeeLabel,
        EmployeeDescription
    )
    select
        EmployeeUUID,
        EmployeeLabel,
        EmployeeDescription
    from #Employeeupdate

    -- Both values must be captured in the same statement, right after the insert.
    select @batchRowCount = @@ROWCOUNT, @ErrorTracker = @@ERROR

    SET NOCOUNT ON

    if @ErrorTracker <> 0
    begin
        select @ErrorNumber = @ErrorTracker
        break   -- Bail Out !!!
    end

    -- Accumulate, so the output parameter reports the total across batches.
    select @insertRowCount = @insertRowCount + @batchRowCount

END --End While Loop


/*
Optional: refresh rows that already exist in the destination.
(The NOLOCK hint is not applied to dbo.Employee here because it is the target
of the UPDATE.)

SET NOCOUNT OFF

Update dest
Set
    --EmployeeUUID = src.EmployeeUUID,
    EmployeeLabel = src.EmployeeLabel,
    EmployeeDescription = src.EmployeeDescription
From
    dbo.Employee dest
    inner join [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee src with (nolock)
        on dest.EmployeeUUID = src.EmployeeUUID   --Relationship

SELECT @updateRowCount = @@ROWCOUNT

SET NOCOUNT ON
*/


select @numberRowsAffected = @insertRowCount + @updateRowCount

print '/#Employeeupdate COUNT/'
print @numberRowsAffected
print '-------------------------'

IF OBJECT_ID('tempdb..#Employeeupdate') IS NOT NULL
begin
    drop table #Employeeupdate
end

SET NOCOUNT OFF


GO

-- Let the configured database user execute the replication procedure.
GRANT EXECUTE
    ON OBJECT::[dbo].[uspEmployeeReplicateReplacer]
    TO $(DBUSERNAME)

GO

答案 1 :(得分:0)

由于您需要更新的数据量很大,我建议您分批处理,每批 100 万到 500 万行。

在这种情况下我会做的是:

a)添加名为Processed(bit)的新列,该列将针对所有已处理的行进行更新

b)将 100 万行选入临时表(这一步可能不是必需的,但会让处理过程更清晰一些)

c)将所有非重复记录插入其他表

d)更新这些行,将其标记为已处理(Processed)