我一直在努力将大约1800万条记录从Oracle上传到SQL Server。问题是加载任务需要几天才能完成,即使数据不是那么多。我已经标记了相应于各自完成执行时间的部分代码。
declare @v_sql VARCHAR(MAX)
declare @column_name VARCHAR(MAX)
declare @column_name2 VARCHAR(MAX) =''
declare @column_name3 VARCHAR(MAX) =''
declare @tablename VARCHAR(MAX) ='TITLE_BLOCK_PARTS' --input parameter
declare @v_sql2 VARCHAR(MAX)=''
declare @temp_table VARCHAR(500) = '##'+@tablename+'_T'
declare @temp_table2 VARCHAR(500)
declare @v_entityid int = 14 --input parameter
declare @vcount INT = 1
declare @v_total_records bigint
DECLARE @CountResults TABLE (CountReturned bigint)
declare @v_total_ei bigint
declare @v_max_ei bigint
declare @v_total_no_col int
declare @col_to_exclude VARCHAR(MAX) = ',ID,CLASS,'
declare @stg_table vARCHAR(500) = '##'+@tablename+'_TEMP'
declare @v_success int = -1
declare @v_tablename VARCHAR(MAX) = @tablename
declare @itemnumber varchar(500) ='';
declare @rev varchar(500) ='';
declare @v_stg_sql VARCHAR(MAX) = ''
declare @v_temp_table vARCHAR(500) = '##'+@tablename+'_T_ORI'
declare @v_sql3 VARCHAR(MAX) = ''
declare @v_countrownum bigint = 1
declare @v_rownum bigint
DECLARE @v_rownumtab TABLE (ROWCountReturned bigint)
BEGIN
--SET @col_to_exclude = ''''+REPLACE(@col_to_exclude,',',''',''') +''''
--Start: Following code takes maximum 5 to 7 minutes to complete
IF EXISTS (SELECT * FROM sys.tables WHERE name = @v_tablename)
Begin
set @v_sql = ' DROP TABLE ' + @v_tablename
exec (@v_sql)
END
set @v_sql = 'SELECT *
into ' + @tablename +'
FROM [ORCLTST]..[AGILE].'+@v_tablename;
exec (@v_sql)
--End: Following code takes maximum 5 to 7 minutes to complete
--Start: Following code takes maximum 4 minutes to complete
IF OBJECT_ID('tempdb..'+@stg_table) IS NOT NULL
exec ('drop table ' + @stg_table)
SET @v_sql = 'SELECT *
INTO '+@stg_table+
'
FROM ' + @tablename;
EXEC (@v_sql);
set @v_sql = ''
set @v_total_no_col = (SELECT COUNT(COLUMN_NAME)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND COLUMN_NAME
NOT IN
(
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND
charindex(',' + CAST(COLUMN_NAME as nvarchar(20)) + ',', @col_to_exclude) > 0
)
)
DECLARE get_columns CURSOR FOR
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND COLUMN_NAME
NOT IN
(
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND
charindex(',' + CAST(COLUMN_NAME as nvarchar(20)) + ',', @col_to_exclude) > 0
)
--SELECT COLUMN_NAME
--FROM INFORMATION_SCHEMA.COLUMNS
--WHERE TABLE_NAME = @tablename
--AND COLUMN_NAME NOT IN (@col_to_exclude)
--@tablename
--print @sql3
OPEN get_columns
FETCH NEXT FROM get_columns INTO @column_name
WHILE @@FETCH_STATUS = 0
BEGIN
--PRINT @column_name
SET @column_name2 = replace(REPLACE(REPLACE(LTRIM(RTRIM(REPLACE(@column_name,'"',''))),')',''),'(',''),' ','')
--replace(@column_name2
--print 'ALTER TABLE ' + @sql5+ ' DROP constraint ' + @sql3
set @v_sql2+='CAST('+'['+@column_name+']'+' AS VARCHAR(8000))'+ 'AS '+ @column_name2 + ',
'
+
'
'
SET @v_sql3 += 'CAST('+'ISNULL(['+@column_name+'],'''')'+' AS VARCHAR(8000))'+ 'AS '+ @column_name2 + ',
'
+
'
'
SET @column_name3 += @column_name2 + ',
'+
'
'
FETCH NEXT FROM get_columns INTO @column_name
end
CLOSE get_columns
DEALLOCATE get_columns
--Print @v_sql2
set @v_sql2 = left(@v_sql2, len(@v_sql2)-5)
set @v_sql3 = left(@v_sql3, len(@v_sql3)-5)
SET @column_name3 = left(@column_name3, len(@column_name3)-7)
IF OBJECT_ID('tempdb..'+@temp_table) IS NOT NULL
exec ('drop table ' + @temp_table)
IF OBJECT_ID('tempdb..'+@temp_table+'2') IS NOT NULL
exec ('drop table ' + @temp_table+'2')
IF OBJECT_ID('tempdb..'+@v_temp_table) IS NOT NULL
exec ('drop table ' + @v_temp_table)
IF OBJECT_ID('tempdb..'+@v_temp_table+'2') IS NOT NULL
exec ('drop table ' + @v_temp_table+'2')
SET @v_sql = 'Select
ROW_NUMBER()
OVER (ORDER BY number, REV) AS Row,
'
+
@v_sql2
+
'
'
+
' INTO ' + @v_temp_table + '
from
'
+
@tablename
PRINT @v_sql
EXEC (@V_SQL)
SET @v_sql = 'Select
ROW_NUMBER()
OVER (ORDER BY number, REV) AS Row,
'
+
@v_sql3
+
'
'
+
' INTO ' + @v_temp_table+'2' + '
from
'
+
@v_temp_table
PRINT @v_sql
EXEC (@V_SQL)
SET @v_sql ='
Select ROW
,ColumnName
, ColumnValue
INTO ' + @temp_table + '
FROM
(
Select
ROW,
'
+
@v_sql3
+
'
'
+
'
'
+
' from
'
+
@v_temp_table+'2'
+
'
'
+
'
)t
UNPIVOT (ColumnValue for ColumnName
IN (
'
+
@column_name3
+
'
'
+
'
)
)UP
';
PRINT @v_sql
--END;
--/*
EXEC (@V_SQL)
set @v_sql = ''
/*set @v_sql = 'with q1 as (
SELECT t2.Id, t2.displayorder,t2.DisplayName ,T2.[Group],t1.*
'
+
'
FROM '+@temp_table+ ' t1
INNER JOIN PLMColumn t2
ON t1.ColumnName = t2.name
where t2.EntityId = ' + cast(@v_entityid as varchar(100))
+
')
,q2 as(
select Id, ColumnName, displayorder,[Group], count(*) as c_nnum
from q1
group by Id,ColumnName,displayorder,[Group])
select t1.id, t1.displayorder, t1.ColumnName, t2.columnvalue
into '+ @temp_table+'2'+
'
from q2 t1
INNER JOIN '+@temp_table+ ' t2
ON t1.ColumnName = t2.ColumnName
WHERE T1.[Group] <> ''PAGE THREE''
' */
--
set @temp_table2 = replace(@temp_table+'2', '##','');
IF EXISTS (SELECT * FROM sys.tables WHERE name = @temp_table2)
Begin
set @v_sql = ' DROP TABLE ' + @temp_table2
exec (@v_sql)
END
set @v_Sql = 'SELECT t1.Row,t2.Id, t2.displayorder,t2.DisplayName ,T2.[Group],t1.ColumnName, t1.ColumnValue
into '+ @temp_table2 +
' FROM '+@temp_table+ ' t1
INNER JOIN PLMColumn t2
ON t1.ColumnName = t2.name
where t2.EntityId = ' + cast(@v_entityid as varchar(100)) + '
and T2.[Group] <> ''PAGE THREE'''
print @v_sql
exec(@v_sql)
set @v_sql = ' CREATE INDEX idx_'+@temp_table2+'_row
ON '+ @temp_table2 + ' (row)';
print @v_sql
exec(@v_sql)
--end
--/*
--set @v_sql = 'select COUNT(*) from ' + @tablename
SELECT @v_sql = N'SELECT COUNT(*) FROM' + QUOTENAME(@tablename);
INSERT @CountResults
EXEC(@v_sql)
set @v_total_records = (SELECT CountReturned FROM @CountResults)
print '@v_total_records '+ cast(@v_total_records as varchar(5000))
set @v_max_ei = (SELECT IDENT_CURRENT ('PLMENTITYINSTANCE'))
set @v_max_ei = @v_max_ei + 1
--(select ISNULL(MAX (id),0) from PLMEntityInstance)
print '@v_max_ei '+ cast(@v_max_ei as varchar(5000))
set @v_total_ei = ISNULL(@v_total_records,0) + @v_max_ei
print '@v_total_ei '+ cast(@v_total_ei as varchar(5000))
--END;
IF EXISTS (SELECT * FROM sys.tables WHERE name = 'ITEM_STG')
Begin
set @v_sql = ' DROP TABLE ITEM_STG'
exec (@v_sql)
END
set @v_sql = 'CREATE TABLE [dbo].[ITEM_STG](
[INSTANCE_ID] [bigint] NOT NULL,
[ITEM_NUMBER] [varchar](max) NOT NULL,
[REV] [varchar](max) NOT NULL)'
exec (@v_sql)
set @v_sql = ''
--END
--/*
WHILE (@vcount <= @v_total_records)
BEGIN
INSERT INTO PLMEntityInstance (EntityId)
VALUES (@v_entityid)
SET @vcount = @vcount + 1
--print
END
set @v_sql = ''
set @v_sql = N'with c AS
(select DISTINCT ROW from ' + QUOTENAME(@temp_table2) +')
select count(*) from c
'
INSERT @v_rownumtab
EXEC(@v_sql)
set @v_rownum = (select * from @v_rownumtab)
print @v_rownum
--End: Following code takes maximum 4 minutes to complete
--end
--/*
--Start: Following code takes days to complete
while (@v_max_ei <= @v_total_ei AND @v_countrownum <=@v_rownum)
BEGIN
set @v_sql = ''
--top('+CAST(@v_total_no_col AS varchar(100))+')
set @v_sql = '
with q2
as(
select
* from '
+ @temp_table2
+
'
where row = '+ CAST(@v_countrownum AS varchar(5000)) + '
)
INSERT INTO PLMColumnValue(EntityInstanceId, ColumnID, Value)
select '+ CAST(@v_max_ei AS varchar(5000)) + ' as Entityinstanceid, t.id as column_id, t.columnvalue from q2 t
order by DisplayOrder
'
print @v_countrownum
print @v_sql
exec (@v_sql)
set @itemnumber = (select Value from PLMColumnValue where columnid=(select id from PLMColumn where EntityId = @v_entityid and Name = 'number') and EntityInstanceId = @v_max_ei)
set @rev = (select Value from PLMColumnValue where columnid=(select id from PLMColumn where EntityId = @v_entityid and Name = 'rev') and EntityInstanceId = @v_max_ei)
print @itemnumber
print @rev
SET @v_stg_sql = ''
set @v_stg_sql = '
INSERT INTO ITEM_STG (INSTANCE_ID,ITEM_NUMBER,REV)
select ' + CAST(@v_max_ei AS varchar(5000)) + ' , ' + ''''+@itemnumber+'''' + ' , ' +''''+ @rev+'''' + ''
print @v_stg_sql
exec (@v_stg_sql)
set @itemnumber = '';
set @rev = '';
SET @v_stg_sql = ''
set @v_sql = ''
SET @v_max_ei = @v_max_ei + 1;
set @v_countrownum = @v_countrownum + 1;
END
--End: Following code takes days to complete
END;
正在使用的主要表是PlmEntityInstance,PlmColumnValue和ITEM_STG。 PlmColumnValue表是加载所有迁移数据的地方,此表具有EAV模型,此处代码需要很长时间才能加载数据。我该如何优化此流程?在所有必需的修改之后和加载之前在表上创建所有数据的索引(其名称存储在变量@ temp_table2中)确实有很大帮助,但仍然不够好。我昨天在当地时间2100点启动了加载过程并且已经过了15个小时,它在1800万条记录中只加载了149088条记录。请帮助!
如果没有提供足够的信息,请告知我们。
请参阅下面的等待统计数据
WaitType Wait_S Resource_S Signal_S WaitCount Percentage AvgWait_S AvgRes_S AvgSig_S
CXPACKET 657100.57 636945.69 20154.88 338749067 59.49 0.0019 0.0019 0.0001
PREEMPTIVE_OLEDBOPS 268405.70 268405.70 0.00 24041 24.30 11.1645 11.1645 0.0000
OLEDB 91213.76 91213.76 0.00 34867 8.26 2.6160 2.6160 0.0000
ASYNC_NETWORK_IO 33316.35 32352.43 963.93 4060699 3.02 0.0082 0.0080 0.0002
示例数据放在以下链接上:https://www.dropbox.com/home/Public/Query/
上述链接中提供的csv中的数据有8531条记录。
您可以使用'TITLE_BLOCK_PARTS'
初始化参数@ temp_table2答案 0 :(得分:1)
使用tally table将所有WHILE
循环替换为基于集INSERT
的集合。例如,替换
declare @vcount INT = 1
WHILE (@vcount <= @v_total_records)
BEGIN
INSERT INTO PLMEntityInstance (EntityId)
VALUES (@v_entityid)
SET @vcount = @vcount + 1
END
与
INSERT INTO PLMEntityInstance (EntityId)
SELECT @v_entityid
FROM Tally
WHERE N <= @v_total_records
可以通过多种方式创建Tally,例如,2000万行计数:
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS (SELECT 1 FROM E1 a, E1 b),
E4(N) AS (SELECT 1 FROM E2 a, E2 b) --10E+4 or 10,000 rows max
SELECT TOP 20000000
IDENTITY(INT,1,1) AS N
INTO dbo.Tally
FROM E4 a, E4 b; --100,000,000 rows max
ALTER TABLE dbo.Tally
ADD CONSTRAINT PK_Tally_N
PRIMARY KEY CLUSTERED (N) WITH FILLFACTOR = 100;
修改强>
关于重构您的上一个WHILE
基于设置的代码。我不确定我完全理解整个剧本的作用,但是评论时间太长了。首先看一下最后一个循环的原始代码:
@v_countrownum = 1
...
@v_max_ei = ...
while (@v_max_ei <= @v_total_ei AND @v_countrownum <=@v_rownum)
BEGIN
...
SET @v_max_ei = @v_max_ei + 1;
set @v_countrownum = @v_countrownum + 1;
END
让我们将它重构为迭代器查询:
...
@v_max_ei = ...
WITH iterator AS (
SELECT v_max_ei = @v_max_ei + N - 1
,v_countrownum = N
FROM tally
WHERE @v_max_ei + N - 1 <= @v_total_ei AND N <= @v_rownum
)
SELECT *
FROM iterator
接下来,您的循环在每次迭代时无条件地将行添加到PLMColumnValue
和ITEM_STG
,因此我们必须使用LEFT JOIN
迭代器和源数据表并处理NULL。我看到你的代码默认缺少数据''。
此外,我还需要一些测试数据,这些数据是使用数据集中PLMColumn
字段的子集生成的。
-- test data generator
DROP TABLE title_block3;
GO
SELECT row = N, id, DisplayOrder
,columnvalue = c.NAME + '_val_0' + cast(N AS VARCHAR(3))
INTO title_block3
FROM PLMColumn c
JOIN tally ON tally.N <= 3
WHERE c.NAME IN ('number','rev' ) OR c.NAME LIKE 'text%'
SELECT * FROM title_block3;
所以最后用脚本替换你的最后一个WHILE。我使用任意变量值而不是脚本的第一部分计算。 @v_rownum = 3
根据测试数据。
declare @temp_table2 varchar(100);
declare @v_sql nvarchar(max);
declare @v_max_ei int;
declare @v_entityid int;
declare @v_total_ei int;
declare @v_rownum int;
set @temp_table2 = 'title_block3'; -- see generator
set @v_max_ei = 120000;
set @v_total_ei = 200000;
set @v_rownum = 3;
set @v_entityid = 14;
--Start: Following code takes days to complete
set @v_sql =
'WITH iterator AS (
SELECT v_max_ei = '+ CAST(@v_max_ei AS varchar(5000)) + ' + N - 1
,v_countrownum = N
FROM tally
WHERE '+ CAST(@v_max_ei AS varchar(5000)) + ' + N - 1 <= '+ CAST(@v_total_ei AS varchar(5000)) +
' AND N <= '+ CAST(@v_rownum AS varchar(5000)) + '
)
INSERT INTO PLMColumnValue(EntityInstanceId, ColumnID, Value)
SELECT i.v_max_ei, isnull(t.id,''''), isnull(t.columnvalue,'''')
FROM iterator i
LEFT JOIN ' + @temp_table2 + ' t ON t.row = i.v_countrownum
ORDER BY i.v_countrownum, t.DisplayOrder ';
print @v_sql;
exec (@v_sql);
WITH iterator AS (
SELECT v_max_ei = @v_max_ei + N - 1
,v_countrownum = N
FROM tally
WHERE @v_max_ei + N - 1 <= @v_total_ei AND N <= @v_rownum
)
INSERT INTO ITEM_STG (INSTANCE_ID, ITEM_NUMBER, REV)
SELECT i.v_max_ei
-- unpivot
, isnull(max(CASE c.Name WHEN 'number' THEN Value END),'')
, isnull(max(CASE c.Name WHEN 'rev' THEN Value END),'')
FROM iterator i
LEFT JOIN PLMColumnValue cv ON cv.EntityInstanceId = i.v_max_ei
LEFT JOIN PLMColumn c ON cv.columnid = c.id
AND c.EntityId = @v_entityid
AND c.Name IN ( 'number' , 'rev')
GROUP BY i.v_max_ei;
-- check it
SELECT * FROM ITEM_STG;
SELECT * FROM PLMColumnValue;