使用SSIS包加载数据时理解间歇性不一致的问题

时间:2013-03-07 15:11:11

标签: sql sql-server-2008 ssis common-table-expression

问题

在过去的几个月中,下面描述的流程在绝大多数运行中都没有任何问题(SQL Server 2008 R2)。但是,我们遇到过三次数据被错误关联的情况。问题是:造成这种情况的原因是什么,以及如何解决?

DATA_PreImp

    sourceid    col01   col02   col03   col04   col...
    100001      John    Smith   
    100002      Calvin  Klein
    100003      Peter   Parker
    100004      Moe     Greene

通常得到的结果是各属性正确关联到Items_Main,但有时(不到1%)顺序会被打乱,导致col01的值没有与其余各列关联到同一条Items_Main记录。

任何有关造成这种情况的见解都将非常受欢迎。

数据移动程序

我们有一个SSIS包,它将数据从名为DATA_PreImp的平面表传输到由三个相关表组成的结构(基于属性)。

  • Items_Main应为DATA_PreImp中的每一行包含一行
  • Items_FeatureValues为DATA_PreImp中每一行的每个列值包含一行
  • Items_MainRel包含Items_Main和Items_FeatureValues之间的关联

SSIS包中的第一步将DATA_PreImp中的数据插入到Items_Main中,并将生成的标识符插入到空DATA_PreImpMappingTMP表的TARGET_ID列中。

-- Step 1: create one items_main row per data_preimp row and record every
-- generated itemid in DATA_PreImpMappingTMP.TARGET_ID via the OUTPUT clause.
-- NOTE(review): the ORDER BY controls how identity values are assigned in
-- items_main; the OUTPUT clause is not documented to deliver rows to
-- DATA_PreImpMappingTMP in that same order, so that table's ROW identity
-- should not be assumed to mirror sourceid order -- confirm before relying on it.
insert into items_main
    (creationdate, status)
output
    inserted.itemid
    into DATA_PreImpMappingTMP (TARGET_ID)
select
    getdate(),
    '0'
from data_preimp as p
order by p.sourceid asc;

SSIS包中的第二步使用TARGET_ID(即原来的Itemid)以及特性(feature)的标识符(本例中为5)填充Items_MainRel表。

-- Step 2: link every item captured in step 1 to feature 5 and record each
-- generated itemrelid in DATA_PreImpMapping2TMP.INDREL_ID via the OUTPUT clause.
-- Rows are read in TARGET_ID order, so itemrelid values are assigned in the
-- same relative order as the itemid values they point to.
insert into items_mainrel
    (itemid, featureid)
output
    inserted.itemrelid
    into DATA_PreImpMapping2TMP (INDREL_ID)
select
    pm.TARGET_ID,
    5
from DATA_PreImpMappingTMP as pm
order by pm.TARGET_ID asc;

第三步是使用DATA_PreImp中的SOURCE_ID填充DATA_PreImpMapping2TMP表中的SOURCE_ID列。

-- Step 3: attach the originating sourceid (and feature id 5) to every row of
-- DATA_PreImpMapping2TMP.
--
-- Rows are paired by rank: the k-th smallest sourceid goes with the k-th
-- smallest INDREL_ID. This holds because step 1 assigns itemid in sourceid
-- order and step 2 assigns itemrelid in TARGET_ID (= itemid) order; identity
-- values generated by INSERT ... SELECT ... ORDER BY follow the ORDER BY.
--
-- The previous version paired on DATA_PreImpMapping2TMP.ROW instead. ROW is an
-- identity filled in the order the OUTPUT clause delivers rows, which SQL
-- Server does not guarantee to match the ORDER BY of the INSERT, so the pairing
-- could occasionally be scrambled.
--
-- NOTE(review): like the original, this assumes DATA_PreImpMapping2TMP holds
-- only the rows of the current run and that data_preimp is unchanged since
-- step 1 -- confirm in the package.
with src as (
    -- Source rows numbered by sourceid.
    select
        sourceid,
        row_number() over (order by sourceid asc) as rn
    from data_preimp
),
rel as (
    -- Mapping rows numbered by the generated relation id, not by ROW.
    select
        ROW,
        row_number() over (order by INDREL_ID asc) as rn
    from DATA_PreImpMapping2TMP
)
update m
set m.SOURCE_ID = s.sourceid,
    m.FEAT_ID = 5
from DATA_PreImpMapping2TMP as m
join rel as r on r.ROW = m.ROW
join src as s on s.rn = r.rn;

最后一步是使用DATA_PreImpMapping2TMP和DATA_PreImp中的数据填充Items_FeatureValues表。

-- Step 4: store col01 of each source row as the value of its feature-5
-- relation. languageid is written as the constant 0.
insert into items_featurevalues
    (itemrelid, languageid, fnvarchar)
select
    m.INDREL_ID,
    0,
    p.col01
from DATA_PreImpMapping2TMP as m
inner join data_preimp as p
    on m.SOURCE_ID = p.sourceid
where m.FEAT_ID = 5;

数据表结构

以下是创建场景所需的内容:

-- Flat staging table that the load reads from: an identity key (sourceid)
-- plus ten free-text columns col01..col10.
CREATE TABLE [dbo].[DATA_PreImp]
(
    [sourceid] [bigint] IDENTITY(1,1) NOT NULL,
    [col01]    [nvarchar](500) NULL,
    [col02]    [nvarchar](500) NULL,
    [col03]    [nvarchar](500) NULL,
    [col04]    [nvarchar](500) NULL,
    [col05]    [nvarchar](500) NULL,
    [col06]    [nvarchar](500) NULL,
    [col07]    [nvarchar](500) NULL,
    [col08]    [nvarchar](500) NULL,
    [col09]    [nvarchar](500) NULL,
    [col10]    [nvarchar](500) NULL,
    CONSTRAINT [PK_DATA_PreImp] PRIMARY KEY CLUSTERED ([sourceid] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];


-- Work table filled by the OUTPUT clause of the items_main insert:
-- TARGET_ID receives the generated itemid, ROW is an identity assigned as
-- rows arrive in this table.
CREATE TABLE [dbo].[DATA_PreImpMappingTMP]
(
    [ROW]       [int] IDENTITY(1,1) NOT NULL,
    [TARGET_ID] [int] NULL,
    PRIMARY KEY CLUSTERED ([ROW] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];


-- Target table holding one row per imported item; Itemid is the identity
-- key that Items_MainRel.Itemid references.
CREATE TABLE [dbo].[Items_Main]
(
    [Itemid]       [int] IDENTITY(1,1) NOT NULL,
    [creationDate] [smalldatetime] NOT NULL,
    [status]       [int] NOT NULL,
    [purchdate]    [smalldatetime] NULL,
    [logindate]    [smalldatetime] NULL,
    CONSTRAINT [PK_Items_Main] PRIMARY KEY CLUSTERED ([Itemid] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];


-- Work table filled by the OUTPUT clause of the items_mainrel insert
-- (INDREL_ID) and completed afterwards with SOURCE_ID and FEAT_ID.
-- ROW is an identity assigned as rows arrive in this table.
-- NOTE(review): SOURCE_ID is int while DATA_PreImp.sourceid is bigint --
-- confirm the narrower type is intended.
CREATE TABLE [dbo].[DATA_PreImpMapping2TMP]
(
    [ROW]       [int] IDENTITY(1,1) NOT NULL,
    [SOURCE_ID] [int] NULL,
    [INDREL_ID] [int] NULL,
    [FEAT_ID]   [int] NULL,
    PRIMARY KEY CLUSTERED ([ROW] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];

-- Catalogue of features; featureId is the identity key that
-- Items_MainRel.featureId references.
CREATE TABLE [dbo].[Items_Features]
(
    [featureId]    [int] IDENTITY(1,1) NOT NULL,
    [featureRef]   [varchar](15) NOT NULL,
    [featureName]  [varchar](50) NOT NULL,
    [creationDate] [smalldatetime] NOT NULL,
    [status]       [int] NOT NULL,
    [fieldType]    [varchar](50) NOT NULL,
    [featureType]  [int] NOT NULL,
    [featureDesc]  [varchar](500) NULL,
    CONSTRAINT [PK_Items_Features] PRIMARY KEY CLUSTERED ([featureId] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];


-- Relation table: each row ties one item (Itemid) to one feature (featureId).
-- ItemRelId is the identity key that Items_FeatureValues.ItemRelId references.
CREATE TABLE [dbo].[Items_MainRel]
(
    [ItemRelId] [int] IDENTITY(1,1) NOT NULL,
    [Itemid]    [int] NOT NULL,
    [featureId] [int] NOT NULL,
    CONSTRAINT [PK_Items_MainRel] PRIMARY KEY CLUSTERED ([ItemRelId] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];

GO

-- Value table: one row per item/feature relation (ItemRelId) and language,
-- with one nullable column per value type.
-- NOTE(review): FnVarChar is nvarchar(250) while the DATA_PreImp colNN
-- columns that feed it are nvarchar(500) -- confirm longer values cannot occur.
CREATE TABLE [dbo].[Items_FeatureValues]
(
    [valueId]        [int] IDENTITY(1,1) NOT NULL,
    [ItemRelId]      [int] NOT NULL,
    [languageId]     [int] NOT NULL,
    [FnVarChar]      [nvarchar](250) NULL,
    [FInt]           [int] NULL,
    [FImage]         [int] NULL,
    [FNText]         [ntext] NULL,
    [FSmallDateTime] [smalldatetime] NULL,
    CONSTRAINT [PK_Items_FeatureValues] PRIMARY KEY CLUSTERED ([valueId] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY];

GO

-- Items_MainRel.featureId must reference an existing Items_Features row;
-- existing rows are validated when the constraint is added (WITH CHECK).
ALTER TABLE [dbo].[Items_MainRel] WITH CHECK
    ADD CONSTRAINT [FK_Items_MainRel_Items_Features]
    FOREIGN KEY ([featureId])
    REFERENCES [dbo].[Items_Features] ([featureId])
GO

-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Items_MainRel]
    CHECK CONSTRAINT [FK_Items_MainRel_Items_Features]
GO

-- Items_MainRel.Itemid must reference an existing Items_Main row;
-- existing rows are validated when the constraint is added (WITH CHECK).
ALTER TABLE [dbo].[Items_MainRel] WITH CHECK
    ADD CONSTRAINT [FK_Items_MainRel_Items_Main]
    FOREIGN KEY ([Itemid])
    REFERENCES [dbo].[Items_Main] ([Itemid])
GO

-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Items_MainRel]
    CHECK CONSTRAINT [FK_Items_MainRel_Items_Main]
GO


-- Items_FeatureValues.ItemRelId must reference an existing Items_MainRel row.
-- Deleting a relation row also deletes its value rows (ON DELETE CASCADE).
ALTER TABLE [dbo].[Items_FeatureValues] WITH CHECK
    ADD CONSTRAINT [FK_Items_FeatureValues_Items_MainRel]
    FOREIGN KEY ([ItemRelId])
    REFERENCES [dbo].[Items_MainRel] ([ItemRelId])
    ON DELETE CASCADE
GO

-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Items_FeatureValues]
    CHECK CONSTRAINT [FK_Items_FeatureValues_Items_MainRel]
GO


-- Sample staging rows for reproducing the scenario; sourceid is generated
-- by the identity column, col05..col10 stay NULL.
INSERT INTO DATA_PreImp
    (col01, col02, col03, col04)
VALUES
    ('John',   'Smith',  '1964', 'NewYork'),
    ('Calvin', 'Klein',  '1960', 'Washington D. C.'),
    ('Peter',  'Parker', '1974', 'Losangles'),
    ('Moe',    'Greene', '1928', 'Lasvegas');


-- Sample feature definitions, one per staged attribute (first name,
-- last name, birth, city); featureId is generated by the identity column.
-- NOTE(review): only four features are seeded here, while the load steps
-- reference featureid 5 -- on a freshly created table that id would not
-- exist; confirm which feature id the repro should use.
INSERT INTO Items_Features
    (featureRef, featureName, creationDate, [status], fieldType, featureType, featureDesc)
VALUES
    ('firstname', 'First_Name', GETDATE(), 0, 'FnVarChar', 3, 'FirstName'),
    ('lastname',  'Last_Name',  GETDATE(), 0, 'FnVarChar', 3, 'LastName'),
    ('Birth',     'Birth',      GETDATE(), 0, 'FnVarChar', 3, 'Birth'),
    ('City',      'City',       GETDATE(), 0, 'FnVarChar', 3, 'City');

1 个答案:

答案 0 :(得分:2)

问题是计算列CUST_CD。经过大量的研究,似乎BULK INSERT不喜欢复杂的计算类型(参见Using SQL Server spatial types in SSIS data load)。解决方案是删除计算列,并使其成为varchar(20)NULL。然后我创建了一个新的执行SQL任务,它使用计算值更新任何NULL行。