我有一张按购置日期(AcquisitionDate)分区的表。
表结构(表名:Inventory,即库存表):
-- Column list of the Inventory table as given in the question (fragment only; no CREATE TABLE header is shown).
Vendor_Name [varchar](80) NULL,
Model_Name [varchar](80) NULL,
AcquisitionDate [datetime] NOT NULL,
Qty [bigint] NOT NULL
Inventory(库存)表的分区函数:
-- Monthly partition function over a DATETIME column, with boundaries from
-- 2012-07-01 through 2014-04-01.
-- RANGE LEFT makes every boundary value the inclusive upper bound of its
-- partition, so a row stamped exactly at midnight on the first of a month
-- lands in the same partition as the preceding month's rows.
-- NOTE(review): if each partition is meant to hold exactly one calendar
-- month, RANGE RIGHT is probably what is wanted - confirm before changing.
CREATE PARTITION FUNCTION [Inventory_PF_New] ( datetime )
AS RANGE LEFT FOR VALUES
(
    N'2012-07-01T00:00:00.000', N'2012-08-01T00:00:00.000',
    N'2012-09-01T00:00:00.000', N'2012-10-01T00:00:00.000',
    N'2012-11-01T00:00:00.000', N'2012-12-01T00:00:00.000',
    N'2013-01-01T00:00:00.000', N'2013-02-01T00:00:00.000',
    N'2013-03-01T00:00:00.000', N'2013-04-01T00:00:00.000',
    N'2013-05-01T00:00:00.000', N'2013-06-01T00:00:00.000',
    N'2013-07-01T00:00:00.000', N'2013-08-01T00:00:00.000',
    N'2013-09-01T00:00:00.000', N'2013-10-01T00:00:00.000',
    N'2013-11-01T00:00:00.000', N'2013-12-01T00:00:00.000',
    N'2014-01-01T00:00:00.000', N'2014-02-01T00:00:00.000',
    N'2014-03-01T00:00:00.000', N'2014-04-01T00:00:00.000'  -- no trailing comma after the last boundary
);
我每天收到的 Feed 平面文件中,购置日期的跨度最长可达 3 个月。我需要处理这些文件,并将数据加载到实际的分区表(Inventory 表)中。
使用批量插入(BULK INSERT)或 SSIS,我可以很容易地把这些文件加载到临时表中,但是如何使用 ALTER TABLE ... SWITCH 将每日接收的数据切换到实际的分区表中?我知道接收数据的分区必须为空,而当我把数据加载到临时表时,其中可能包含三个月的数据。所以,我不能简单地执行 SWITCH IN。
任何想法我如何实现这一点。我的要求是快速加载每日收到的数据,我的每日Feed可能有三个月的数据?
我的表按月使用获取日期进行分区,我收到的数据Feed可能有3到4个月的数据。如何使用ALTER SWITCH将此数据加载到实际的分区表中?
答案 0 :(得分:1)
我不确定这个问题和答案是否适合stackoverflow,因为这是一个相对复杂的架构讨论,但我会尝试保持简单的想法,让你运行它。
我假设您通过 SSIS 先加载到临时表,是为了利用 BULK INSERT;如果您每天只做一次这样的加载,可能不值得采用我下面的建议,直接插入目标表即可:多花一些插入时间,或许比维护一套更复杂的方案更划算。但是,如果您需要执行这种加载并让数据立即可用,那么我会这样做:
这应该可以让您获得BULK INSERTS的好处,并且几乎可以立即访问您的数据。从最近到最终表的涓流加载将慢慢清空ALTER SWITCH操作的目标表。
如果涓流加载可能花费的时间超过一天,您实际上可以动态构建分区视图,为每个插入日期添加一个新的分区表,并在它们被滴流到最终目标时删除这些表,但这增加了很多复杂性。
答案 1 :(得分:0)
我想我的问题是:“您确定这张表已经分区了吗?”一般来说,无论您是否先将数据暂存到临时位置,向正确分区的表中插入数据时,数据都会按照分区函数所用的键列自动落入对应的分区。
下面的示例创建了一个带有简单分区的数据库,并将记录插入到分区表中(基于您提供的表结构;由于分区表要求分区列至少是主键的一部分,示例中增加了原始结构里没有给出的主键)。随后的结果展示了一次插入之后相应数据文件(20140101 和 20140102)的增长,以及这些行在各分区中的实际分布。后面的查询可以帮助您确认数据是否被正确分配;如果没有,您也可以从示例的其余部分获得一些解决思路!
以下代码经过修改,以显示以下评论中概述的SWITCH用法:
创建测试数据库:
-- Build the sandbox database used by the rest of this walkthrough.
-- The data folder must already exist; it can be created with, for example:
--   EXECUTE xp_cmdshell 'mkdir D:\MSSQL\DATA\PartitionExample\';
USE master;
GO

-- Only create the database when it is not there yet, so the script can be re-run.
IF DB_ID( 'PartitionExample' ) IS NULL
BEGIN
    CREATE DATABASE [PartitionExample]
        ON PRIMARY
        (
            NAME       = [PartitionExample_dat],
            FILENAME   = 'D:\MSSQL\DATA\PartitionExample\PartitionExample.mdf',
            SIZE       = 3MB,
            FILEGROWTH = 1MB
        )
        LOG ON
        (
            NAME       = [PartitionExample_log],
            FILENAME   = 'D:\MSSQL\DATA\PartitionExample\PartitionExample.ldf',
            SIZE       = 512KB,
            FILEGROWTH = 512KB
        );
END;
GO
-- Add one filegroup and one data file per date boundary (20140101..20140103).
-- The guard looks for any logical file name of the form
-- fg_DateRange_<8 characters>_dat, so the whole step is skipped on a re-run.
-- sys.database_files is used instead of the deprecated sys.sysfiles
-- compatibility view.
IF NOT EXISTS ( SELECT *
                  FROM [PartitionExample].sys.database_files
                 WHERE name LIKE 'fg[_]DateRange[_]________[_]dat' )
BEGIN
    -- Filegroups
    ALTER DATABASE [PartitionExample]
        ADD FILEGROUP [fg_DateRange_20140101];
    ALTER DATABASE [PartitionExample]
        ADD FILEGROUP [fg_DateRange_20140102];
    ALTER DATABASE [PartitionExample]
        ADD FILEGROUP [fg_DateRange_20140103];

    -- Files (one per filegroup)
    ALTER DATABASE [PartitionExample]
        ADD FILE (
            NAME = [fg_DateRange_20140101_dat],
            FILENAME = 'D:\MSSQL\DATA\PartitionExample\fg_DateRange_20140101.ndf',
            SIZE = 512KB,
            FILEGROWTH = 512KB )
        TO FILEGROUP [fg_DateRange_20140101];
    ALTER DATABASE [PartitionExample]
        ADD FILE (
            NAME = [fg_DateRange_20140102_dat],
            FILENAME = 'D:\MSSQL\DATA\PartitionExample\fg_DateRange_20140102.ndf',
            SIZE = 512KB,
            FILEGROWTH = 512KB )
        TO FILEGROUP [fg_DateRange_20140102];
    ALTER DATABASE [PartitionExample]
        ADD FILE (
            NAME = [fg_DateRange_20140103_dat],
            FILENAME = 'D:\MSSQL\DATA\PartitionExample\fg_DateRange_20140103.ndf',
            SIZE = 512KB,
            FILEGROWTH = 512KB )
        TO FILEGROUP [fg_DateRange_20140103];
END;
GO
构建分区函数/分区方案:
-- Switch to the sandbox database created earlier.
USE [PartitionExample];
GO

-- Create the partition function and its scheme together, once.
-- Three RANGE RIGHT boundaries produce four partitions: everything before
-- 20140101 goes to PRIMARY, and each boundary date starts a partition that
-- is mapped to its own filegroup.
IF NOT EXISTS ( SELECT *
                  FROM sys.partition_functions AS pf
                 WHERE pf.name = 'pf_DateRange' )
BEGIN
    CREATE PARTITION FUNCTION [pf_DateRange] ( DATETIME )
        AS RANGE RIGHT
        FOR VALUES ( '20140101', '20140102', '20140103' );

    CREATE PARTITION SCHEME [ps_DateRange]
        AS PARTITION [pf_DateRange]
        TO ( [PRIMARY],
             [fg_DateRange_20140101],
             [fg_DateRange_20140102],
             [fg_DateRange_20140103] );
END;
GO
创建分区表并填充:
-- Create the partitioned target table if no user table named Inventory exists.
-- The partitioning column (AcquisitionDate) is included in the primary key,
-- and the table is placed on the ps_DateRange partition scheme.
IF NOT EXISTS ( SELECT *
                  FROM sys.objects AS o
                 WHERE o.type = 'U'
                   AND o.name = 'Inventory' )
BEGIN
    CREATE TABLE dbo.Inventory
    (
        Inventory_PK    INTEGER IDENTITY( 1, 1 ) NOT NULL,
        AcquisitionDate DATETIME                 NOT NULL,
        Vendor_Name     VARCHAR( 80 )            NULL,
        Model_Name      VARCHAR( 80 )            NULL,
        Qty             BIGINT                   NOT NULL,
        PRIMARY KEY ( Inventory_PK, AcquisitionDate )
    )
    ON ps_DateRange( AcquisitionDate );
END;
GO
-- Initial population: when dbo.Inventory is empty, build a temporary staging
-- table, fill it with 100 rows for 20140101 and 100 rows for 20140102, copy
-- those rows into the partitioned table and drop the staging table again.
SET NOCOUNT ON;
IF NOT EXISTS ( SELECT 1
                  FROM dbo.Inventory )
BEGIN
    DECLARE @i INTEGER;

    -- Staging table with the same layout and partition scheme as dbo.Inventory.
    CREATE TABLE dbo.t_StageInventory
    (
        Inventory_PK INTEGER IDENTITY( 1, 1 ) NOT NULL,
        AcquisitionDate DATETIME NOT NULL,
        PRIMARY KEY ( Inventory_PK, AcquisitionDate ),
        Vendor_Name VARCHAR( 80 ) NULL,
        Model_Name VARCHAR( 80 ) NULL,
        Qty BIGINT NOT NULL
    ) ON ps_DateRange( AcquisitionDate );

    -- 100 rows dated 20140101
    SET @i = 0;
    WHILE ( @i < 100 )
    BEGIN
        INSERT INTO dbo.t_StageInventory ( Vendor_Name, Model_Name,
            AcquisitionDate, Qty )
        VALUES ( 'VendorName', 'ModelName', '20140101', 1 );
        SET @i = @i + 1;
    END;

    -- 100 rows dated 20140102
    SET @i = 0;
    WHILE ( @i < 100 )
    BEGIN
        INSERT INTO dbo.t_StageInventory ( Vendor_Name, Model_Name,
            AcquisitionDate, Qty )
        VALUES ( 'VendorName', 'ModelName', '20140102', 1 );
        SET @i = @i + 1;
    END;

    -- Insert data into the partitioned table; Inventory_PK is regenerated by
    -- the target's own IDENTITY column.
    INSERT INTO dbo.Inventory ( AcquisitionDate, Vendor_Name,
        Model_Name, Qty )
    SELECT AcquisitionDate, Vendor_Name, Model_Name, Qty
      FROM dbo.t_StageInventory;

    DROP TABLE dbo.t_StageInventory;
END;
-- Restore NOCOUNT on every path, not only when the table had to be populated;
-- otherwise the session silently keeps NOCOUNT ON for later batches.
SET NOCOUNT OFF;
GO
查看数据分布:
-- Show, for every partition of dbo.Inventory, the partition scheme/function,
-- the filegroup the partition is mapped to and the row count reported by
-- sys.partitions.
SELECT ObjectName = OBJECT_NAME( p.object_id ),
       PartitionSchemeName = ps.name,
       PartitionFunctionName = pf.name,
       PartitionNumber = p.partition_number,
       [FileGroup] = fg.name,
       [Rows] = p.rows
  FROM sys.partitions p
 INNER JOIN sys.indexes i
    ON p.object_id = i.object_id
   AND p.index_id = i.index_id      -- match each partition to its own index; joining on object_id alone multiplies rows once the table has several indexes
 INNER JOIN sys.partition_schemes ps
    ON i.data_space_id = ps.data_space_id
 INNER JOIN sys.partition_functions pf
    ON ps.function_id = pf.function_id
 INNER JOIN sys.destination_data_spaces dds
    ON dds.partition_scheme_id = ps.data_space_id
   AND dds.destination_id = p.partition_number
 INNER JOIN sys.filegroups fg
    ON dds.data_space_id = fg.data_space_id
 WHERE p.object_id = OBJECT_ID( 'dbo.Inventory' )
 ORDER BY p.index_id, p.partition_number;   -- deterministic output order
现在测试已经完成,我们可以继续为SWITCH做好准备!再次填充临时表:
新阶段:
-- Simulated ETL load: create the staging table on the same partition scheme
-- as the target and fill it with 10 rows for 20140102 and 100 rows for
-- 20140103. @i stays declared here because later statements in the same
-- batch reuse it.
SET NOCOUNT ON;

DECLARE @i INTEGER;

CREATE TABLE dbo.t_StageInventory
(
    Inventory_PK    INTEGER IDENTITY( 1, 1 ) NOT NULL,
    AcquisitionDate DATETIME                 NOT NULL,
    Vendor_Name     VARCHAR( 80 )            NULL,
    Model_Name      VARCHAR( 80 )            NULL,
    Qty             BIGINT                   NOT NULL,
    PRIMARY KEY ( Inventory_PK, AcquisitionDate )
)
ON ps_DateRange( AcquisitionDate );

-- 10 rows dated 20140102
SELECT @i = 0;
WHILE @i < 10
BEGIN
    INSERT INTO dbo.t_StageInventory
        ( AcquisitionDate, Vendor_Name, Model_Name, Qty )
    VALUES
        ( '20140102', 'VendorName', 'ModelName', 1 );

    SELECT @i = @i + 1;
END;

-- 100 rows dated 20140103
SELECT @i = 0;
WHILE @i < 100
BEGIN
    INSERT INTO dbo.t_StageInventory
        ( AcquisitionDate, Vendor_Name, Model_Name, Qty )
    VALUES
        ( '20140103', 'VendorName', 'ModelName', 1 );

    SELECT @i = @i + 1;
END;
填充暂存表后,我们需要确定哪些行必须从供报表使用的稳定表(dbo.Inventory)中转移出来,并把它们移入暂存表。
重新暂存数据:
-- Re-stage existing partition data.
-- Work out the date range covered by every staging partition that currently
-- holds rows, then copy the rows dbo.Inventory already has in that range into
-- the staging table, so the staged partitions are complete before they are
-- switched in.
-- The half-open range [@LowRange, @HighRange) matches the RANGE RIGHT
-- partition function pf_DateRange: partition N starts at boundary N-1
-- (inclusive) and ends at boundary N (exclusive).
DECLARE @UpperBound DATETIME,
        @LowerBound DATETIME,
        @LowRange DATETIME,
        @HighRange DATETIME;
SET @LowerBound = '17530101';   -- smallest DATETIME; stands in for "no lower boundary" (first partition)
SET @UpperBound = '99991231';   -- stands in for "no upper boundary" (last partition)

SELECT @LowRange  = MIN( ISNULL( CAST( pprv.value AS DATETIME ), @LowerBound ) ),
       @HighRange = MAX( ISNULL( CAST( prv.value AS DATETIME ), @UpperBound ) )
  FROM sys.partitions p
 INNER JOIN sys.indexes i
    ON p.object_id = i.object_id
   AND p.index_id = i.index_id             -- one row per partition of the matching index
 INNER JOIN sys.partition_schemes ps
    ON i.data_space_id = ps.data_space_id
  -- upper boundary of the partition
  LEFT JOIN sys.partition_range_values prv
    ON ps.function_id = prv.function_id
   AND p.partition_number = prv.boundary_id
  -- lower boundary of the partition; must be tied to pprv's own function_id,
  -- otherwise the last partition gets no lower bound and boundary values of
  -- other partition functions can match
  LEFT JOIN sys.partition_range_values pprv
    ON ps.function_id = pprv.function_id
   AND p.partition_number - 1 = pprv.boundary_id
 WHERE p.object_id = OBJECT_ID( 'dbo.t_StageInventory' )
   AND p.rows <> 0;

-- When nothing is staged both variables are NULL and no rows are copied.
INSERT INTO dbo.t_StageInventory( AcquisitionDate, Vendor_Name, Model_Name, Qty )
SELECT AcquisitionDate, Vendor_Name, Model_Name, Qty
  FROM dbo.Inventory
 WHERE AcquisitionDate >= @LowRange
   AND AcquisitionDate < @HighRange;
先 SWITCH OUT,然后 SWITCH IN:
-- Empty table on the same partition scheme as dbo.Inventory; it receives the
-- partitions that are switched OUT of dbo.Inventory.
CREATE TABLE dbo.t_SwapInventory
(
    Inventory_PK INTEGER IDENTITY( 1, 1 ) NOT NULL,
    AcquisitionDate DATETIME NOT NULL,
    PRIMARY KEY ( Inventory_PK, AcquisitionDate ),
    Vendor_Name VARCHAR( 80 ) NULL,
    Model_Name VARCHAR( 80 ) NULL,
    Qty BIGINT NOT NULL
) ON ps_DateRange( AcquisitionDate );

-- Work list: every staging partition that currently holds rows.
DECLARE @t_Partition TABLE
(
    partition_number INTEGER
);
INSERT INTO @t_Partition ( partition_number )
SELECT DISTINCT p.partition_number
  FROM sys.partitions p
 WHERE p.object_id = OBJECT_ID( 'dbo.t_StageInventory' )
   AND p.rows <> 0;

DECLARE @SQL NVARCHAR( MAX ),
        @Partition INTEGER,
        @PartitionText NVARCHAR( 10 ),
        @ErrorMessage NVARCHAR( 2048 );

-- For each staged partition: switch the current partition out of
-- dbo.Inventory into the swap table, then switch the staged partition in.
-- Both switches run in one transaction so a failed switch-in does not leave
-- dbo.Inventory with an emptied partition.
WHILE EXISTS ( SELECT 1 FROM @t_Partition )
BEGIN
    SELECT TOP ( 1 ) @Partition = partition_number
      FROM @t_Partition
     ORDER BY partition_number;

    DELETE FROM @t_Partition
     WHERE partition_number = @Partition;

    -- Explicit conversion for building the dynamic statements.
    SET @PartitionText = CAST( @Partition AS NVARCHAR( 10 ) );

    BEGIN TRY
        BEGIN TRANSACTION;

        SET @SQL = N'
            ALTER TABLE dbo.Inventory
            SWITCH PARTITION ' + @PartitionText + N'
            TO dbo.t_SwapInventory PARTITION ' + @PartitionText + N';';
        EXECUTE dbo.sp_executesql @statement = @SQL;

        SET @SQL = N'
            ALTER TABLE dbo.t_StageInventory
            SWITCH PARTITION ' + @PartitionText + N'
            TO dbo.Inventory PARTITION ' + @PartitionText + N';';
        EXECUTE dbo.sp_executesql @statement = @SQL;

        COMMIT TRANSACTION;
    END TRY
    BEGIN CATCH
        -- Undo the switch-out so the original partition data is back in
        -- dbo.Inventory, report the failure and stop processing.
        IF @@TRANCOUNT > 0
            ROLLBACK TRANSACTION;
        SET @ErrorMessage = ERROR_MESSAGE();
        RAISERROR( 'Partition switch failed for partition %d: %s', 16, 1,
                   @Partition, @ErrorMessage );
        BREAK;
    END CATCH;
END;
GO
-- Cleanup: the swap table holds only the superseded partition contents and
-- the staging table has been switched into dbo.Inventory.
-- NOTE: this batch also runs after a reported switch failure; any partitions
-- still sitting in dbo.t_StageInventory are then discarded with it.
DROP TABLE dbo.t_SwapInventory;
DROP TABLE dbo.t_StageInventory;
SET NOCOUNT OFF;
此时,可以运行上面的查看数据分布查询以进行其他验证。