我需要重新设计一个表,该表存放每日日志,有数亿行。设计思路是只保留一个月的数据。该表将以datetime字段作为分区键,按天分区。因此,以三月为例,我需要31个文件组和31个分区。进入4月后,该过程必须添加4月1日的分区,并删除3月1日的数据和文件组。
我对分区和文件组的创建没有问题,想请教如何实现自动删除过程。也许可以用一个SQL作业,通过dateadd(m,-1,getdate())来确定并删除过期的分区?还有什么我遗漏的地方吗?
另一个问题:我将创建year_month_day_partitions和相应的文件组,直到2020年12月31日为止。有没有办法自动创建这些分区和文件组,还是必须到时候手动运行脚本?
答案 0 :(得分:0)
滑动窗口分区维护通常通过计划执行的脚本或存储过程(SQL Server代理作业或其他调度系统)来完成。应当合理安排维护时间,以避免在 SPLIT 和 MERGE 期间发生代价高昂的数据移动,因为这类数据移动产生的日志量大约是正常DML操作的四倍。为此,请确保在 MERGE 之前包含边界值的分区为空,并且在 SPLIT 时没有大于指定边界的行。我建议预先为未来的日期多创建几个分区作为缓冲,以免维护未按计划运行时发生数据移动。
下面是一个每日滑动窗口维护脚本的示例。由于您使用的是SQL Server 2005,而分区级别的 TRUNCATE 是在SQL Server 2016中才引入的,因此这里使用一个分区方式相同的暂存(staging)表来清除数据。请注意,SQL Server 2005已不再受支持。
我从您的评论中看到,您认为为每个分区使用单独的文件组/文件有助于删除分区,但事实并非如此。此示例中所有分区都使用同一个文件组。
--example setup
--The partition function is created with no boundaries; the loop below adds
--one boundary per day.
CREATE PARTITION FUNCTION PF_Date (datetime)
    AS RANGE RIGHT FOR VALUES ();

--Every partition of the scheme is mapped to the PRIMARY filegroup.
CREATE PARTITION SCHEME PS_LogTable
    AS PARTITION PF_Date
    ALL TO ([PRIMARY]);

--First boundary: midnight of the day 31 days before today.
DECLARE @BoundaryDate datetime;
SET @BoundaryDate = DATEADD(day, -31, DATEADD(day, DATEDIFF(day, '', GETDATE()), ''));

--Add a boundary for each day while the boundary is earlier than
--24 hours from the current time.
WHILE DATEADD(day, 1, GETDATE()) > @BoundaryDate
BEGIN
    --NEXT USED must name the filegroup for the new partition before each SPLIT
    ALTER PARTITION SCHEME PS_LogTable NEXT USED [PRIMARY];
    ALTER PARTITION FUNCTION PF_Date() SPLIT RANGE (@BoundaryDate);
    SELECT @BoundaryDate = DATEADD(day, 1, @BoundaryDate);
END;

--The log table and its staging table are declared identically (same column,
--same clustered index, same partition scheme).
CREATE TABLE dbo.LogTable
(
    DateColumn datetime INDEX cdx CLUSTERED
) ON PS_LogTable (DateColumn);

CREATE TABLE dbo.LogTable_Staging
(
    DateColumn datetime INDEX cdx CLUSTERED
) ON PS_LogTable (DateColumn);
GO
--example partition maintenance scheduled nightly after midnight
--Purpose: sliding-window maintenance for dbo.LogTable.
--  1. Empties partition 1 (rows older than the first boundary).
--  2. For every boundary older than the retention window, switches the
--     partition into dbo.LogTable_Staging, truncates it, and merges the
--     boundary away.
--  3. Adds any missing daily boundaries up to @FutureDays ahead.
--All partition changes run in one transaction; any error rolls everything
--back and is re-raised to the caller.
BEGIN TRY
    SET NOCOUNT ON;
    SET XACT_ABORT ON;

    DECLARE @RetentionDays int = 31;
    DECLARE @FutureDays int = 7;

    --Read the clock once so every derived date refers to the same day, even
    --if the batch starts just before midnight.
    DECLARE @Now datetime = GETDATE();
    --Today at midnight ('' implicitly converts to the datetime base date).
    DECLARE @LatestRetainedDate datetime = DATEADD(day, DATEDIFF(day, '', @Now), '');
    --Boundaries strictly older than this date are expired.
    DECLARE @OldestRetainedDate datetime = DATEADD(day, -@RetentionDays, @LatestRetainedDate);
    --Last future boundary that must exist after this run.
    DECLARE @LatestFutureBoundaryDate datetime = DATEADD(day, @FutureDays, @LatestRetainedDate);
    DECLARE @PartitionBoundaryDate datetime;
    DECLARE @Message nvarchar(2048);

    --make sure staging table is empty
    TRUNCATE TABLE dbo.LogTable_Staging;

    BEGIN TRAN;

    --acquire exclusive table lock to avoid deadlocking during maintenance
    --(TOP(0) returns no rows; the statement exists only for its lock hint)
    SELECT TOP(0) @PartitionBoundaryDate = DateColumn FROM dbo.LogTable WITH(TABLOCKX);

    --purge partition 1 in case data older than the first boundary was inserted
    SET @Message = 'Purging partition 1';
    PRINT @Message;
    ALTER TABLE dbo.LogTable SWITCH
        PARTITION 1 TO
        dbo.LogTable_Staging PARTITION 1;
    TRUNCATE TABLE dbo.LogTable_Staging;

    --purge and remove expired partitions
    --Snapshot the expired boundaries first, because the loop below modifies
    --the partition function it would otherwise be reading from.
    DECLARE @PartitionBoundaries TABLE(PartitionBoundaryDate datetime NOT NULL PRIMARY KEY);
    INSERT INTO @PartitionBoundaries(PartitionBoundaryDate)
    SELECT CAST(prv.value AS datetime)
    FROM sys.partition_functions AS pf
    JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
    WHERE
        pf.name = N'PF_Date'
        AND CAST(prv.value AS datetime) < @OldestRetainedDate;

    --Process oldest boundary first so the purge order (and the printed log)
    --is deterministic.
    DECLARE ExpiredPartitionBoundaries CURSOR LOCAL FAST_FORWARD FOR
        SELECT PartitionBoundaryDate
        FROM @PartitionBoundaries
        ORDER BY PartitionBoundaryDate;
    OPEN ExpiredPartitionBoundaries;
    WHILE 1 = 1
    BEGIN
        FETCH NEXT FROM ExpiredPartitionBoundaries INTO @PartitionBoundaryDate;
        --stop on any unsuccessful fetch, not only on end-of-set
        IF @@FETCH_STATUS <> 0 BREAK;

        SET @Message = 'Purging data for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
        PRINT @Message;
        --empty the partition that holds the boundary value before merging,
        --so the MERGE has no rows to move
        ALTER TABLE dbo.LogTable SWITCH
            PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate) TO
            dbo.LogTable_Staging PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate);
        TRUNCATE TABLE dbo.LogTable_Staging;
        ALTER PARTITION FUNCTION PF_Date() MERGE RANGE(@PartitionBoundaryDate);
    END;
    CLOSE ExpiredPartitionBoundaries;
    DEALLOCATE ExpiredPartitionBoundaries;

    --create partitions for future days
    SET @PartitionBoundaryDate = DATEADD(day, 1, @LatestRetainedDate);
    WHILE @PartitionBoundaryDate <= @LatestFutureBoundaryDate
    BEGIN
        --only split when the boundary does not exist yet
        IF NOT EXISTS(SELECT 1
            FROM sys.partition_functions AS pf
            JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
            WHERE
                pf.name = N'PF_Date'
                AND CAST(prv.value AS datetime) = @PartitionBoundaryDate
            )
        BEGIN
            SET @Message = 'Creating partition for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
            PRINT @Message;
            ALTER PARTITION SCHEME PS_LogTable NEXT USED [PRIMARY];
            ALTER PARTITION FUNCTION PF_Date() SPLIT RANGE(@PartitionBoundaryDate);
        END;
        SET @PartitionBoundaryDate = DATEADD(day, 1, @PartitionBoundaryDate);
    END;

    COMMIT;
END TRY
BEGIN CATCH
    --undo any partial partition changes before reporting the error
    IF @@TRANCOUNT > 0 ROLLBACK;

    --better to use THROW in SQL 2012 and later
    DECLARE
         @ErrorNumber int
        ,@ErrorMessage nvarchar(2048)
        ,@ErrorSeverity int
        ,@ErrorState int
        ,@ErrorLine int;
    SELECT
         @ErrorNumber = ERROR_NUMBER()
        ,@ErrorMessage = ERROR_MESSAGE()
        ,@ErrorSeverity = ERROR_SEVERITY()
        ,@ErrorState = ERROR_STATE()
        ,@ErrorLine = ERROR_LINE();
    --re-raise with the original severity and state; the remaining arguments
    --fill the %d, %d and %s placeholders in order
    RAISERROR('Error %d caught at line %d: %s'
        ,@ErrorSeverity
        ,@ErrorState
        ,@ErrorNumber
        ,@ErrorLine
        ,@ErrorMessage);
END CATCH;
GO