我有一张非常大的表,约有 55,000,000 条记录。已经为最常用的列添加了索引,但对这张表的查询仍然非常慢。
是否有关于如何改进该表性能的建议?我考虑过对表进行分区,但不确定是否有必要。
-- Table: dbo.EngineRecord
-- Keys declared inline with the table:
--   * PK_EngineRecord - nonclustered primary key on Id.
--   * NK_EngineRecord - unique clustered key on (CompanyId, EobrDeviceId, EobrTimestampUtc);
--                       this is the table's clustered index.
-- Both keys are created with FILLFACTOR = 80 and PAD_INDEX = ON on the PRIMARY filegroup.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[EngineRecord]
(
    [Id]                   [uniqueidentifier] NOT NULL,
    [CreateDate]           [datetime]         NOT NULL,
    [ChangeDate]           [datetime]         NOT NULL,
    [CompanyId]            [uniqueidentifier] NOT NULL,
    [DriverEmployeeId]     [uniqueidentifier] NOT NULL,
    [EobrDeviceId]         [uniqueidentifier] NOT NULL,
    [EobrTimestampUtc]     [datetime]         NOT NULL,
    [EobrOverallStatus]    [int]              NOT NULL,
    [Speedometer]          [decimal](14, 4)   NOT NULL,
    [Odometer]             [decimal](14, 4)   NOT NULL,
    [Tachometer]           [decimal](14, 4)   NOT NULL,
    [GpsTimestampUtc]      [datetime]         NULL,
    [GpsLatitude]          [decimal](18, 8)   NULL,
    [GPSLongitude]         [decimal](18, 8)   NULL,
    [RecordType]           [int]              NOT NULL,
    [FuelEconomyAverage]   [decimal](8, 4)    NOT NULL,
    [FuelEconomyInstant]   [decimal](8, 4)    NOT NULL,
    [FuelUseTotal]         [decimal](14, 4)   NOT NULL,
    [BrakePressure]        [decimal](8, 4)    NOT NULL,
    [CruiseControlSet]     [bit]              NOT NULL,
    [TransmissionAttained] [nvarchar](2)      NULL,
    [TransmissionSelected] [nvarchar](2)      NULL,
    [IsProcessed]          [bit]              NOT NULL,
    [LastChangedByUserId]  [uniqueidentifier] NOT NULL,

    -- Surrogate key: enforced by a nonclustered index.
    CONSTRAINT [PK_EngineRecord] PRIMARY KEY NONCLUSTERED
    (
        [Id] ASC
    )
    WITH (
        PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    ) ON [PRIMARY],

    -- Natural key: unique and clustered.
    CONSTRAINT [NK_EngineRecord] UNIQUE CLUSTERED
    (
        [CompanyId] ASC,
        [EobrDeviceId] ASC,
        [EobrTimestampUtc] ASC
    )
    WITH (
        PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    ) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Foreign keys from dbo.EngineRecord to its parent tables.
-- Each constraint is added WITH CHECK so that existing rows are validated and SQL Server
-- marks the constraint as trusted. A constraint added WITH NOCHECK stays untrusted
-- (sys.foreign_keys.is_not_trusted = 1) even after a plain CHECK CONSTRAINT, and the
-- query optimizer does not consider untrusted constraints.
-- NOTE(review): on an already-populated table WITH CHECK scans the existing rows and the
-- statement fails if any row violates the reference; clean up orphaned rows first.

-- CompanyId -> dbo.CompanyLevel(Id)
ALTER TABLE [dbo].[EngineRecord] WITH CHECK ADD CONSTRAINT [FK_EngineRecord_CompanyLevel] FOREIGN KEY([CompanyId])
REFERENCES [dbo].[CompanyLevel] ([Id])
GO
ALTER TABLE [dbo].[EngineRecord] CHECK CONSTRAINT [FK_EngineRecord_CompanyLevel]
GO

-- DriverEmployeeId -> dbo.Employee(Id); deleting an Employee row cascades to its engine records.
ALTER TABLE [dbo].[EngineRecord] WITH CHECK ADD CONSTRAINT [FK_EngineRecord_Employee] FOREIGN KEY([DriverEmployeeId])
REFERENCES [dbo].[Employee] ([Id])
ON DELETE CASCADE
GO
ALTER TABLE [dbo].[EngineRecord] CHECK CONSTRAINT [FK_EngineRecord_Employee]
GO

-- EobrDeviceId -> dbo.EobrDevice(Id)
ALTER TABLE [dbo].[EngineRecord] WITH CHECK ADD CONSTRAINT [FK_EngineRecord_EobrDevice] FOREIGN KEY([EobrDeviceId])
REFERENCES [dbo].[EobrDevice] ([Id])
GO
ALTER TABLE [dbo].[EngineRecord] CHECK CONSTRAINT [FK_EngineRecord_EobrDevice]
GO
---------------------
--Indexes/Constraints
---------------------
-- NOTE(review): PK_EngineRecord and NK_EngineRecord below use the same names and key columns
-- as the constraints declared inline in CREATE TABLE [dbo].[EngineRecord]; create each
-- constraint through only one of the two forms.

-- Primary key: nonclustered, on Id.
ALTER TABLE [dbo].[EngineRecord]
    ADD CONSTRAINT [PK_EngineRecord] PRIMARY KEY NONCLUSTERED
    (
        [Id] ASC
    )
    WITH (
        PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    ) ON [PRIMARY]
GO

-- Single-column index on DriverEmployeeId (the column referenced by FK_EngineRecord_Employee).
CREATE NONCLUSTERED INDEX [NC_EngineRecord_Employee]
    ON [dbo].[EngineRecord]
    (
        [DriverEmployeeId] ASC
    )
    WITH (
        PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    ) ON [PRIMARY]
GO

-- Single-column index on RecordType (int).
CREATE NONCLUSTERED INDEX [NC_RecordType]
    ON [dbo].[EngineRecord]
    (
        [RecordType] ASC
    )
    WITH (
        PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    ) ON [PRIMARY]
GO

-- Natural key: unique clustered index on (CompanyId, EobrDeviceId, EobrTimestampUtc).
ALTER TABLE [dbo].[EngineRecord]
    ADD CONSTRAINT [NK_EngineRecord] UNIQUE CLUSTERED
    (
        [CompanyId] ASC,
        [EobrDeviceId] ASC,
        [EobrTimestampUtc] ASC
    )
    WITH (
        PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    ) ON [PRIMARY]
GO

-- Index keyed on company + GPS position, carrying EobrDeviceId and EobrTimestampUtc as
-- included columns. Unlike the other indexes it uses PAD_INDEX = OFF and FILLFACTOR = 90.
CREATE NONCLUSTERED INDEX [IX_EngineRecord_DBA]
    ON [dbo].[EngineRecord]
    (
        [CompanyId] ASC,
        [GpsLatitude] ASC,
        [GPSLongitude] ASC
    )
    INCLUDE
    (
        [EobrDeviceId],
        [EobrTimestampUtc]
    )
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90
    ) ON [PRIMARY]
GO

-- Single-column index on IsProcessed (bit).
CREATE NONCLUSTERED INDEX [NC_IsProcessed]
    ON [dbo].[EngineRecord]
    (
        [IsProcessed] ASC
    )
    WITH (
        PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 80
    ) ON [PRIMARY]
GO
编辑:
下面是一个运行起来比较耗时的存储过程(sproc)。
-- Lists the EOBR devices that have a non-NULL UnitId and at least one unprocessed
-- (IsProcessed = 0) engine record of the requested record type, together with the ids of
-- the company-group and company-group-root rows each device joins to.
--
-- Parameters:
--   @RecordTypeEnum  value compared with EngineRecord.RecordType.
-- Result set columns: EobrDeviceId, UnitId, CGRootId, CompanyGroupId, CompanyId.
CREATE PROCEDURE [dbo].[EngineRecord__GetEobrListToProcessByRecordType]
    @RecordTypeEnum int
AS
DECLARE @ChangeHistory bit; -- dummy variable for VS 2008 database project
SET NOCOUNT ON;

SELECT
    dev.[Id] AS EobrDeviceId,
    dev.[UnitId],
    cgr.[Id] AS CGRootId,
    grp.[Id] AS CompanyGroupId,
    dev.[CompanyId]
FROM dbo.EobrDevice AS dev
INNER JOIN dbo.CompanyLevel AS lvl
    ON dev.[CompanyId] = lvl.[Id]
INNER JOIN dbo.CompanyGroup AS grp
    ON lvl.ParentGroupId = grp.[Id]
INNER JOIN dbo.CompanyGroupRoot AS cgr
    ON grp.CGRootId = cgr.[Id]
WHERE dev.UnitId IS NOT NULL
  -- Semi-join: only the existence of a matching engine record matters, so EXISTS is used
  -- instead of IN (SELECT DISTINCT ...); the DISTINCT added nothing to the membership test.
  AND EXISTS (
        SELECT 1
        FROM dbo.EngineRecord AS er
        WHERE er.EobrDeviceId = dev.[Id]
          AND er.IsProcessed = 0
          AND er.RecordType = @RecordTypeEnum
      );
编辑2:
下面是我们每晚用来清除旧记录的脚本,它总是要运行很长时间。
-- Selects every engine record whose EobrTimestampUtc is more than six months old, oldest first.
-- The cut-off is derived from GETUTCDATE() because it is compared with EobrTimestampUtc,
-- which (per its name) presumably holds UTC values -- TODO confirm. GETDATE() returns
-- server-local time and would shift the cut-off by the server's UTC offset.
-- The column is spelled exactly as declared (EobrTimestampUtc) so the query also works
-- under a case-sensitive collation.
-- NOTE(review): SELECT * returns all columns; list only the ones the purge actually needs.
DECLARE @dt6MonthsPrior datetime;
SET @dt6MonthsPrior = DATEADD(MONTH, -6, GETUTCDATE());

SELECT *
FROM EngineRecord
WHERE EngineRecord.EobrTimestampUtc < @dt6MonthsPrior
ORDER BY EngineRecord.EobrTimestampUtc ASC;
答案 0 :(得分:2)
`WHERE` 条件中的字段都没有包含在任何索引中。为这些字段建立索引将有所帮助。如果不更深入地了解表的使用方式,就无法确定其他索引的效果。
如果你真的想让这个查询飞快,可以在 Odometer(里程表)和 Tachometer(转速表)列上建立聚集索引,但考虑到该表的其他用途,这可能并不合理。
更新
你的第二个查询看起来不应该那么慢,唯一可能有帮助的似乎就是在那个日期列上建立索引。
如今 5500 万条记录并不算很大。我不是分区方面的专家,但我认为对你的表进行分区不会带来多大改进;除非我预计某张表会超过几亿条记录,否则我通常不会费心去分区。不过在生产环境中,分区还有其他好处。你确定你看到的糟糕性能不是硬件造成的吗?SQL Server 中还有许多设置/功能也会影响性能。
答案 1 :(得分:1)
这样的索引可能有助于此特定查询:
-- Filtered nonclustered index keyed on (Odometer, Tachometer), restricted to rows
-- where FuelUseTotal is not NULL.
CREATE NONCLUSTERED INDEX x
    ON dbo.EngineRecord (Odometer, Tachometer)
    WHERE FuelUseTotal IS NOT NULL;
如果您不再按时间戳排序,也会有所帮助。
答案 2 :(得分:1)
您知道如何获得执行计划吗?您没有关于tach或odo或FuelUse的索引,因此您的示例查询将导致全表扫描。在Sql Management Studio中,右键单击查询窗口,选择“包含实际执行计划”,然后运行查询。您将看到一个输出,向您解释SQL Server实际运行查询必须执行的步骤。一旦花时间了解执行计划,这可能非常有启发性。
此外,您可能还需要了解一下覆盖索引。如果您经常运行某些查询,覆盖索引可能会带来巨大的差异。当然,与任何索引一样,插入/删除数据时会产生更多开销。
答案 3 :(得分:0)
像 Goat CO 建议的那样,为 `WHERE` 中的字段建立索引是一个好的开始。我还建议把 `WHERE` 条件移到第一个 `INNER JOIN` 中,这样在第一个 `INNER JOIN` 之后生成、供后续处理的临时结果集就已经小得多(我见过这样做效果奇佳):
-- Device lookup with the device filters placed in the first join's ON clause rather than
-- in a WHERE clause. The ON predicates are kept in this order on purpose:
-- UnitId IS NOT NULL, then the company match, then the engine-record membership test.
-- @RecordTypeEnum must be declared by the enclosing batch or procedure.
SELECT
    dev.[Id] AS EobrDeviceId,
    dev.[UnitId],
    cgr.[Id] AS CGRootId,
    grp.[Id] AS CompanyGroupId,
    dev.[CompanyId]
FROM dbo.EobrDevice AS dev
INNER JOIN dbo.CompanyLevel AS lvl
    ON  dev.UnitId IS NOT NULL
    AND dev.[CompanyId] = lvl.[Id]
    AND dev.[Id] IN (
            SELECT DISTINCT er.EobrDeviceId
            FROM dbo.EngineRecord AS er
            WHERE er.IsProcessed = 0
              AND er.RecordType = @RecordTypeEnum
        )
INNER JOIN dbo.CompanyGroup AS grp
    ON lvl.ParentGroupId = grp.[Id]
INNER JOIN dbo.CompanyGroupRoot AS cgr
    ON grp.CGRootId = cgr.[Id]
我还把 `EobrDevice.UnitId IS NOT NULL` 条件调到了最前面,以便只有在满足该条件时才检查其他表并运行子查询。
答案 4 :(得分:0)
对索引进行分区(partitioning)会对性能产生影响,但各分区必须放在合适的独立驱动器上。您没有提供有关硬件的信息(您使用的是什么?NAS?SAS 硬盘?……)
此外,就处理目标而言,规范化并不总是最佳选择,尤其是用于分析时。把一些字段(CompanyLevel、CompanyGroup)反规范化到主表中,会让您的查询效果更好——不过,每位大厨都有自己的做法,所以我们就跳过这个讨论吧……
索引的构建方式与清除数据的方式不匹配。如果您决定将
[EobrTimestampUtc] ASC
更改为
[EobrTimestampUtc] DESC
您将获得更好的性能,这会影响 `EngineRecord.EobrTimeStampUtc < @dt6MonthsPrior` 这一条件。