我正在尝试调优一个使用了 ROW_NUMBER()
OVER (PARTITION BY ...)
-- Loads recent suspect data points for database 'db' into #Temp and numbers the
-- rows inside each duplicate group by UploadDate (RowNum = 1 is the earliest
-- upload), so that callers can keep one row per group.
SELECT
    SDP.SuspectID
    ,SDP.RecordID
    ,SDP.Field
    ,SDP.FieldEntryDate
    ,SDP.ScreenDate
    ,SDP.SuspectDetails
    ,CONVERT(VARCHAR(10), SDP.PeriodEndDate, 120) AS PeriodEndDate
    ,ROW_NUMBER() OVER (
        PARTITION BY
            SDP.RecordID
            ,SDP.Field
            ,SDP.ScreenerID
            ,SDP.PeriodEndDate
            ,SDP.PeriodID
            ,SDP.[Source]
            ,SDP.SuspectDetails
        ORDER BY SDP.UploadDate
    ) AS RowNum
    ,SDP.DatabaseAccountCode
    ,SDP.RecordUpdateType
INTO #Temp
FROM dbo.SuspectDataPoint AS SDP
-- The WHERE filter on S.DatabaseName discards every unmatched (NULL) Screener
-- row, so the former LEFT JOIN already behaved as an inner join; say so explicitly.
INNER JOIN dbo.Screener AS S
    ON S.ScreenerID = SDP.ScreenerID
WHERE SDP.ScreenerID <> 719
    AND S.DatabaseName = 'db'
    -- SARGable rewrite of
    --     CONVERT(DATE, SDP.FieldEntryDate) > DATEADD(dd, -24, GETDATE())
    -- A DATE compared with a DATETIME is promoted to midnight, so the original
    -- keeps exactly the rows whose calendar date is at least (today - 23 days).
    -- Comparing the bare column lets the FieldEntryDate index be used for a seek.
    AND SDP.FieldEntryDate >= DATEADD(DAY, -23, CONVERT(DATE, GETDATE()));
的查询来从数据库 db 中获取唯一记录。完整的查询就是上面给出的 SELECT … INTO #Temp 语句。
我尝试手动找出此查询中效率最低的部分,结果发现:带有 ROW_NUMBER 那一行时,查询的运行时间是去掉该行时的 10 倍。
我还查看了执行计划来确认这一发现,但有些地方我不明白。如果按上面给出的方式运行查询,执行计划显示成本最高的操作是插入临时表(45%)。去掉 ROW_NUMBER 那一行再运行同样的查询,情况也是如此,只是百分比略有不同(61%)。我理解在第一个查询中会为 ROW_NUMBER 执行一次 SORT 操作,但与其他操作相比,它的成本相当低(25%)。然而,正如我所说,带有和不带 ROW_NUMBER 这一行时(表结构与索引脚本见下方),
/****** Object: Table [dbo].[Screener] Script Date: 10/17/2014 5:53:51 PM ******/
-- Session options in effect while dbo.Screener is created.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
-- dbo.Screener: one row per ScreenerID (clustered primary key).
CREATE TABLE [dbo].[Screener]
(
    [ScreenerID]          INT             NOT NULL,
    [ScreenerName]        VARCHAR(255)    NOT NULL,
    [ScreenerDescription] NVARCHAR(4000)  NULL,
    [Script]              TEXT            NULL,
    [HitRate]             NUMERIC(10, 2)  NOT NULL DEFAULT ((1)),
    [CreatedOn]           DATE            NULL,
    [CreatedBy]           VARCHAR(7)      NULL,
    [SuspectReason]       NVARCHAR(4000)  NULL,
    [IsExtremeOutlier]    BIT             NOT NULL DEFAULT ((0)),
    [DatabaseName]        VARCHAR(20)     NOT NULL,
    PRIMARY KEY CLUSTERED ([ScreenerID] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY]
GO
-- Restore the padding option after the table has been created.
SET ANSI_PADDING OFF
GO
-- Table dbo.SuspectDataPoint (scripted 10/17/2014 5:53:52 PM).
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
-- One row per SuspectID, an identity column that is also the clustered primary key.
-- UploadDate defaults to the insert time via getdate().
CREATE TABLE [dbo].[SuspectDataPoint]
(
    [SuspectID]           INT IDENTITY(1,1) NOT FOR REPLICATION NOT NULL,
    [RecordID]            VARCHAR(30)     NOT NULL,
    [Field]               VARCHAR(50)     NOT NULL,
    [ScreenerID]          INT             NOT NULL,
    [ScreenDate]          DATETIME        NOT NULL,
    [SuspectDetails]      NVARCHAR(4000)  NULL,
    [PeriodEndDate]       DATETIME        NULL,
    [FieldEntryDate]      DATETIME        NOT NULL,
    [OriginalValue]       NVARCHAR(4000)  NULL,
    [Source]              NVARCHAR(300)   NULL,
    [UniqueSystemID]      INT             NOT NULL,
    [DatabaseAccountCode] VARCHAR(50)     NULL,
    [RecordUpdateType]    VARCHAR(60)     NULL,
    [UploadDate]          DATETIME        NOT NULL DEFAULT (getdate()),
    [PeriodID]            VARCHAR(20)     NULL,
    PRIMARY KEY CLUSTERED ([SuspectID] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
GO
-- Restore the padding option after the table has been created.
SET ANSI_PADDING OFF
GO
SET ANSI_PADDING ON
GO
-- Nonclustered index on dbo.Screener keyed by DatabaseName (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [in_n_Screener_DatabaseName]
    ON [dbo].[Screener] ([DatabaseName] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by DatabaseAccountCode (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_DatabaseAccountCode]
    ON [dbo].[SuspectDataPoint] ([DatabaseAccountCode] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by Field (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_Field]
    ON [dbo].[SuspectDataPoint] ([Field] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by RecordID (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_RecordID]
    ON [dbo].[SuspectDataPoint] ([RecordID] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by ScreenerID, carrying
-- ScreenDate as an included (non-key) column (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_ScreenerID]
    ON [dbo].[SuspectDataPoint] ([ScreenerID] ASC)
    INCLUDE ([ScreenDate])
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by UniqueSystemID (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_UniqueSystemID]
    ON [dbo].[SuspectDataPoint] ([UniqueSystemID] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by FieldEntryDate (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [SuspectDataPoint_FieldEntryDate_Index]
    ON [dbo].[SuspectDataPoint] ([FieldEntryDate] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by RecordUpdateType (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [SuspectDataPoint_RecordUpdateType_Index]
    ON [dbo].[SuspectDataPoint] ([RecordUpdateType] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by ScreenDate (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [SuspectDataPoint_ScreenDate_Index]
    ON [dbo].[SuspectDataPoint] ([ScreenDate] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by [Source] (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [SuspectDataPoint_Source_Index]
    ON [dbo].[SuspectDataPoint] ([Source] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Unique nonclustered index on dbo.SuspectDataPoint keyed by SuspectID (scripted 10/17/2014).
-- NOTE(review): SuspectID is also the table's clustered primary key, so this index
-- repeats the same key column; confirm it is still needed before keeping it.
CREATE UNIQUE NONCLUSTERED INDEX [SuspectDataPoint_SuspectID_Index]
    ON [dbo].[SuspectDataPoint] ([SuspectID] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Nonclustered index on dbo.SuspectDataPoint keyed by UploadDate (scripted 10/17/2014).
CREATE NONCLUSTERED INDEX [SuspectDataPoint_UploadDate_Index]
    ON [dbo].[SuspectDataPoint] ([UploadDate] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Screener.DatabaseName references dbo.DatabaseInfo.DatabaseName; existing rows are
-- validated (WITH CHECK) and updates to the referenced key cascade to this table.
ALTER TABLE [dbo].[Screener] WITH CHECK
    ADD CONSTRAINT [FK_Screener_DatabaseInfo]
    FOREIGN KEY ([DatabaseName])
    REFERENCES [dbo].[DatabaseInfo] ([DatabaseName])
    ON UPDATE CASCADE
GO
-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Screener]
    CHECK CONSTRAINT [FK_Screener_DatabaseInfo]
GO
查询的实际运行时间相差很大(平均 1 秒对 13 秒)。谁能给我解释一下这是为什么?是我读错了执行计划吗?
编辑:添加索引视图
编辑:添加了表结构和索引脚本(即上文中的 CREATE TABLE / CREATE INDEX 语句)。
答案 0 :(得分:0)
我怀疑是并行执行的部分扭曲了执行计划中显示的百分比。
另一方面,在“带 ROW_NUMBER”的计划中,“计算标量”(Compute Scalar)出现在并行排序之后,这可能意味着 WHERE 子句中的这一部分:
CONVERT(DATE, SDP.FieldEntryDate) > DATEADD(dd,-24,GETDATE())
是在 ROW_NUMBER 的 OVER 子句之后才被计算的。如果该 WHERE 条件会排除 SDP 表中很大一部分行,那就能解释为什么 OVER 子句对查询的影响如此之大。你可以尝试先过滤,然后再对过滤后的结果运行 ROW_NUMBER。