我试图确定一张大表中每一列所占用的总存储量。该表中有多个 nvarchar 列。
其中一列是 nvarchar(max),在导入时会把 XML 文本放入其中。记录被正确处理之后,该列会被重新置为空字符串。
在SQL Server“按顶级表的磁盘使用情况”报告中,我看到以下内容。
记录数:1 808 604
保留(KB):15209272
数据(KB):14466776
索引(KB):731 896
未使用(KB):10 600
因此,我一直在寻找如何查找表中大量数据的位置。因为nvarchar(max)列几乎全部为空。
我对该表中的所有列执行了sum(datalength(columnname))。 并对所有列值求和。
这给了我:499344838字节= 0.499344838千兆字节
所以我现在想知道:
是否无法用sum(datalength(..))计算内容的大小
该计算是否正确?报告中使用的其他+-14GB在哪里?
编辑: 我已经完成了一些有关插入数据的测试(请参见下面的循环)
执行 ALTER TABLE BATCH REBUILD WITH (ONLINE = OFF) -> 22 928KB 数据
DROP&CREATE表
在XML列中插入100K而不是nvarchar变量-> 22.864KB数据
通过该测试,其数据差异不大,+-23 vs 27 MB。
我将尝试在生产表上进行重建,但是我需要安排它。
Edit2:我使用循环进行了 1000 次测试:每次插入 100 行,然后执行 set xml = '' where xml <> ''。之后的结果是 264.008KB 数据。如果我进行重建,则缩小到 22.944KB。
因此,看起来重建可以解决问题。但是,在生产环境中执行重建的开销有多大,大家有什么看法吗?如果确实需要执行重建,是否有可能在我的应用程序中检测到何时需要执行?
带索引的表定义
-- Batch table under investigation. [BATCH_PID] is the identity surrogate key
-- and the clustered primary key. Six columns are nvarchar(max); per the
-- question text, [XML] is filled at import time and reset to an empty string
-- once the record has been processed.
CREATE TABLE [dbo].[BATCH]
(
    [BATCH_PID]               [bigint] IDENTITY(1,1) NOT NULL,
    [XML_CREATION_DATE]       [datetime]       NULL,
    [BATCH_REFERENCE]         [nvarchar](50)   NULL,
    [SOURCE]                  [nvarchar](50)   NULL,
    [DOCUMENT_CLASS_FID]      [int]            NULL,
    [XML_NAME]                [nvarchar](150)  NULL,
    [XML_TYPE]                [int]            NULL,
    [XML]                     [nvarchar](max)  NULL,
    [NUM_OF_DOCUMENTS]        [int]            NULL,
    [NUM_OF_IMAGES]           [int]            NULL,
    [PRIORITY]                [int]            NULL,
    [STATUS]                  [int]            NULL,
    [USER_FID]                [int]            NULL,
    [EXTENAL_USER]            [nvarchar](50)   NULL,
    [REMARKS]                 [nvarchar](max)  NULL,
    [XML_PATH]                [nvarchar](max)  NULL,
    [BATCH_CREATION_DATE]     [datetime]       NULL,
    [BATCH_PROCESS_DATE]      [datetime]       NULL,
    [Action]                  [int]            NULL,
    [IMPORT_LOCATION_FID]     [bigint]         NULL,
    [QUARANTINE_LOCATION_FID] [bigint]         NULL,
    [QUARANTINE_DATE]         [datetime]       NULL,
    [QUARANTINE]              [bit]            NULL,
    [DOCS_ON_ERROR]           [varchar](255)   NULL,
    [CAPTURE_XML]             [nvarchar](max)  NULL,
    [IGNORE_PAC]              [bit]            NULL,
    [APPLICATION]             [int]            NULL,
    [EXTRA_INFO]              [nvarchar](max)  NULL,
    [INPUT_TEXT]              [nvarchar](max)  NULL,
    [PROCESS_TIME_BATCH]      [int]            NULL,
    [PROCESS_TIME_DOCUMENT]   [int]            NULL,
    [PROCESS_TIME_IMAGE]      [int]            NULL,
    [BATCH_SIZE]              [int]            NULL,
    [RULES]                   [nvarchar](1000) NULL,
    [KEEP_XML]                [bit]            NULL,
    -- Unnamed clustered primary key on the identity column.
    PRIMARY KEY CLUSTERED ([BATCH_PID] ASC)
        WITH (
            PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY] -- large-value (nvarchar(max)) data also goes to PRIMARY
-- Single-column nonclustered indexes on [dbo].[BATCH].
-- Every index uses the same option list and is placed on the PRIMARY filegroup.
CREATE NONCLUSTERED INDEX [IDX_BATCH_Action]
    ON [dbo].[BATCH] ([Action] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_BATCH_CREATION_DATE]
    ON [dbo].[BATCH] ([BATCH_CREATION_DATE] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_BATCH_SIZE]
    ON [dbo].[BATCH] ([BATCH_SIZE] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_DOCUMENT_CLASS_FID]
    ON [dbo].[BATCH] ([DOCUMENT_CLASS_FID] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_IMPORT_LOCATION_FID]
    ON [dbo].[BATCH] ([IMPORT_LOCATION_FID] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_PRIORITY]
    ON [dbo].[BATCH] ([PRIORITY] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_PROCESS_TIME_BATCH]
    ON [dbo].[BATCH] ([PROCESS_TIME_BATCH] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_PROCESS_TIME_DOCUMENT]
    ON [dbo].[BATCH] ([PROCESS_TIME_DOCUMENT] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_PROCESS_TIME_IMAGE]
    ON [dbo].[BATCH] ([PROCESS_TIME_IMAGE] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_QUARANTINE]
    ON [dbo].[BATCH] ([QUARANTINE] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_QUARANTINE_LOCATION_FID]
    ON [dbo].[BATCH] ([QUARANTINE_LOCATION_FID] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- ANSI_PADDING is switched ON ahead of the index on the character column [SOURCE].
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_SOURCE]
    ON [dbo].[BATCH] ([SOURCE] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_STATUS]
    ON [dbo].[BATCH] ([STATUS] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_USER_FID]
    ON [dbo].[BATCH] ([USER_FID] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Repeated ahead of the index on the character column [XML_NAME].
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [IDX_BATCH_XML_NAME]
    ON [dbo].[BATCH] ([XML_NAME] ASC)
    WITH (
        PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
我还创建了一个简单的循环来插入数据
-- Test harness: inserts @numInserts rows into [dbo].[BATCH], one INSERT per
-- iteration, each carrying a large nvarchar(max) payload in the [XML] column.
-- The row-by-row INSERT is kept on purpose: it is the workload being measured.
SET NOCOUNT ON;  -- suppress the per-row "1 row affected" messages

DECLARE @numInserts   int = 10000;  -- number of rows to insert
DECLARE @buildvarchar int = 10000;  -- payload length is @buildvarchar + 1 characters

-- Build the payload in a single call instead of a character-by-character loop.
-- The CAST to nvarchar(max) is required: without it REPLICATE truncates its
-- result at 8,000 bytes. Non-positive @buildvarchar still yields a single N'X',
-- matching the previous loop-based construction.
DECLARE @maxvarchar nvarchar(max) =
    REPLICATE(
        CAST(N'X' AS nvarchar(max)),
        CASE WHEN @buildvarchar > 0 THEN @buildvarchar ELSE 0 END + 1
    );

BEGIN
    WHILE @numInserts > 0
    BEGIN
        INSERT [dbo].[BATCH] ([XML_CREATION_DATE], [BATCH_REFERENCE], [SOURCE], [DOCUMENT_CLASS_FID], [XML_NAME], [XML_TYPE], [XML], [NUM_OF_DOCUMENTS], [NUM_OF_IMAGES], [PRIORITY], [STATUS], [USER_FID], [EXTENAL_USER], [REMARKS], [XML_PATH], [BATCH_CREATION_DATE], [BATCH_PROCESS_DATE], [Action], [IMPORT_LOCATION_FID], [QUARANTINE_LOCATION_FID], [QUARANTINE_DATE], [QUARANTINE], [DOCS_ON_ERROR], [CAPTURE_XML], [IGNORE_PAC], [APPLICATION], [EXTRA_INFO], [INPUT_TEXT], [PROCESS_TIME_BATCH], [PROCESS_TIME_DOCUMENT], [PROCESS_TIME_IMAGE], [BATCH_SIZE], [RULES], [KEEP_XML])
        VALUES (CAST(N'2017-09-21T14:56:46.000' AS DateTime), N'', N'iDesk', 1, N'21-09-2017-14-44-58-501574', 2,
                @maxvarchar, 0, 0, 1, 9, 1, N'', N'', N'D:\BaseDir\', CAST(N'2017-09-21T14:56:46.000' AS DateTime), CAST(N'2017-09-21T14:56:46.000' AS DateTime), 3, 1, 0, CAST(N'1900-01-01T00:00:00.000' AS DateTime), 0, N'1', NULL, NULL, 4, NULL, N'', 412, 0, 0, 0, N'', 0);

        SET @numInserts = @numInserts - 1;
    END
END
答案 0 :(得分:2)
通过以下语句,我设法将表的大小减小了27GB。
ALTER TABLE BATCH REBUILD WITH (ONLINE = OFF)
看起来SQL不会自动重用该空间。
我还没有找到办法来提前检测何时可以(或需要)进行这种清理。总数据长度查询的结果差异大约为400MB。
答案 1 :(得分:1)
•无法通过以下方式计算内容的大小 sum(datalength(..))
创建仅包含一列和一个值的表。
-- Minimal demo: a one-column, one-row table used to compare DATALENGTH with
-- the space figures reported by sp_spaceused.
CREATE TABLE [dbo].[BATCH2] ([BATCH_PID] INT);

-- Explicit column list and schema-qualified name, so the statement targets the
-- table created above regardless of the caller's default schema.
INSERT INTO [dbo].[BATCH2] ([BATCH_PID])
VALUES (1);

-- DATALENGTH returns the number of bytes used by the value itself.
SELECT DATALENGTH([BATCH_PID]) AS [BATCH_PID_BYTES]
FROM [dbo].[BATCH2];
这将返回 4,因为我们知道 INT 的存储大小是 4 字节。
如果我们使用sp_spaceused BATCH2
您会注意到数据列为8KB。为什么?
我们知道数据存储在Page中,每个页面占用8KB。
因此在我们的示例中,即使一行也需要1页,即8KB。
因此 datalength 给出的只是数据本身的大小,而不是数据实际占用的页空间。
•计算是否正确,另外的+-14GB在哪里 报告吗?
如果我们先插入 10000 行,然后删除其中一些行,或者把一些行更新为不同的长度,那么 sp_spaceused BATCH2
返回的值可能与实际数据量不符,因为 Insert/Update/Delete 操作不会自动回收空间。
删除或更新某些行之后,页(Page)中可能留有一些未使用的空间;而当更新后的值更大时,还可能占用更多的页。
但这不会立即反映出来。
因此,我们有时需要Rebuild Index
特别有很多索引的表。
因此,一旦我们重建(或重组)索引,数据页和索引页中的数据就会被重新整理,从而回收这些未被利用的空间。
-- Reorganize every index on the table. LOB_COMPACTION = ON is the documented
-- default for REORGANIZE and is spelled out here only to make it visible.
ALTER INDEX ALL
    ON [dbo].[BATCH]
    REORGANIZE WITH (LOB_COMPACTION = ON);
所以sp_spaceused BATCH
现在将提供正确的数据。