我是SQL Server的新手,正在开发一个记录日志的项目。
主表的 URL 列(varchar(max))包含大量重复值,因此我另建了一张表,只存储去重后的 URL,并在主表中保存对应的 URL ID。
这是我的存储过程:
-- Staging table that receives the raw log file rows from BULK INSERT.
-- Columns are listed in the order the bulk load fills them.
CREATE TABLE #TestData
(
    logdate DATETIME,     -- timestamp of the log line
    id      CHAR(15),     -- second field of the input file
    value   VARCHAR(MAX)  -- remaining key=value text, parsed by later statements
);
-- Load the tab-separated file named by @pfile into #TestData, skipping the
-- header row. The file path is spliced into the statement text, so the
-- statement has to be built and executed dynamically.
DECLARE @sql VARCHAR(MAX);

-- Double every single quote in the path before embedding it: an unescaped
-- quote would end the string literal early, breaking the statement or letting
-- the rest of the path run as SQL.
SET @sql = 'BULK INSERT [dbo].[#TestData] FROM ''' + REPLACE(@pfile, '''', '''''') + ''' WITH (
firstrow = 2,
fieldterminator = ''\t'',
rowterminator = ''\n''
)';

EXEC (@sql);
-- Collect the distinct url="..." values found in the freshly loaded batch.
-- Rows with no url, an empty url, or a url whose closing quote is missing
-- contribute nothing, so the staging table never holds a NULL entry.
CREATE TABLE #testurl (fld VARCHAR(MAX));

INSERT INTO #testurl (fld)
SELECT DISTINCT parsed.url_text
FROM #TestData AS td
CROSS APPLY (
    -- Offset of the first character after the url=" marker (5 = length of
    -- the marker); NULL when the marker does not occur in the line.
    SELECT NULLIF(PATINDEX('%url="%', td.value), 0) + 5 AS url_from
) AS pos
CROSS APPLY (
    -- Offset of the closing quote; NULL when the value is unterminated.
    -- Guarding here keeps SUBSTRING from receiving a negative length, which
    -- would abort the whole load.
    SELECT CASE
               WHEN pos.url_from IS NOT NULL
               THEN NULLIF(CHARINDEX('"', td.value, pos.url_from), 0)
           END AS url_to
) AS lim
CROSS APPLY (
    SELECT CASE
               WHEN lim.url_to IS NOT NULL
               THEN NULLIF(SUBSTRING(td.value, pos.url_from, lim.url_to - pos.url_from), '')
           END AS url_text
) AS parsed
WHERE parsed.url_text IS NOT NULL;
-- Register every staged URL that the url lookup table does not know yet.
INSERT INTO url (urlvalue)
SELECT tu.fld
FROM #testurl AS tu
WHERE tu.fld IS NOT NULL  -- a NULL never equals any urlvalue, so without this
                          -- filter it would pass NOT EXISTS on every run
  AND NOT EXISTS (
          SELECT 1
          FROM url AS u
          WHERE u.urlvalue = tu.fld
      );
-- Parse each staged log line into its fields and append it to logmst.
--
-- Every field is located by a key marker inside td.value. The work is split
-- into three steps so each marker is searched only once per row:
--   pos : offset of the first character of each field value (NULL = key absent)
--   lim : offset just past the last character of each field value
--         (NULL = key absent or, for quoted fields, closing quote missing)
--   fld : the extracted text, with empty strings normalised to NULL
--
-- A field whose key is absent, whose value is empty, or whose closing quote is
-- missing is stored as NULL instead of raising an invalid-length SUBSTRING
-- error that would abort the whole load.
--
-- NOTE(review): "_" inside the PATINDEX patterns is a single-character
-- wildcard, so e.g. 'user_name' also matches 'user-name'. Left as written.
INSERT INTO [Cyberoam].[dbo].[logmst] (
    [DATETIME],
    c1c2,
    c3c4,
    c5c6,
    c7,
    c8to12,
    [STATUS],
    username,
    usergrp,
    application,
    category,
    categorytype,
    urlid,
    recvbytes,
    sentbytes,
    fw_rule_id,
    srcip,
    dstip,
    contenttype
)
SELECT
    td.logdate,
    -- The text after "log_id=" is sliced by fixed widths 2/2/2/1/5.
    -- NOTE(review): a missing log_id key is not detected (PATINDEX yields 0 and
    -- the slices are then taken from the start of the line) - confirm the key
    -- is always present.
    SUBSTRING(td.value, pos.log_id_key + 7, 2)   AS c1c2,
    SUBSTRING(td.value, pos.log_id_key + 9, 2)   AS c3c4,
    SUBSTRING(td.value, pos.log_id_key + 11, 2)  AS c5c6,
    SUBSTRING(td.value, pos.log_id_key + 13, 1)  AS c7,
    SUBSTRING(td.value, pos.log_id_key + 14, 5)  AS c8to12,
    -- Allow -> '1', Deny -> '0', anything else (or no status) -> NULL.
    CASE fld.status_text
        WHEN 'Allow' THEN '1'
        WHEN 'Deny'  THEN '0'
    END                                          AS [STATUS],
    fld.user_name                                AS username,
    fld.user_gp                                  AS usergrp,
    fld.application                              AS application,
    fld.category                                 AS category,
    fld.category_type                            AS categorytype,
    -- Resolve the URL text to its id in the url lookup table (NULL when the
    -- line has no url or the url is unknown).
    -- NOTE(review): this compares VARCHAR(MAX) values, which cannot be index
    -- keys; if the lookup becomes slow, consider an indexed checksum column.
    (
        SELECT u.urlid
        FROM url AS u
        WHERE u.urlvalue = fld.url
    )                                            AS urlid,
    fld.recv_bytes                               AS recvbytes,
    fld.sent_bytes                               AS sentbytes,
    fld.fw_rule_id                               AS fw_rule_id,
    fld.src_ip                                   AS srcip,
    fld.dst_ip                                   AS dstip,
    fld.contenttype                              AS contenttype
FROM #TestData AS td
CROSS APPLY (
    -- Step 1: where each value starts. The added constant is the length of the
    -- key marker; NULLIF turns PATINDEX's "not found" (0) into NULL.
    SELECT
        PATINDEX('%log_id=%', td.value)                             AS log_id_key,
        NULLIF(PATINDEX('%status="%', td.value), 0) + 8             AS status_from,
        NULLIF(PATINDEX('%user_name="%', td.value), 0) + 11         AS user_name_from,
        NULLIF(PATINDEX('%user_gp="%', td.value), 0) + 9            AS user_gp_from,
        NULLIF(PATINDEX('%application="%', td.value), 0) + 13       AS application_from,
        NULLIF(PATINDEX('%application_name="%', td.value), 0) + 18  AS application_name_from,
        NULLIF(PATINDEX('%category="%', td.value), 0) + 10          AS category_from,
        NULLIF(PATINDEX('%category_type="%', td.value), 0) + 15     AS category_type_from,
        NULLIF(PATINDEX('%url="%', td.value), 0) + 5                AS url_from,
        NULLIF(PATINDEX('%recv_bytes=%', td.value), 0) + 11         AS recv_bytes_from,
        NULLIF(PATINDEX('%sent_bytes=%', td.value), 0) + 11         AS sent_bytes_from,
        NULLIF(PATINDEX('%fw_rule_id=%', td.value), 0) + 11         AS fw_rule_id_from,
        NULLIF(PATINDEX('%src_ip=%', td.value), 0) + 7              AS src_ip_from,
        NULLIF(PATINDEX('%dst_ip=%', td.value), 0) + 7              AS dst_ip_from,
        NULLIF(PATINDEX('%contenttype="%', td.value), 0) + 13       AS contenttype_from
) AS pos
CROSS APPLY (
    -- Step 2: where each value ends (exclusive).
    SELECT
        -- Quoted values end at the next double quote; NULL if there is none.
        CASE WHEN pos.status_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.status_from), 0)
        END AS status_to,
        CASE WHEN pos.user_name_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.user_name_from), 0)
        END AS user_name_to,
        CASE WHEN pos.user_gp_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.user_gp_from), 0)
        END AS user_gp_to,
        CASE WHEN pos.application_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.application_from), 0)
        END AS application_to,
        CASE WHEN pos.application_name_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.application_name_from), 0)
        END AS application_name_to,
        CASE WHEN pos.category_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.category_from), 0)
        END AS category_to,
        CASE WHEN pos.category_type_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.category_type_from), 0)
        END AS category_type_to,
        CASE WHEN pos.url_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.url_from), 0)
        END AS url_to,
        CASE WHEN pos.contenttype_from IS NOT NULL
             THEN NULLIF(CHARINDEX('"', td.value, pos.contenttype_from), 0)
        END AS contenttype_to,
        -- Byte counters are a run of digits within the next 20 characters.
        -- The appended space guarantees a non-digit is found even when the
        -- digits reach the end of the line, and "- 1" stops before that
        -- non-digit so the delimiter is not copied into the value.
        CASE WHEN pos.recv_bytes_from IS NOT NULL
             THEN pos.recv_bytes_from
                  + PATINDEX('%[^0-9]%', SUBSTRING(td.value, pos.recv_bytes_from, 20) + ' ') - 1
        END AS recv_bytes_to,
        CASE WHEN pos.sent_bytes_from IS NOT NULL
             THEN pos.sent_bytes_from
                  + PATINDEX('%[^0-9]%', SUBSTRING(td.value, pos.sent_bytes_from, 20) + ' ') - 1
        END AS sent_bytes_to,
        -- Unquoted values end at the next space, or at the end of the line
        -- when they are the last field.
        CASE WHEN pos.fw_rule_id_from IS NOT NULL
             THEN ISNULL(NULLIF(CHARINDEX(' ', td.value, pos.fw_rule_id_from), 0), LEN(td.value) + 1)
        END AS fw_rule_id_to,
        CASE WHEN pos.src_ip_from IS NOT NULL
             THEN ISNULL(NULLIF(CHARINDEX(' ', td.value, pos.src_ip_from), 0), LEN(td.value) + 1)
        END AS src_ip_to,
        CASE WHEN pos.dst_ip_from IS NOT NULL
             THEN ISNULL(NULLIF(CHARINDEX(' ', td.value, pos.dst_ip_from), 0), LEN(td.value) + 1)
        END AS dst_ip_to
) AS lim
CROSS APPLY (
    -- Step 3: cut the values out; empty strings become NULL.
    SELECT
        CASE WHEN lim.status_to IS NOT NULL
             THEN SUBSTRING(td.value, pos.status_from, lim.status_to - pos.status_from)
        END AS status_text,
        CASE WHEN lim.user_name_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.user_name_from, lim.user_name_to - pos.user_name_from), '')
        END AS user_name,
        CASE WHEN lim.user_gp_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.user_gp_from, lim.user_gp_to - pos.user_gp_from), '')
        END AS user_gp,
        -- application="..." wins whenever that key is present (even if its
        -- value is empty); application_name="..." is only a fallback for
        -- lines that lack the application key.
        CASE
            WHEN lim.application_to IS NOT NULL
            THEN NULLIF(SUBSTRING(td.value, pos.application_from, lim.application_to - pos.application_from), '')
            WHEN pos.application_from IS NULL AND lim.application_name_to IS NOT NULL
            THEN NULLIF(SUBSTRING(td.value, pos.application_name_from, lim.application_name_to - pos.application_name_from), '')
        END AS application,
        CASE WHEN lim.category_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.category_from, lim.category_to - pos.category_from), '')
        END AS category,
        CASE WHEN lim.category_type_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.category_type_from, lim.category_type_to - pos.category_type_from), '')
        END AS category_type,
        CASE WHEN lim.url_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.url_from, lim.url_to - pos.url_from), '')
        END AS url,
        CASE WHEN lim.recv_bytes_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.recv_bytes_from, lim.recv_bytes_to - pos.recv_bytes_from), '')
        END AS recv_bytes,
        CASE WHEN lim.sent_bytes_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.sent_bytes_from, lim.sent_bytes_to - pos.sent_bytes_from), '')
        END AS sent_bytes,
        CASE WHEN lim.fw_rule_id_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.fw_rule_id_from, lim.fw_rule_id_to - pos.fw_rule_id_from), '')
        END AS fw_rule_id,
        CASE WHEN lim.src_ip_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.src_ip_from, lim.src_ip_to - pos.src_ip_from), '')
        END AS src_ip,
        CASE WHEN lim.dst_ip_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.dst_ip_from, lim.dst_ip_to - pos.dst_ip_from), '')
        END AS dst_ip,
        CASE WHEN lim.contenttype_to IS NOT NULL
             THEN NULLIF(SUBSTRING(td.value, pos.contenttype_from, lim.contenttype_to - pos.contenttype_from), '')
        END AS contenttype
) AS fld;
这段代码可以正常工作,但问题是:随着 URL 表不断变大,批量导入一个约 5k 条记录的文件所需的时间越来越长(当 URL 表中约有 5k 条记录时,耗时已达 20 分钟)。而今后还会有很多这样的文件需要导入。
希望得到大家的建议:我该如何改善性能?或者我是不是哪里做错了?
非常感谢你的帮助。谢谢!
注意:如果把 URL 列直接放在主表中,导入大约只需 4-7 秒。URL 放在同一张表里还是拆分到单独的表中,会造成性能差异吗?
答案 0 :(得分:2)
我已经找到了解决方案,写在这里供有需要的人参考。耗时很长的原因在于 URL 列是 varchar(max)。我删除了主键上的聚集索引,新增了一列用于存储 URL 的校验和(checksum),并在该列上创建了聚集索引。然后把查询从:
-- Lookup before the change: matches on the full URL text only.
SELECT u.urlid
FROM url AS u
WHERE u.urlvalue = @value
改为:
-- Lookup after the change: filter on the checksum column first, then confirm
-- with the full URL comparison, because different URLs can produce the same
-- checksum.
SELECT u.urlid
FROM url AS u
WHERE u.checksum_urlvalue = CHECKSUM(@value)
  AND u.urlvalue = @value
这样执行时间缩短到了 7-8 秒。谢谢大家的回复,祝编码愉快 :)