sql server如果不存在则插入并将插入的id插入另一个表中

时间:2014-03-27 12:54:45

标签: sql sql-server tsql stored-procedures

我是SQL Server的新手,正在开发一个记录日志的项目。

主表中的URL列(varchar(max))含有大量重复值。因此我另外创建了一个表,只存储去重后的URL,而主表中只保存对应的URL ID。

这是我的存储过程:

-- Staging table for the raw rows loaded from the input file.
-- Of these columns, the visible statements read only logdate and value.
CREATE TABLE #TestData
(
      logdate datetime
    , id      char(15)
    , value   varchar(max)  -- unparsed remainder of the log line; sliced later with PATINDEX/SUBSTRING
)

-- BULK INSERT takes its file name as a literal, so the statement is built
-- as dynamic SQL around the caller-supplied @pfile.
DECLARE @sql VARCHAR(max)

-- Double every single quote in the path before embedding it: an unescaped
-- quote would terminate the string literal early, which breaks legitimate
-- paths containing an apostrophe and lets a crafted path inject SQL.
-- firstrow = 2 starts loading at the second line of the file; fields are
-- tab-separated and rows are newline-terminated.
SET @sql = 'BULK INSERT [dbo].[#TestData] FROM ''' + REPLACE(@pfile, '''', '''''') + ''' WITH (
    firstrow = 2,
    fieldterminator = ''\t'',
    rowterminator = ''\n''
    )'

EXEC (@sql)

-- Scratch table holding the distinct URL values found in this batch.
CREATE TABLE #testurl (fld varchar(max))

-- The URL is the text between the url=" marker and the next double quote.
-- Rows without the marker, or with an empty URL, contribute NULL.
INSERT INTO #testurl (fld)
SELECT DISTINCT
    CASE
        WHEN pos.url_start > 5
            THEN NULLIF(SUBSTRING(td.value, pos.url_start, CHARINDEX('"', td.value, pos.url_start) - pos.url_start), '')
    END
FROM #TestData AS td
CROSS APPLY (
    -- PATINDEX yields 0 when the marker is absent, so url_start > 5 means "marker found";
    -- + 5 skips over the five characters of url=" itself.
    SELECT PATINDEX('%url="%', td.value) + 5 AS url_start
) AS pos

    -- Register the URLs from this batch that the lookup table does not hold yet.
    -- #testurl carries a NULL entry for log lines without a usable URL. Because
    -- "urlvalue = NULL" never matches, NOT EXISTS alone would let that NULL through
    -- and add another NULL row to url on every load, so it is filtered out here.
    INSERT INTO url (urlvalue)
    SELECT tu.fld
    FROM #testurl tu
    WHERE tu.fld IS NOT NULL
        AND NOT EXISTS (
            SELECT urlid
            FROM url u
            WHERE u.urlvalue = tu.fld
            )

    -- Parse each staged log line (#TestData.value) into its individual fields and
    -- insert one row per line into logmst.
    --
    -- Quoted fields (name="...") all follow the same recipe:
    --   start  = PATINDEX of the marker + length of the marker
    --   length = position of the next double quote - start
    --   NULLIF(..., '') turns an empty value into NULL
    -- The ISNULL(PATINDEX(...), 0) <> 0 guard yields NULL when the marker is
    -- absent or value itself is NULL.
    INSERT INTO [Cyberoam].[dbo].[logmst] (
        DATETIME,
        c1c2,
        c3c4,
        c5c6,
        c7,
        c8to12,
        STATUS,
        username,
        usergrp,
        application,
        category,
        categorytype,
        urlid,
        recvbytes,
        sentbytes,
        fw_rule_id,
        srcip,
        dstip,
        contenttype
        )
    SELECT logdate,
        -- The characters following log_id= are cut into fixed-width pieces
        -- of 2, 2, 2, 1 and 5 characters (offset 7 = length of 'log_id=').
        SUBSTRING(value, (PATINDEX('%log_id=%', value) + 7), 2),
        SUBSTRING(value, (PATINDEX('%log_id=%', value) + 9), 2),
        SUBSTRING(value, (PATINDEX('%log_id=%', value) + 11), 2),
        SUBSTRING(value, (PATINDEX('%log_id=%', value) + 13), 1),
        SUBSTRING(value, (PATINDEX('%log_id=%', value) + 14), 5),
        -- status="Allow" maps to '1', status="Deny" to '0', anything else to NULL.
        CASE 
            WHEN (SUBSTRING(value, (PATINDEX('%status="%', value) + 8), (CHARINDEX('"', value, (PATINDEX('%status="%', value) + 8)) - (PATINDEX('%status="%', value) + 8)))) = 'Allow'
                THEN '1'
            WHEN (SUBSTRING(value, (PATINDEX('%status="%', value) + 8), (CHARINDEX('"', value, (PATINDEX('%status="%', value) + 8)) - (PATINDEX('%status="%', value) + 8)))) = 'Deny'
                THEN '0'
            ELSE NULL
            END,
        -- user_name="..."
        CASE 
            WHEN (ISNULL(PATINDEX('%user_name="%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%user_name="%', value) + 11),(CHARINDEX('"', value, (PATINDEX('%user_name="%', value) + 11)) - (PATINDEX('%user_name="%', value) + 11))), ''))
            ELSE NULL
            END,
        -- user_gp="..."
        CASE 
            WHEN (isnull(PATINDEX('%user_gp="%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%user_gp="%', value) + 9), (CHARINDEX('"', value, (PATINDEX('%user_gp="%', value) + 9)) - (PATINDEX('%user_gp="%', value) + 9))), ''))
            ELSE NULL
            END,
        -- application="..." is preferred; application_name="..." is used only
        -- when the first marker is absent.
        CASE 
            WHEN (isnull(PATINDEX('%application="%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%application="%', value) + 13), (CHARINDEX('"', value, (PATINDEX('%application="%', value) + 13)) - (PATINDEX('%application="%', value) + 13))), ''))
            WHEN (isnull(PATINDEX('%application_name="%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%application_name="%', value) + 18), (CHARINDEX('"', value, (PATINDEX('%application_name="%', value) + 18)) - (PATINDEX('%application_name="%', value) + 18))), ''))
            ELSE NULL
            END,
        -- category="..."
        CASE 
            WHEN (isnull(PATINDEX('%category="%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%category="%', value) + 10), (CHARINDEX('"', value, (PATINDEX('%category="%', value) + 10)) - (PATINDEX('%category="%', value) + 10))), ''))
            ELSE NULL
            END,
        -- category_type="..."
        CASE 
            WHEN (isnull(PATINDEX('%category_type="%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%category_type="%', value) + 15), (CHARINDEX('"', value, (PATINDEX('%category_type="%', value) + 15)) - (PATINDEX('%category_type="%', value) + 15))), ''))
            ELSE NULL
            END,
           -- urlid: re-extract the URL from the line and look its id up in url.
           -- Lines without a usable URL compare against NULL and so get a NULL urlid.
           -- NOTE(review): scalar subquery correlated on the parsed URL; it raises an
           -- error if url holds the same urlvalue more than once - TODO confirm uniqueness.
           (
            SELECT urlid
            FROM url
            WHERE urlvalue = (
                    CASE 
                        WHEN (isnull(PATINDEX('%url="%', value), 0) <> 0)
                            THEN (nullif(SUBSTRING(value, (PATINDEX('%url="%', value) + 5), (CHARINDEX('"', value, (PATINDEX('%url="%', value) + 5)) - (PATINDEX('%url="%', value) + 5))), ''))
                        ELSE NULL
                        END
                    )
            ),
        -- recv_bytes=<digits> (unquoted). The length is the position of the first
        -- non-digit within the next 20 characters; no non-digit in that window
        -- gives length 0 and therefore NULL.
        -- NOTE(review): a length equal to that position means the slice also includes
        -- the terminating non-digit character - confirm this is intended.
        CASE 
            WHEN (isnull(PATINDEX('%recv_bytes=%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%recv_bytes=%', value) + 11), (PATINDEX('%[^0-9]%', (nullif(SUBSTRING(value, (PATINDEX('%recv_bytes=%', value) + 11), 20), ''))))), ''))
            ELSE NULL
            END,
        -- sent_bytes=<digits>: same extraction as recv_bytes above.
        CASE 
            WHEN (isnull(PATINDEX('%sent_bytes=%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%sent_bytes=%', value) + 11), (PATINDEX('%[^0-9]%', (nullif(SUBSTRING(value, (PATINDEX('%sent_bytes=%', value) + 11), 20), ''))))), ''))
            ELSE NULL
            END,
        -- fw_rule_id, src_ip and dst_ip are unquoted: each value runs from the end
        -- of its marker up to the next space character.
        CASE 
            WHEN (isnull(PATINDEX('%fw_rule_id=%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%fw_rule_id=%', value) + 11), (CHARINDEX(' ', value, (PATINDEX('%fw_rule_id=%', value) + 11)) - (PATINDEX('%fw_rule_id=%', value) + 11))), ''))
            ELSE NULL
            END,
        CASE 
            WHEN (isnull(PATINDEX('%src_ip=%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%src_ip=%', value) + 7), (CHARINDEX(' ', value, (PATINDEX('%src_ip=%', value) + 7)) - (PATINDEX('%src_ip=%', value) + 7))), ''))
            ELSE NULL
            END,
        CASE 
            WHEN (isnull(PATINDEX('%dst_ip=%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%dst_ip=%', value) + 7), (CHARINDEX(' ', value, (PATINDEX('%dst_ip=%', value) + 7)) - (PATINDEX('%dst_ip=%', value) + 7))), ''))
            ELSE NULL
            END,
        -- contenttype="..."
        CASE 
            WHEN (isnull(PATINDEX('%contenttype="%', value), 0) <> 0)
                THEN (nullif(SUBSTRING(value, (PATINDEX('%contenttype="%', value) + 13), (CHARINDEX('"', value, (PATINDEX('%contenttype="%', value) + 13)) - (PATINDEX('%contenttype="%', value) + 13))), ''))
            ELSE NULL
            END
    FROM #TestData

此代码工作正常,但问题是:对一个约5k条记录的文件执行批量插入所花费的时间,会随着URL表的增长而逐渐增加(当URL表中约有5k条记录时,耗时已增加到约20分钟)。而且还有很多这样的文件需要插入。

希望得到各位的建议:我该如何改善性能,或者我的做法是否有问题?

非常感谢你的帮助。谢谢!

注意:如果URL列直接放在同一个表(主表)中,则整个过程大约只需要4-7秒。把URL放在同一个表中还是拆分到单独的表中,会造成性能差异吗?

1 个答案:

答案 0 :(得分:2)

我找到了解决方案,在此分享给有需要的人。耗时很长的原因是URL列的类型为varchar(max)。我删除了聚集主键(PK)索引,新增了一列用于存储url的校验和,并在该列上创建了聚集索引。然后对查询做了如下修改,从:

-- Lookup as originally written: direct equality on urlvalue.
SELECT u.urlid
FROM url AS u
WHERE u.urlvalue = @value

到:

 -- Narrow the search with the checksum column first, then confirm with the
 -- full urlvalue comparison, because different strings can share a checksum.
 SELECT urlid
        FROM url
        WHERE checksum_urlvalue = checksum(@value) AND urlvalue = (@value)

这将执行时间缩短为7-8秒。 谢谢大家的回复。 快乐的编码:)