使用tsql从字符串中提取电子邮件地址

时间:2015-04-13 01:47:55

标签: sql sql-server regex tsql

我正在尝试从现有的评论字段中提取电子邮件地址并将其放入自己的列中。字符串可能是这样的“这是一个电子邮件地址为someemail@domain.org的示例评论”,或者只是电子邮件本身“someemail@domain.org”。

我认为最好的办法是找到'@'符号的索引,然后分别向左、向右搜索,直到到达字符串的开头或末尾,或者遇到空格为止。有人可以帮我解决这个问题吗?

7 个答案:

答案 0 :(得分:9)

我知道wewesthemenace已经回答了这个问题,但他/她的解决方案似乎过于复杂。为什么要将电子邮件地址的左侧和右侧连接在一起?我宁愿找到电子邮件地址的开头和结尾,然后使用substring返回电子邮件地址,如下所示:

我的表

-- Sample data: one comment per row, covering an address at the end, in the
-- middle and at the beginning of the text, plus a row without any address.
-- The INSERT names its target column and uses N'' literals so the values
-- match the NVARCHAR column type without an implicit conversion.
DECLARE @Table TABLE (comment NVARCHAR(50));
INSERT INTO @Table (comment)
VALUES (N'blah MyEmailAddress@domain.org'),            --At the end
       (N'blah MyEmailAddress@domain.org blah blah'), --In the middle
       (N'MyEmailAddress@domain.org blah'),           --At the beginning
       (N'no email');                                 --No address at all

实际查询:

-- Returns every comment together with the first email-like token it contains
-- (the run of non-space characters surrounding the first '@'), or NULL when
-- the comment has no '@'.
--
-- Both boundaries are derived from the SAME '@' (the first one). Searching
-- the reversed string for '@' would locate the LAST '@' instead, which makes
-- the computed start lie after the computed end when a comment holds more
-- than one address and produces an invalid SUBSTRING length.
SELECT  t.comment,
        CASE
            WHEN p.atPosition = 0 THEN NULL
            ELSE SUBSTRING(t.comment, b.beginningOfEmail, e.endOfEmail - b.beginningOfEmail)
        END AS email
FROM @Table AS t
-- Position of the first '@' (0 when absent).
CROSS APPLY (SELECT CHARINDEX('@', t.comment)) AS p(atPosition)
-- First space at or after the '@'; the appended ' ' guarantees a hit when the
-- address runs to the end of the comment.
CROSS APPLY (SELECT CHARINDEX(' ', t.comment + ' ', p.atPosition)) AS e(endOfEmail)
-- Walk backwards from the '@' to the previous space; the prepended ' '
-- guarantees a hit when the address starts the comment.
CROSS APPLY (SELECT p.atPosition - CHARINDEX(' ', REVERSE(' ' + LEFT(t.comment, p.atPosition))) + 2) AS b(beginningOfEmail)

结果:

comment                                            email
-------------------------------------------------- --------------------------------------------------
blah MyEmailAddress@domain.org                     MyEmailAddress@domain.org
blah MyEmailAddress@domain.org blah blah           MyEmailAddress@domain.org
MyEmailAddress@domain.org blah                     MyEmailAddress@domain.org
no email                                           NULL

答案 1 :(得分:7)

您可以在字符串中搜索'@'。然后分别取得'@'左侧(LEFT)和右侧(RIGHT)的字符串。接着对LEFT一侧执行REVERSE,找到第一个空格' '出现的位置,并取出该位置之前的SUBSTRING,然后再执行一次REVERSE以还原原始形式。同样的原则适用于RIGHT一侧,只是不需要REVERSE。

示例字符串:'some text someemail@domain.org some text'

  1. LEFT = 'some text someemail'
  2. RIGHT = '@domain.org some text'
  3. REVERSE(LEFT) = 'liameemos txet emos'
  4. 对(3)取SUBSTRING,直到第一个空格 = 'liameemos'
  5. REVERSE(4) = 'someemail'
  6. 对(2)取SUBSTRING,直到第一个空格 = '@domain.org'
  7. 将5和6连接起来 = 'someemail@domain.org'
您的查询将是:

    示例数据:

    -- Extracts the email-like token around the first '@' of each sample string.
    --   CteEmail   : sample input rows.
    --   CteStrings : text before the '@' ([Left]), that text reversed
    --                (Reverse_Left) and the text from the '@' onwards ([Right]).
    -- The final SELECT keeps the last word of [Left] and the first word of
    -- [Right] and concatenates them.
    ;WITH CteEmail(email) AS (
        SELECT 'someemail@domain.org' UNION ALL
        SELECT 'some text someemail@domain.org some text' UNION ALL
        SELECT 'no email'
    )
    ,CteStrings AS (
        SELECT
            [Left] = LEFT(email, a.at_pos - 1),
            Reverse_Left = REVERSE(LEFT(email, a.at_pos - 1)),
            -- Everything from the '@' to the end. The '@' position must not be
            -- used as a length for RIGHT(): that only works when the text after
            -- the '@' happens to be as long as the text before it.
            [Right] = SUBSTRING(email, a.at_pos, LEN(email))
        FROM CteEmail
        -- NULLIF turns "not found" (0) into NULL so LEFT() never receives a
        -- negative length, even if this expression is evaluated for a row that
        -- the WHERE clause is about to discard.
        CROSS APPLY (SELECT NULLIF(CHARINDEX('@', email), 0)) AS a(at_pos)
        WHERE email LIKE '%@%'
    )
    SELECT
        [Left],
        Reverse_Left,
        [Right],
        -- Appending ' ' guarantees CHARINDEX finds a space, so the same
        -- expression handles both "more text follows" and "end of string".
        extracted_email =
            REVERSE(LEFT(Reverse_Left, CHARINDEX(' ', Reverse_Left + ' ') - 1))
            + LEFT([Right], CHARINDEX(' ', [Right] + ' ') - 1)
    FROM CteStrings
    

    结果:

    email
    ----------------------------------------
    someemail@domain.org
    some text someemail@domain.org some text
    no email
    

答案 2 :(得分:4)

在每行中只查找单个电子邮件地址时,Stephan的答案非常棒。

但是,当我尝试在每行中获取多个电子邮件地址时,我遇到了此错误:

  

传递给LEFT或SUBSTRING函数的长度参数无效

我使用this answer from DBA Stack Exchange来获取字符串中所有'@'的位置。它需要一个表值函数,该函数为模式在字符串中的每一次出现返回一行(即该次出现的位置)。我还必须修改CROSS APPLY部分,以便处理多个电子邮件地址。

我的表

-- Sample comments: a row with three addresses, a row with one, and a row
-- with none.
DECLARE @Table TABLE
(
    comment VARCHAR(500)
);

INSERT INTO @Table (comment)
VALUES
    ('blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com'),
    ('blah hello.world@domain.org more'),
    ('no email');

表值函数

-- Returns one row per occurrence of @term inside @string.
--
-- Parameters:
--   @string  text to search.
--   @term    literal text to look for (not a wildcard pattern).
-- Returns:
--   pos      1-based position in @string at which an occurrence starts.
--
-- Notes:
--   * Candidate positions come from numbering the rows of sys.all_objects, so
--     positions beyond that row count are never examined.
--   * Lengths use DATALENGTH/2 instead of LEN because LEN ignores trailing
--     spaces (a term of ' ' would otherwise have length 0).
--   * The search runs directly on @string up to the last position where the
--     term still fits, so a match ending at the last character is reported
--     and nothing can match across a concatenation seam.
--   * An empty @term yields no rows.
CREATE FUNCTION dbo.fnFindPatternLocation
(
    @string NVARCHAR(MAX),
    @term   NVARCHAR(255)
)
RETURNS TABLE
AS
    RETURN
    (
        SELECT pos = n.Number
        FROM (
            SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
            FROM sys.all_objects
        ) AS n(Number)
        CROSS APPLY (
            SELECT DATALENGTH(@term) / 2, DATALENGTH(@string) / 2
        ) AS l(term_len, string_len)
        WHERE l.term_len > 0
          AND n.Number <= l.string_len - l.term_len + 1
          AND SUBSTRING(@string, n.Number, l.term_len) = @term
    );
GO

查询:

-- Returns one row per '@' found in each comment: the comment, the position of
-- that '@' and the run of non-space characters surrounding it. Comments
-- without an '@' produce no rows because the first CROSS APPLY returns nothing
-- for them.
SELECT  t.comment,
        a.pos,
        SUBSTRING(t.comment, c.beginningOfEmail, b.endOfEmail - c.beginningOfEmail) AS email
FROM @Table AS t
CROSS APPLY (SELECT pos FROM dbo.fnFindPatternLocation(t.comment, '@')) AS a(pos)
-- First space after this '@'; the appended ' ' covers an address that ends
-- the comment.
CROSS APPLY (SELECT CHARINDEX(' ', t.comment + ' ', a.pos)) AS b(endOfEmail)
-- Walk back from this '@' to the previous space. The prepended ' ' covers an
-- address that starts the comment; without it CHARINDEX returns 0 and the
-- computed start would land past the '@'.
CROSS APPLY (SELECT a.pos - CHARINDEX(' ', REVERSE(' ' + LEFT(t.comment, a.pos))) + 2) AS c(beginningOfEmail)

结果:

comment
---------------------------------------------------------------------------------------------------------
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah hello.world@domain.org more
pos    email
---    ------------------------------
26     My.EmailAddress@domain.org
64     someemailaddress@domain.com
95     asdf@gmail.com
17     hello.world@domain.org

答案 3 :(得分:1)

-- Extracts every plausible email address from free-form text, one output row
-- per address. Steps:
--   1. f: repeatedly replace characters outside the allowed set with spaces.
--   2. s: visit each '@' of the cleaned text in turn.
--   3. final SELECT: cut out the token around each '@' and discard tokens
--      that fail a few shape checks.

-- Sample input, including malformed addresses that should be rejected.
DECLARE @t TABLE (row_id INT, email VARCHAR(100))

INSERT @t (row_id, email)
VALUES (1, 'drgkls<ivan@gvi.ru>, info@gvi.com, @ dgh507-16-65@'),
        (2, 'hjshfkjshfj@kjs.kjsehf herwfjewr@kjsd.com adjfhja@.com u3483dhj@hhb@.dfj'),
        (3, 'kjsdghfjs4254.23detygh@jhjdfg.dgb лдоврывплдоо isgfsi@ klsdfksdl@,dd.com')

-- PATINDEX pattern that matches any single character other than a-z, 0-9,
-- '@', '.', '_' and space.
-- NOTE(review): whether A-Z also counts as allowed depends on the collation
-- in effect - confirm.
DECLARE @pat VARCHAR(100) = '%[^a-z0-9@._ ]%';

WITH f AS (
         -- Anchor: pad the text with a space on both sides and record the
         -- first disallowed character (bad) and its position (pat). In this
         -- SELECT, "email" inside SUBSTRING/PATINDEX is the unpadded column
         -- from @t; pat is only ever tested for zero / non-zero later on.
         SELECT    row_id,
                 CAST(' ' + email + ' ' AS VARCHAR(102)) email,
                 SUBSTRING(email, PATINDEX(@pat, email), 1) bad,
                 PATINDEX(@pat, email) pat
         FROM    @t
         UNION ALL
         -- Recursive step: replace every occurrence of the current bad
         -- character with a space, then look for the next disallowed
         -- character. Recursion ends once none is left.
         SELECT    row_id,
                 CAST(REPLACE(email, bad, ' ') AS VARCHAR(102)),
                 SUBSTRING(REPLACE(email, bad, ' '), PATINDEX(@pat, REPLACE(email, bad, ' ')), 1) bad,
                 PATINDEX(@pat, REPLACE(email, bad, ' '))
         FROM    f
         WHERE    PATINDEX(@pat, email) > 0
     ),
     s AS 
     (
         -- Anchor: the fully cleaned rows (pat = 0) that contain an '@';
         -- pos is the position of the first '@'.
         SELECT    row_id,
                 email, PATINDEX('%@%', email) pos 
         FROM    f 
         WHERE    pat = 0
                 AND    PATINDEX('%@%', email) > 0
         UNION ALL
         -- Recursive step: drop everything up to and including the current
         -- '@' and locate the next '@' in what remains.
         SELECT    row_id,
                 SUBSTRING(email, pos + 1, 102), 
                 PATINDEX('%@%', SUBSTRING(email, pos + 1, 102))
         FROM    s
         WHERE    PATINDEX('%@%', SUBSTRING(email, pos + 1, 102)) > 0
     )

-- o1 = text between the '@' and the space preceding it,
-- pp = text from the '@' up to the next space.
SELECT  row_id, o1 + pp
FROM    s   
        -- s1: the text before the '@', reversed.
        CROSS APPLY (SELECT    REVERSE(LEFT(email, pos -1)) s1) x
        -- i1: position of the first space in s1 (0 when there is none).
        CROSS APPLY (SELECT    CHARINDEX(' ', s1) i1) y
        -- o1: reversed back; the row is dropped when no space precedes the '@'.
        CROSS APPLY (SELECT    REVERSE(LEFT(s1, i1 -1)) o1 WHERE i1 > 0) z
        -- i2: position of the first space at or after the '@'.
        CROSS APPLY (SELECT    CHARINDEX(' ', email, pos) i2) e
        -- pp: the row is dropped unless at least one character follows the '@'.
        CROSS APPLY (SELECT    SUBSTRING(email, pos, i2 -pos) pp WHERE    i2 > pos + 1) q
-- Shape checks: the part before '@' is longer than one character; the part
-- from '@' onwards contains a '.', has no second '@', does not contain '@.'
-- and does not end with '.'.
WHERE    LEN(o1) > 1
        AND CHARINDEX('.', pp) > 0
        AND PATINDEX('%@%@%', pp) = 0
        AND PATINDEX('%@.%', pp) = 0
        AND PATINDEX('%.', pp) = 0

答案 4 :(得分:0)

这条单行语句也可行(虽然有点长,哈哈):

-- Extracts the email-like token around the first '@' in @a, or NULL when @a
-- contains no '@'. @a must already be declared in the surrounding batch; to
-- run this snippet on its own, uncomment the two lines below.
--declare @a varchar(100) 
--set @a = 'a asfd saasd asdfgh@asd.com wqe z zx cxzc '
select case
           when p.at_pos = 0 then null
           else substring(@a, s.email_start, e.email_end - s.email_start)
       end as email
from (select charindex('@', @a) as at_pos) as p
-- Walk back from the '@' to the previous space; the prepended ' ' makes this
-- work when the address starts the string.
cross apply (select p.at_pos - charindex(' ', reverse(' ' + left(@a, p.at_pos))) + 2 as email_start) as s
-- First space after the '@'; the appended ' ' makes this work when the
-- address ends the string.
cross apply (select charindex(' ', @a + ' ', p.at_pos) as email_end) as e

答案 5 :(得分:0)

对于包含新行字符的字符串,我使用PATINDEX修改了Felix的答案,以搜索第一个控制字符而不是空格。

我还必须修改Right字段的计算方式,使其减去正确的字符数。

    -- Extracts the email-like token around the first '@' of each sample string,
    -- treating every character in the range CHAR(0)..space (so new lines as well
    -- as spaces) as a word boundary.
    --   CteEmail   : sample input rows, including a multi-line string.
    --   CteStrings : text before the '@' ([Left]), that text reversed
    --                (Reverse_Left) and the text from the '@' onwards ([Right]).
    -- NOTE(review): how a [x-y] range is ordered depends on the collation in
    -- effect - confirm that control characters fall inside it on the target
    -- server.
    WITH CteEmail(email) AS(
        SELECT 'example string with new lines

    Email: some.example@email.address.com
(first email address - should be returned)

    Email: another@test.co.uk
(other email addresses should be ignored

more example text' UNION ALL
        SELECT 'Email: some.example@email.address.com' UNION ALL
        SELECT 'someemail@domain.org' UNION ALL
        SELECT 'some text someemail@domain.org some text' UNION ALL
        SELECT 'no email'
    )
    ,CteStrings AS(
        SELECT
            [Left] = LEFT(email, a.at_pos - 1),
            Reverse_Left = REVERSE(LEFT(email, a.at_pos - 1)),
            -- SUBSTRING from the '@' to the end; unlike RIGHT(.., LEN(..) - ..)
            -- this is not thrown off by trailing spaces, which LEN ignores.
            [Right] = SUBSTRING(email, a.at_pos, LEN(email))
        FROM CteEmail
        -- NULLIF turns "not found" (0) into NULL so LEFT() never receives a
        -- negative length, even if evaluated for a row the WHERE discards.
        CROSS APPLY (SELECT NULLIF(CHARINDEX('@', email), 0)) AS a(at_pos)
        WHERE email LIKE '%@%'
    )
    SELECT
        s.[Left],
        s.Reverse_Left,
        s.[Right],
        extracted_email =
            REVERSE(
                LEFT(s.Reverse_Left,
                    CASE
                        WHEN b.left_break = 0 THEN LEN(s.Reverse_Left)
                        ELSE b.left_break - 1
                    END
                )
            )
            +
            LEFT(s.[Right],
                CASE
                    WHEN b.right_break = 0 THEN LEN(s.[Right])
                    ELSE b.right_break - 1
                END
            )
    FROM CteStrings AS s
    -- One delimiter pattern shared by the "is there a boundary?" test and the
    -- extraction on both sides, so the two can never disagree.
    CROSS APPLY (SELECT '%[' + CHAR(0)+'- ]%') AS p(delimiter_pattern)
    CROSS APPLY (
        SELECT PATINDEX(p.delimiter_pattern, s.Reverse_Left),
               PATINDEX(p.delimiter_pattern, s.[Right])
    ) AS b(left_break, right_break)

答案 6 :(得分:0)

如果你在一个函数中需要它,那么这对我有用......

-- Returns the first email-like token in @input: the run of non-space
-- characters surrounding the first '@'.
--
-- Parameters:
--   @input  text that may contain an email address.
-- Returns:
--   The token (without any surrounding space), or '' when @input is NULL or
--   contains no '@'. The result is limited to 100 characters by the return
--   type.
CREATE FUNCTION [dbo].[extractEmail]
(
    @input nvarchar(500)
)
RETURNS nvarchar(100)
AS
BEGIN
    DECLARE @email nvarchar(100) = ''
    DECLARE @atPosition int = CHARINDEX('@', @input)
    DECLARE @tokenStart int
    DECLARE @tokenEnd int

    IF @atPosition > 0
    BEGIN
        -- Walk back from the '@' to the previous space; the prepended ' '
        -- guarantees a hit when the address starts the input.
        SET @tokenStart = @atPosition
                          - CHARINDEX(' ', REVERSE(' ' + LEFT(@input, @atPosition)))
                          + 2
        -- First space after the '@'; the appended ' ' guarantees a hit when
        -- the address ends the input. The space itself is excluded from the
        -- result because @tokenEnd is used as an exclusive bound.
        SET @tokenEnd = CHARINDEX(' ', @input + ' ', @atPosition)
        SET @email = SUBSTRING(@input, @tokenStart, @tokenEnd - @tokenStart)
    END

    RETURN @email
END