我正在尝试从现有的评论字段中提取电子邮件地址并将其放入自己的列中。字符串可能是这样的“这是一个电子邮件地址为someemail@domain.org的示例评论”,或者只是电子邮件本身“someemail@domain.org”。
我认为最好的办法是找到'@'符号的索引,然后向两个方向搜索,直到到达字符串的开头或末尾,或者遇到空格为止。有人可以帮我解决这个问题吗?
答案 0 :(得分:9)
我知道wewesthemenace已经回答了这个问题,但他/她的解决方案似乎过于复杂。为什么要将电子邮件地址的左侧和右侧连接在一起?我宁愿找到电子邮件地址的开头和结尾,然后使用substring返回电子邮件地址,如下所示:
-- Sample comments: the address may be at the end, in the middle or at the
-- beginning of the text, or be absent.
DECLARE @Table TABLE (comment NVARCHAR(50));
INSERT INTO @Table (comment)
VALUES ('blah MyEmailAddress@domain.org'), --At the end
('blah MyEmailAddress@domain.org blah blah'), --In the middle
('MyEmailAddress@domain.org blah'), --At the beginning
('no email');

-- Returns every comment together with the space-delimited token that holds
-- the first '@' in it (NULL when the comment has no '@').
-- Both boundaries are measured from that same '@'. Searching the reversed
-- string for '@' would locate the LAST '@' instead, and a comment with two
-- addresses would then produce a negative SUBSTRING length.
SELECT comment,
       CASE
           WHEN atPosition = 0 THEN NULL
           ELSE SUBSTRING(comment, beginningOfEmail, endOfEmail - beginningOfEmail)
       END AS email
FROM @Table
-- Position of the first '@' (0 when absent).
CROSS APPLY (SELECT CHARINDEX('@', comment)) AS P(atPosition)
-- First space at or after the '@'; the appended space guarantees a hit when
-- the address is the last token.
CROSS APPLY (SELECT CHARINDEX(' ', comment + ' ', atPosition)) AS A(endOfEmail)
-- Walk backwards from the '@' to the nearest space; the prepended space
-- guarantees a hit when the address is the first token.
CROSS APPLY (SELECT atPosition - CHARINDEX(' ', REVERSE(' ' + LEFT(comment, atPosition))) + 2) AS B(beginningOfEmail);
结果:
comment email
-------------------------------------------------- --------------------------------------------------
blah MyEmailAddress@domain.org MyEmailAddress@domain.org
blah MyEmailAddress@domain.org blah blah MyEmailAddress@domain.org
MyEmailAddress@domain.org blah MyEmailAddress@domain.org
no email NULL
答案 1 :(得分:7)
您可以在字符串中搜索 '@',然后分别取得 '@' 左侧(LEFT)和右侧(RIGHT)的字符串。接着对左侧部分执行 REVERSE,找到第一个 ' ' 出现的位置,并用 SUBSTRING 截取到该位置;再执行一次 REVERSE 即可恢复原来的顺序。右侧部分采用同样的原理,只是不需要 REVERSE。
示例字符串:'some text someemail@domain.org some text'
(1) LEFT = 'some text someemail'
(2) RIGHT = '@domain.org some text'
(3) REVERSE (1) = 'liameemos txet emos'
(4) SUBSTRING (3) 直到第一个空格 = 'liameemos'
(5) REVERSE (4) = 'someemail'
(6) SUBSTRING (2) 直到第一个空格 = '@domain.org'
(7) 将 (5) 与 (6) 拼接 = 'someemail@domain.org'
您的查询将是:
示例数据:
-- Extracts the space-delimited token around the first '@' of each sample
-- string by splitting the text at the '@' and trimming each half at its
-- nearest space.
;WITH CteEmail(email) AS(
    SELECT 'someemail@domain.org' UNION ALL
    SELECT 'some text someemail@domain.org some text' UNION ALL
    SELECT 'no email'
)
-- Split every string that contains an '@' into the text before the '@'
-- (also in reversed form) and the text from the '@' onwards.
,CteStrings AS(
    SELECT
        [Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
        Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
        -- Everything from the '@' to the end of the string. RIGHT(email, <'@' position> + 1)
        -- only returns this when the text after the '@' happens to be exactly that long;
        -- SUBSTRING from the '@' position is correct for any input.
        [Right] = SUBSTRING(email, CHARINDEX('@', email, 0), LEN(email))
    FROM CteEmail
    WHERE email LIKE '%@%'
)
SELECT
    [Left],
    Reverse_Left,
    [Right],
    -- Local part: the reversed left half up to its first space, reversed back.
    -- SUBSTRING(x, 0, n) yields the first n - 1 characters, i.e. it stops just
    -- before the space (or takes everything when there is no space).
    REVERSE(
        SUBSTRING(Reverse_Left, 0,
            CASE
                WHEN CHARINDEX(' ', Reverse_Left, 0) = 0 THEN LEN(Reverse_Left) + 1
                ELSE CHARINDEX(' ', Reverse_Left, 0)
            END
        )
    )
    +
    -- Domain part: the right half (starting with '@') up to its first space.
    SUBSTRING([Right], 0,
        CASE
            WHEN CHARINDEX(' ', [Right], 0) = 0 THEN LEN([Right]) + 1
            ELSE CHARINDEX(' ', [Right], 0)
        END
    ) AS extracted_email
FROM CteStrings
结果:
email
----------------------------------------
someemail@domain.org
some text someemail@domain.org some text
no email
答案 2 :(得分:4)
Stephan的答案非常棒。
但是,当我尝试在每行中获取多个电子邮件地址时,我遇到了此错误:
传递给LEFT或SUBSTRING函数的长度参数无效
我使用了 DBA Stack Exchange 上的这个答案来获取字符串中 @ 的所有位置。它需要一个表值函数,该函数返回的位置数量等于某个模式在字符串中出现的次数。我还必须修改 CROSS APPLY 子句来处理多个电子邮件地址。
我的表:
-- Sample comments for the multi-address query: one row with three
-- addresses, one row with a single address and one row without any.
DECLARE @Table TABLE (comment VARCHAR(500));

INSERT INTO @Table (comment)
SELECT 'blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com'
UNION ALL
SELECT 'blah hello.world@domain.org more'
UNION ALL
SELECT 'no email';
表值函数:
-- Returns one row per occurrence of @term inside @string; `pos` is the
-- 1-based position in @string at which the occurrence starts.
--
-- Parameters:
--   @string  text to search
--   @term    literal text to look for
--
-- Notes:
--   * Candidate positions come from numbering the rows of sys.all_objects,
--     so at most that many positions of @string are examined.
--   * Only candidate numbers up to LEN(@string) are tested against
--     @term + @string, so an occurrence that ends on the last character of
--     @string is not reported.
CREATE FUNCTION dbo.fnFindPatternLocation
(
    @string NVARCHAR(MAX),
    @term NVARCHAR(255)
)
RETURNS TABLE
AS
RETURN
(
    -- Number indexes into (@term + @string); subtracting LEN(@term) converts
    -- it back to a position in @string.
    SELECT pos = n.Number - LEN(@term)
    FROM (SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
          FROM sys.all_objects) AS n(Number)
    -- Number = 1 is the prepended copy of @term itself, so it is skipped.
    WHERE n.Number > 1
      AND n.Number <= CONVERT(INT, LEN(@string))
      AND SUBSTRING(@term + @string, n.Number, LEN(@term)) = @term
);
GO
查询:
-- Returns one row per '@' found in each comment, with the space-delimited
-- token surrounding that '@' as the e-mail address.
SELECT comment,
       pos,
       SUBSTRING(comment, beginningOfEmail, endOfEmail - beginningOfEmail) AS email
FROM @Table
-- One row per '@' position in the comment.
CROSS APPLY (SELECT pos FROM dbo.fnFindPatternLocation(comment, '@')) AS A(pos)
-- First space at or after the '@'; the appended space guarantees a hit when
-- the address is the last token.
CROSS APPLY (SELECT CHARINDEX(' ', comment + ' ', pos)) AS B(endOfEmail)
-- Walk backwards from the '@' to the nearest space. The prepended space
-- guarantees a hit when the address is the first token; without it CHARINDEX
-- returns 0 and the computed start lands two characters past the '@'.
CROSS APPLY (SELECT pos - CHARINDEX(' ', REVERSE(' ' + SUBSTRING(comment, 1, pos))) + 2) AS C(beginningOfEmail)
结果:
comment
---------------------------------------------------------------------------------------------------------
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah blah My.EmailAddress@domain.org more blah someemailaddress@domain.com even more blah asdf@gmail.com
blah hello.world@domain.org more
pos email
--- ------------------------------
26 My.EmailAddress@domain.org
64 someemailaddress@domain.com
95 asdf@gmail.com
17 hello.world@domain.org
答案 3 :(得分:1)
-- Extracts every plausible e-mail address from free text, one output row per
-- address. Works in three stages:
--   f : recursively replaces every disallowed character with a space,
--   s : recursively walks from one '@' to the next in the cleaned text,
--   final SELECT : cuts the token around each '@' and filters out tokens
--                  that do not look like an address.
DECLARE @t TABLE (row_id INT, email VARCHAR(100))
INSERT @t (row_id, email)
VALUES (1, 'drgkls<ivan@gvi.ru>, info@gvi.com, @ dgh507-16-65@'),
(2, 'hjshfkjshfj@kjs.kjsehf herwfjewr@kjsd.com adjfhja@.com u3483dhj@hhb@.dfj'),
(3, 'kjsdghfjs4254.23detygh@jhjdfg.dgb лдоврывплдоо isgfsi@ klsdfksdl@,dd.com')
-- Matches any single character outside the set a-z, 0-9, '@', '.', '_' and space.
DECLARE @pat VARCHAR(100) = '%[^a-z0-9@._ ]%';
WITH f AS (
-- Anchor: pad the text with one space on each side (hence VARCHAR(102) for a
-- VARCHAR(100) column) so every token is delimited by spaces. `bad` is the
-- first disallowed character of the original text and `pat` its position
-- (0 when the text has none).
SELECT row_id,
CAST(' ' + email + ' ' AS VARCHAR(102)) email,
SUBSTRING(email, PATINDEX(@pat, email), 1) bad,
PATINDEX(@pat, email) pat
FROM @t
UNION ALL
-- Recursive step: replace every occurrence of the current disallowed
-- character with a space, then look up the next disallowed character.
SELECT row_id,
CAST(REPLACE(email, bad, ' ') AS VARCHAR(102)),
SUBSTRING(REPLACE(email, bad, ' '), PATINDEX(@pat, REPLACE(email, bad, ' ')), 1) bad,
PATINDEX(@pat, REPLACE(email, bad, ' '))
FROM f
-- Keep recursing only while the text of this level still has a disallowed character.
WHERE PATINDEX(@pat, email) > 0
),
s AS
(
-- Anchor: the fully cleaned version of each row (pat = 0) that contains an
-- '@'; `pos` is the position of its first '@'.
SELECT row_id,
email, PATINDEX('%@%', email) pos
FROM f
WHERE pat = 0
AND PATINDEX('%@%', email) > 0
UNION ALL
-- Recursive step: drop everything up to and including the current '@' and
-- locate the next '@' in the remainder; stops when no '@' is left.
SELECT row_id,
SUBSTRING(email, pos + 1, 102),
PATINDEX('%@%', SUBSTRING(email, pos + 1, 102))
FROM s
WHERE PATINDEX('%@%', SUBSTRING(email, pos + 1, 102)) > 0
)
-- o1 is the text between the nearest preceding space and the '@';
-- pp is the text from the '@' up to (not including) the next space.
SELECT row_id, o1 + pp
FROM s
-- s1: the text before the '@', reversed.
CROSS APPLY (SELECT REVERSE(LEFT(email, pos -1)) s1) x
-- i1: position of the first space in s1, i.e. the distance back from the '@'
-- to the nearest preceding space.
CROSS APPLY (SELECT CHARINDEX(' ', s1) i1) y
-- o1: the characters before that space, restored to original order. The row
-- is dropped when there is no preceding space (i1 = 0).
CROSS APPLY (SELECT REVERSE(LEFT(s1, i1 -1)) o1 WHERE i1 > 0) z
-- i2: first space at or after the '@'.
CROSS APPLY (SELECT CHARINDEX(' ', email, pos) i2) e
-- pp: from the '@' up to that space. The row is dropped when no space is
-- found or the '@' is immediately followed by a space.
CROSS APPLY (SELECT SUBSTRING(email, pos, i2 -pos) pp WHERE i2 > pos + 1) q
-- Plausibility filters: the part before the '@' is longer than one character,
-- and the part from the '@' on contains a '.', has no second '@', has no '.'
-- directly after the '@' and does not end with '.'.
WHERE LEN(o1) > 1
AND CHARINDEX('.', pp) > 0
AND PATINDEX('%@%@%', pp) = 0
AND PATINDEX('%@.%', pp) = 0
AND PATINDEX('%.', pp) = 0
答案 4 :(得分:0)
这一行也可行(虽然lol有点长):
-- Example input (declare @a before running the statement):
--declare @a varchar(100)
--set @a = 'a asfd saasd asdfgh@asd.com wqe z zx cxzc '
--
-- Returns the space-delimited token around the first '@' in @a, or NULL when
-- @a contains no '@'. @a is padded with a space on both sides so that an
-- address at the very beginning or the very end of the text is delimited the
-- same way as one in the middle.
select case
           when p.at_pos = 0 then null
           else substring(s.padded, b.email_start, e.email_end - b.email_start)
       end as email
from (select ' ' + @a + ' ' as padded) as s
-- position of the first '@' in the padded text (0 when absent)
cross apply (select charindex('@', s.padded)) as p(at_pos)
-- first space at or after the '@'
cross apply (select charindex(' ', s.padded, p.at_pos)) as e(email_end)
-- nearest space before the '@', found by searching the reversed prefix
cross apply (select p.at_pos - charindex(' ', reverse(left(s.padded, p.at_pos))) + 2) as b(email_start)
答案 5 :(得分:0)
对于包含换行符的字符串,我使用 PATINDEX 修改了 Felix 的答案,以搜索第一个控制字符(而不仅仅是空格)。
我还必须修改 Right 字段,以减去正确的字符数量。
-- Extracts the token around the first '@' of each sample string, treating any
-- character in the range CHAR(0)..space as a token boundary so that addresses
-- delimited by line breaks are handled as well.
-- NOTE(review): how CHAR(0) and bracket ranges compare depends on the
-- collation in use - confirm on the target database.
WITH CteEmail(email) AS(
SELECT 'example string with new lines
Email: some.example@email.address.com
(first email address - should be returned)
Email: another@test.co.uk
(other email addresses should be ignored
more example text' UNION ALL
SELECT 'Email: some.example@email.address.com' UNION ALL
SELECT 'someemail@domain.org' UNION ALL
SELECT 'some text someemail@domain.org some text' UNION ALL
SELECT 'no email'
)
-- Split every string that contains an '@' into the text before the '@'
-- (also in reversed form) and the text from the '@' onwards.
,CteStrings AS(
    SELECT
        [Left] = LEFT(email, CHARINDEX('@', email, 0) - 1),
        Reverse_Left = REVERSE(LEFT(email, CHARINDEX('@', email, 0) - 1)),
        [Right] = RIGHT(email, LEN(email) - CHARINDEX('@', email, 0) + 1 )
    FROM CteEmail
    WHERE email LIKE '%@%'
)
SELECT
    s.[Left],
    s.Reverse_Left,
    s.[Right],
    -- Local part: the reversed left half up to its first boundary character,
    -- reversed back. SUBSTRING(x, 0, n) yields the first n - 1 characters.
    REVERSE(
        SUBSTRING(s.Reverse_Left, 0,
            CASE
                WHEN b.left_break = 0 THEN LEN(s.Reverse_Left) + 1
                ELSE b.left_break
            END
        )
    )
    +
    -- Domain part: the right half (starting with '@') up to its first boundary character.
    SUBSTRING(s.[Right], 0,
        CASE
            WHEN b.right_break = 0 THEN LEN(s.[Right]) + 1
            ELSE b.right_break
        END
    ) AS extracted_email
FROM CteStrings AS s
-- One boundary pattern shared by every lookup: testing for a boundary with one
-- character range and locating it with another lets a boundary character that
-- is only in the wider range go undetected.
CROSS APPLY (SELECT '%[' + CHAR(0) + '- ]%') AS p(boundary)
-- Position of the first boundary character on each side (0 when none).
CROSS APPLY (SELECT PATINDEX(p.boundary, s.Reverse_Left),
                    PATINDEX(p.boundary, s.[Right])) AS b(left_break, right_break)
答案 6 :(得分:0)
如果你在一个函数中需要它,那么这对我有用......
-- Returns the space-delimited token around the first '@' in @input.
--
-- Parameters:
--   @input  free text that may contain an e-mail address
-- Returns:
--   the text between the nearest space before the first '@' and the nearest
--   space after it (neither space included), or an empty string when @input
--   contains no '@' or is NULL.
CREATE FUNCTION [dbo].[extractEmail]
(
    @input nvarchar(500)
)
RETURNS nvarchar(100)
AS
BEGIN
    -- One character wider than @input so the leading pad cannot truncate it.
    DECLARE @padded nvarchar(501)
    DECLARE @atPosition int
    DECLARE @firstRelevantSpace int
    DECLARE @name nvarchar(100)
    DECLARE @secondRelevantSpace int
    DECLARE @everythingAfterAt nvarchar(501)
    DECLARE @domain nvarchar(100)
    DECLARE @email nvarchar(100) = ''

    IF CHARINDEX('@', @input, 0) > 0
    BEGIN
        -- The leading space guarantees that a space exists before the '@',
        -- even when the address is the first token.
        SET @padded = ' ' + @input
        SET @atPosition = CHARINDEX('@', @padded, 0)

        -- Distance back from the '@' to the nearest preceding space.
        SET @firstRelevantSpace = CHARINDEX(' ', REVERSE(LEFT(@padded, @atPosition - 1)))
        SET @name = REVERSE(LEFT(REVERSE(LEFT(@padded, @atPosition - 1)), @firstRelevantSpace - 1))

        -- Text from the '@' to the end of the input.
        SET @everythingAfterAt = SUBSTRING(@padded, @atPosition, LEN(@padded) - @atPosition + 1)
        SET @secondRelevantSpace = CHARINDEX(' ', @everythingAfterAt)

        IF @secondRelevantSpace = 0
            SET @domain = @everythingAfterAt
        ELSE
            -- Stop one character before the space so it is not returned as
            -- part of the address.
            SET @domain = LEFT(@everythingAfterAt, @secondRelevantSpace - 1)

        SET @email = @name + @domain
    END

    RETURN @email
END
GO