如何从字符串中提取所有电子邮件地址,并以逗号/分号分隔的列表形式返回?
-- Desired behaviour, case 1: a text containing a single address returns that address.
SELECT dbo.getEmailAddresses('this is misc andrew@g.com')
-- expected output: andrew@g.com
-- Desired behaviour, case 2: several addresses come back as one delimited list.
-- The input has a stray '%' in front of John@acme.com that is absent from the expected output.
SELECT dbo.getEmailAddresses('this is misc andrew@g.com and a medium text returning %John@acme.com')
-- expected output: andrew@g.com; John@acme.com
答案 0 :(得分:1)
这是一个UDF。如果您需要允许其他字符(例如电子邮件地址中的非英文字符),请修改这一行:SET @MailTempl='[A-Za-z0-9_.-]';
此处有一份允许字符的完整列表。
-- Extracts every e-mail-like token from @Str and returns them as a single
-- semicolon-separated string.
--
-- Parameters:
--   @Str  text to scan (varchar(8000)).
-- Returns:
--   varchar(8000) such as 'a@x.com;b@y.org', or NULL when @Str is NULL or
--   contains no address.
--
-- How it works: PATINDEX locates an '@' that has an allowed character on both
-- sides; the match is then widened to the left and to the right for as long as
-- the characters belong to @MailTempl. The scanned part of @Str is cut off and
-- the search repeats on the remainder.
--
-- To allow other characters (e.g. non-English letters) adjust @MailTempl.
CREATE FUNCTION [dbo].[getEmailAddresses] (@Str varchar(8000))
RETURNS varchar(8000) AS
BEGIN
    DECLARE @i int, @StartPos int, @AtPos int, @EndPos int, @ScanEnd int;
    DECLARE @MailList varchar(8000);
    DECLARE @MailTempl varchar(100);
    DECLARE @AtPattern varchar(210);

    SET @MailList = NULL;
    SET @MailTempl = '[A-Za-z0-9_.-]'; -- characters allowed in an address, not including @
    -- Built once: an allowed character, '@', an allowed character, anywhere in the text.
    SET @AtPattern = '%' + @MailTempl + '@' + @MailTempl + '%';

    -- PATINDEX points at the character before '@'; +1 moves onto the '@' itself.
    -- No match gives 0 + 1 = 1, which ends the loop (NULL input never enters it).
    SET @AtPos = PATINDEX(@AtPattern, @Str) + 1;
    WHILE @AtPos > 1
    BEGIN
        -- go left: SUBSTRING at position 0 yields '', which stops the scan
        SET @i = @AtPos - 1;
        WHILE (SUBSTRING(@Str, @i, 1) LIKE @MailTempl) SET @i = @i - 1;
        SET @StartPos = @i + 1;

        -- go right: SUBSTRING past the end yields '', which stops the scan
        SET @i = @AtPos + 1;
        WHILE (SUBSTRING(@Str, @i, 1) LIKE @MailTempl) SET @i = @i + 1;
        SET @ScanEnd = @i - 1;
        SET @EndPos = @ScanEnd;

        -- '.' is an allowed character, so sentence punctuation touching the
        -- address ("write to bob@x.com.") is picked up by the scans above.
        -- An address cannot start or end with a dot, so trim them off.
        WHILE @StartPos < @AtPos AND SUBSTRING(@Str, @StartPos, 1) = '.'
            SET @StartPos = @StartPos + 1;
        WHILE @EndPos > @AtPos AND SUBSTRING(@Str, @EndPos, 1) = '.'
            SET @EndPos = @EndPos - 1;

        -- Keep the match only if something is left on both sides of the '@'.
        IF @StartPos < @AtPos AND @EndPos > @AtPos
            SET @MailList = ISNULL(@MailList + ';', '')
                          + SUBSTRING(@Str, @StartPos, @EndPos - @StartPos + 1);

        -- prepare for the next round: continue after everything that was scanned
        -- (untrimmed end), so the remainder always gets shorter
        SET @Str = SUBSTRING(@Str, @ScanEnd + 1, LEN(@Str));
        SET @AtPos = PATINDEX(@AtPattern, @Str) + 1;
    END;

    RETURN @MailList;
END
答案 1 :(得分:1)
试试这个:
-- Scalar function that walks through @test with a recursive CTE, picks the
-- space-delimited token ending at each '.com', and returns the tokens as one
-- comma-separated string.
--
-- Parameters:
--   @test  text to scan.
-- Returns:
--   varchar(max) built from ',' + token for every CTE row, with the first
--   comma removed.
--
-- Limitations visible in the code:
--   * Only the literal '.com' is searched for; other endings are not detected.
--   * A token starts at the nearest preceding space, and that space is kept
--     in the extracted value.
--   * NOTE(review): when the remaining text is longer than 2 characters but
--     holds no '.com', CHARINDEX returns 0 and the expressions below work on
--     the first 3 characters instead - output for such input looks unintended;
--     confirm before relying on it.
create function getEmailAddresses
(
@test varchar(max)
)
returns varchar(max)
As
BEGIN
declare @emaillist varchar(max)
-- Commented-out sample input left in place by the author for ad-hoc testing:
--SET @test=' this is it by it a@b.com dsdkjl dsaldkj a@b.com dasdlk c@bn.com dsafhjkf anand@p.com d fdajf s@s.com .'
;WITH CTE as(
-- Anchor row.
-- emailids: take the prefix of @test up to the end of the first '.com',
--           reverse it, keep everything up to and including the first space
--           (i.e. the last space of the prefix), and reverse back.
-- rem:      the text that follows that first '.com'.
select reverse(left(reverse(left(@test,CHARINDEX('.com',@test)+3)),charindex(' ',reverse(left(@test,CHARINDEX('.com',@test)+3))))) as emailids,
right(@test,len(@test)-(CHARINDEX('.com',@test)+3)) rem
union all
-- Recursive rows: apply the same extraction to the previous row's rem.
-- The ELSE 'a' branches test the same condition as the WHERE clause below,
-- so they appear to be unreachable - NOTE(review): confirm.
select CASE WHEN len(rem)>2 then reverse(left(reverse(left(rem,CHARINDEX('.com',rem)+3)),charindex(' ',reverse(left(rem,CHARINDEX('.com',rem)+3))))) else 'a' end as emailids ,
CASE WHEN len(rem) > 2 then right(rem,len(rem)-(CHARINDEX('.com',rem)+3)) else 'a' end rem
-- Recursion stops once 2 or fewer characters remain.
from CTE where LEN(rem)>2
)
-- Concatenate ',' + emailids for all rows, then STUFF drops the first comma.
select @emaillist =STUFF((select ','+emailids from CTE for XML PATH('')),1,1,'')
return @emaillist
END
-- Sample call: the input holds five '.com' addresses separated by filler words.
select dbo.getEmailAddresses('this is it by it a@b.com dsdkjl dsaldkj a@b.com dasdlk c@bn.com dsafhjkf anand@p.com d fdajf s@s.com .')
答案 2 :(得分:1)
试试这个
-- Splits @str on single spaces by turning it into an XML fragment
-- (<X>token</X><X>token</X>...), keeps the tokens that look like e-mail
-- addresses, and returns them as one ';'-separated value in column Email.
DECLARE @str varchar(max) = 'this is misc andrew@g.com and a medium text returning John@acme.com';

;WITH Cte AS (
    SELECT
        -- one row per <X> node, i.e. per space-delimited token
        Items = Split.a.value('.', 'VARCHAR(100)')
    FROM
    (
        SELECT
            -- Escape &, < and > first: left as-is they make the CAST to XML
            -- fail with a parsing error. value() above decodes them again.
            CAST('<X>'
                 + REPLACE(
                       REPLACE(REPLACE(REPLACE(@str, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                       ' ', '</X><X>')
                 + '</X>' AS XML) AS Splitdata
    ) X
    CROSS APPLY Splitdata.nodes('/X') Split(a)
)
SELECT Email = STUFF((
        SELECT ';' + Items
        FROM Cte
        -- Loose shape check: letter/digit ... @ letter ... . letter ...
        -- NOTE(review): A-Z matching lower-case letters presumably relies on a
        -- case-insensitive collation - confirm for the target database.
        WHERE Items LIKE '[A-Z0-9]%[@][A-Z]%[.][A-Z]%'
        -- TYPE + value() returns plain text; plain FOR XML PATH('') would
        -- entity-encode characters such as & in the result.
        FOR XML PATH(''), TYPE
    ).value('.', 'varchar(max)'), 1, 1, '');
结果
<strong>EmailAddress</strong>
andrew@g.com;John@acme.com
N.B.~您可能需要执行以下操作
a)根据您的要求,您需要把它改写成一个TVF(表值函数)。您可以参考文章《Split Function in Sql Server using Set base approach》。
b)可以使用类似上面的 LIKE 子句来验证电子邮件,但是对于更复杂的要求,您可能需要对其进行增强。
c)如有必要,您可能需要在应用过滤条件之前先清理数据。例如 %Yoo@acme.com 不是有效的电子邮件地址:先删除 "%" 符号,然后再应用过滤条件。
但正如有人提到的那样,最好不要在 SQL Server 端进行字符串拆分/操作,我同意这个观点,所以下面给出一段实现相同功能的 C# 代码:
/// <summary>
/// Demo entry point: extracts the e-mail addresses from a sample sentence,
/// prints them as one ';'-separated line and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string str = "this is misc andrew@g.com and a medium text returning John@acme.com";
    // string.Join yields "" for an empty list, whereas Aggregate without a
    // seed throws InvalidOperationException when no address is found.
    string result = string.Join(";", GetValidEmails(str).ToArray());
    Console.WriteLine(result);
    Console.ReadKey();
}
/// <summary>
/// Splits <paramref name="input"/> on white space and returns, in order of
/// appearance, the tokens that match a simple e-mail address pattern.
/// </summary>
/// <param name="input">Text to scan; null or empty yields an empty list.</param>
/// <returns>The matching tokens; never null.</returns>
private static List<string> GetValidEmails(string input)
{
    List<string> lstValidEmails = new List<string>();
    if (string.IsNullOrEmpty(input))
    {
        return lstValidEmails;
    }

    // Built once instead of once per token. The top-level domain may be up to
    // 63 letters long (the DNS label limit); the previous {2,4} bound rejected
    // valid addresses such as name@example.museum.
    Regex emailRegex = new Regex(
        @"^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,63}$",
        RegexOptions.IgnoreCase);

    // A null separator array makes Split break on any white-space character
    // (spaces, tabs, line breaks); empty tokens from repeated separators are dropped.
    string[] tokens = input.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
    foreach (string email in tokens)
    {
        if (emailRegex.IsMatch(email))
        {
            lstValidEmails.Add(email);
        }
    }
    return lstValidEmails;
}
希望这会有所帮助。