SQL Server:从地址列中提取所有电子邮件和电话号码

时间:2017-04-13 11:21:55

标签: sql sql-server tsql

SQL Server中的表如下所示:

sn | name | address                                  |
------------------------------------------------
1  | abc  | new street, ND, homeemail@email.com 
   | work workemail@email.com, Contact numbers
   | Home phone 0019827343, Mobile 990288394 
2  | ..

输出应显示

sn| name| phone1 | phone2 | email1 | email2|
---------------------------------------------

有没有T-SQL命令可以完成此操作?

2 个答案:

答案 0 :(得分:2)

正如Tim所说……这个办法并不优雅

-- Sample data: one row whose free-text Address mixes street text, e-mail
-- addresses and phone numbers.
Declare @YourTable table (SN int,Name varchar(50),Address varchar(500))
Insert Into @YourTable (SN,Name,Address) values
 (1,'abc','new street, ND, homeemail@email.com  work workemail@email.com, Contact numbers  Home phone 0019827343, Mobile 990288394 ')

-- For each row: split Address on spaces/commas, keep the words that look like
-- an e-mail ('%@%.%') or start with five digits, number the e-mails and the
-- phone numbers separately in order of appearance, and pivot the first two of
-- each kind into Phone1/Phone2/Email1/Email2.
 Select A.SN
       ,A.Name
       ,B.Phone1
       ,B.Phone2
       ,B.Email1
       ,B.Email2
 From  @YourTable A
 Cross Apply (
                Select Phone1=max(case when RN=1 and charindex('@',RetVal)=0 then RetVal end)
                      ,Phone2=max(case when RN=2 and charindex('@',RetVal)=0 then RetVal end)
                      ,Email1=max(case when RN=1 and charindex('@',RetVal)>0 then RetVal end)
                      ,Email2=max(case when RN=2 and charindex('@',RetVal)>0 then RetVal end)
                 From  (
                            -- Partition on "contains @" (0/1), not on the position of '@':
                            -- partitioning on charindex() itself puts e-mails whose local parts
                            -- differ in length into separate partitions, so each would get RN=1.
                            Select RetSeq
                                  ,RetVal
                                  ,RN = Row_Number() over (Partition By case when charindex('@',RetVal)>0 then 1 else 0 end Order By RetSeq)
                             From  (
                                    -- NOTE(review): RetSeq relies on nodes() returning the <x> elements
                                    -- in document order; Order By (Select null) does not guarantee it.
                                    Select RetSeq = Row_Number() over (Order By (Select null))
                                          ,RetVal = LTrim(RTrim(x1.i.value('(./text())[1]', 'varchar(max)')))
                                    -- Commas become spaces, every space becomes a marker, For XML Path('')
                                    -- entity-encodes the text, then each marker is turned into an
                                    -- </x><x> boundary so every word ends up in its own <x> element.
                                    From  (Select x = Cast('<x>' + replace((Select replace(replace(A.Address,',',' '),' ','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as X
                                    Cross Apply x.nodes('x') AS x1(i)
                                   ) B1
                             Where RetVal Like '%@%.%'
                                or RetVal Like '[0-9][0-9][0-9][0-9][0-9]%'
                       ) B2
             ) B

返回

SN  Name    Phone1      Phone2      Email1               Email2
1   abc     0019827343  990288394   homeemail@email.com  workemail@email.com

答案 1 :(得分:2)

我的方法与John的类似……可以使用NGrams8k以空格作为分隔符,把字符串拆分成若干标记(token),然后筛选出看起来像电子邮件或电话号码的标记。下面是NGrams8k函数:

-- dbo.NGrams8k: inline table-valued function that returns every contiguous
-- @N-character substring of @string together with its 1-based start position.
-- Returns no rows when @N is NULL, @N < 1, @string is NULL, or @N exceeds the
-- byte length of @string.
CREATE FUNCTION dbo.NGrams8k
(
  @string varchar(8000), -- text to cut into tokens
  @N      int            -- number of characters per token
)
RETURNS TABLE WITH SCHEMABINDING AS RETURN
WITH
Seed(N) AS
(
  -- 90 constant rows (9 lines of 10); only the row count matters
  SELECT 1
  FROM (VALUES
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),
        (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)
       ) AS v(N)
),
Tally(N) AS
(
  -- Seed x Seed gives up to 8100 rows; TOP keeps one row per token start
  -- position (byte length - @N + 1). ABS keeps the TOP argument non-negative
  -- when @N is larger than the string; the final WHERE discards those rows.
  SELECT TOP (ABS(CAST(DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1) AS bigint)))
         ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- constant ORDER BY: no sort needed
  FROM Seed AS s1
  CROSS JOIN Seed AS s2
)
SELECT
  Tally.N                                      AS position, -- 1-based start of the token
  SUBSTRING(@string, CAST(Tally.N AS int), @N) AS token     -- the @N-character token
FROM Tally
WHERE DATALENGTH(@string) >= @N AND @N >= 1;                -- reject bad parameter values

这是解决方案:

-- Sample data:
DECLARE @YourTable TABLE(SN int, Name varchar(50), [Address] varchar(500));
INSERT @YourTable (SN, Name, [Address]) VALUES
 (1,'abc','new street, ND, homeemail@email.com  work workemail@email.com, Contact numbers  Home phone 0019827343, Mobile 990288394 ');

-- solution:
-- Split each Address into space-delimited words with dbo.NGrams8k, keep the
-- words that look like an e-mail or contain a run of five digits, number the
-- e-mails and the phone numbers separately per source row, and pivot the first
-- two of each kind into columns. Rows with no matching word produce no output.
WITH prepared AS
(
  -- Commas become spaces so "a@b.com,12345" is not fused into one word; the
  -- leading space guarantees that the first word is preceded by a delimiter.
  SELECT SN, Name, Padded = ' ' + REPLACE([Address], ',', ' ')
  FROM @YourTable
),
split AS
(
  -- ng.position is the 1-based position of a space. The word starts on the
  -- next character and runs up to (not including) the following space, or to
  -- the end of the string when there is no further space.
  SELECT
    p.SN,
    p.Name,
    Pos   = ng.position,
    Token = SUBSTRING(p.Padded, ng.position + 1,
              ISNULL(NULLIF(CHARINDEX(' ', p.Padded, ng.position + 1), 0) - ng.position - 1,
                     DATALENGTH(p.Padded)))
  FROM prepared AS p
  CROSS APPLY dbo.NGrams8k(p.Padded, 1) AS ng
  WHERE ng.token = ' '
),
candidates AS
(
  SELECT
    SN,
    Name,
    Pos,
    Token,
    IsEmail = CASE WHEN Token LIKE '%@%' THEN 1 ELSE 0 END
  FROM split
  WHERE Token LIKE '%@%' OR Token LIKE '%[0-9][0-9][0-9][0-9][0-9]%'
),
Tokens AS
(
  -- Number per source row and per kind, so the result does not depend on
  -- e-mails preceding phone numbers or on how many rows the table holds.
  SELECT
    SN,
    Name,
    Token,
    IsEmail,
    TN = ROW_NUMBER() OVER (PARTITION BY SN, Name, IsEmail ORDER BY Pos)
  FROM candidates
)
SELECT
  SN,
  Name,
  Phone1 = MAX(CASE WHEN IsEmail = 0 AND TN = 1 THEN Token END),
  Phone2 = MAX(CASE WHEN IsEmail = 0 AND TN = 2 THEN Token END),
  Email1 = MAX(CASE WHEN IsEmail = 1 AND TN = 1 THEN Token END),
  Email2 = MAX(CASE WHEN IsEmail = 1 AND TN = 2 THEN Token END)
FROM Tokens
GROUP BY SN, Name;