全名

时间:2017-11-15 16:24:41

标签: sql-server tsql subquery substring data-extraction

Need help with extracting the firstname, middlename and lastname from a 
freetext fullname. How to extract them out with all these formats?
Need to figure out how to handle format 2,5,9,7

--fullname sample data
DECLARE @name TABLE 
(fullname VARCHAR(100))
INSERT INTO @name SELECT
'Malone,Susan M' UNION ALL SELECT --1
'Conn,Chris G'  UNION ALL SELECT --2
'Van Pess,Wen B' UNION ALL SELECT --3
'DESHPANDE, ANN W.' UNION ALL SELECT --4
'Asif,LEE' UNION ALL SELECT --5
'CERVANTES MANDY'UNION ALL SELECT --6
'Bill, Dave' UNION ALL SELECT --7
'SMITH,ANN M'  UNION ALL SELECT --8
'BHULLER,  MATT'  UNION ALL SELECT  --9
'KIM (DAUM), GAIL' UNION ALL SELECT  --10
'John.Mills'--11

DECLARE @DELIMITER1 varchar(5), @DELIMITER2 varchar(5), @DELIMITER3 
varchar(5),@MAX_LENGTH int
SET @DELIMITER1 = ','
SET @DELIMITER2 = ' '
SET @MAX_LENGTH = 50


--LastName
SELECT   fullname,
case when
 CHARINDEX(@DELIMITER2, fullname) >=1
 then replace(SUBSTRING(fullname, 1, CHARINDEX(@DELIMITER2, fullname) 
),',','')--replace to empty string if contains a ","
 when
 CHARINDEX(@DELIMITER2, fullname) =0
 then replace(SUBSTRING(fullname, 1, CHARINDEX(@DELIMITER1, fullname) 
),',','')--replace to empty string if contains a ","
 else null
 end as Lastname,


--Middle Name
CASE 
-- Middle fullname follows two-fullname first fullnames like Mary Ann 
   WHEN LEN(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 
2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(fullname, 
CHARINDEX(@DELIMITER1,fullname)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0 
--when len is greater than 0
     THEN SUBSTRING(fullname, LEN(fullname) - CHARINDEX(@DELIMITER2, 
REVERSE(fullname))+2, @MAX_LENGTH)
        ELSE NULL
END AS Middlefullname,


--First Name
CASE 
-- Count the number of @DELIMITER2. Choose the string between the 

   WHEN LEN(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 
2,@MAX_LENGTH)) - LEN(REPLACE(SUBSTRING(fullname, 
CHARINDEX(@DELIMITER1,fullname)+ 2,@MAX_LENGTH), @DELIMITER2, '')) > 0 --
--when len is greater than 0
     Then replace(ltrim(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 
1, 
---need help here
(LEN(SUBSTRING(fullname, CHARINDEX(@DELIMITER1,fullname)+ 2,@MAX_LENGTH))- 
LEN(SUBSTRING(fullname, LEN(fullname) - CHARINDEX(@DELIMITER2, 
REVERSE(fullname))+2, @MAX_LENGTH))))),'-','') --replace the "-" to empty 
string
        ELSE ltrim(SUBSTRING(fullname,CHARINDEX(@DELIMITER1,fullname)+ 
    1,@MAX_LENGTH))--trimmed leading spaces
END AS Firstname
FROM @name
order by fullname

1 个答案:

答案 0 :(得分:0)

连接"名称部分"的第一个逻辑带点。请注意我的评论。

--fullname sample data
DECLARE @name TABLE 
(nameid int identity, fullname VARCHAR(100))
INSERT INTO @name SELECT
'Malone,Susan M' UNION ALL SELECT --1
'Conn,Chris G'  UNION ALL SELECT --2
'Van Pess,Wen B' UNION ALL SELECT --3
'DESHPANDE, ANN W.' UNION ALL SELECT --4
'Asif,LEE' UNION ALL SELECT --5
'CERVANTES MANDY'UNION ALL SELECT --6
'Bill, Dave' UNION ALL SELECT --7
'SMITH,ANN M'  UNION ALL SELECT --8
'BHULLER,  MATT'  UNION ALL SELECT  --9
'KIM (DAUM), GAIL' UNION ALL SELECT  --10
'John.Mills';--11


select original = fullname, prepped = dotted.fn, total.spaces
from @name
cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn) -- check for parentheses:
cross apply (values ( -- remove parentheses if they exist, replace commas w/ dots, dots w/ spaces:
  case 
    when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000)
    else replace(replace(fn, ',', '.'),'.',' ')
  end)) prep(fn)
cross apply (values (replace(rtrim(ltrim(replace(prep.fn,'  ',' '))),'  ',' '))) clean(fn)
cross apply (values (len(clean.fn)-len(replace(clean.fn,' ','')))) total(spaces) -- count spaces
cross apply (values (replace(clean.fn, ' ','.'))) dotted(fn); -- replace spaces with dots 

返回

original                 prepped              spaces
------------------------ -------------------- -------
Malone,Susan M           Malone.Susan.M       2
Conn,Chris G             Conn.Chris.G         2
Van Pess,Wen B           Van.Pess.Wen.B       3
DESHPANDE, ANN W.        DESHPANDE.ANN.W      2
Asif,LEE                 Asif.LEE             1
CERVANTES MANDY          CERVANTES.MANDY      1
Bill, Dave               Bill.Dave            1
SMITH,ANN M              SMITH.ANN.M          2
BHULLER,  MATT           BHULLER.MATT         1
KIM (DAUM), GAIL         KIM.GAIL             1
John.Mills               John.Mills           1

其余的可以使用如下的解析名来完成:

--fullname sample data
DECLARE @name TABLE 
(nameid int identity, fullname VARCHAR(100))
INSERT INTO @name SELECT
'Malone,Susan M' UNION ALL SELECT --1
'Conn,Chris G'  UNION ALL SELECT --2
'Van Pess,Wen B' UNION ALL SELECT --3
'DESHPANDE, ANN W.' UNION ALL SELECT --4
'Asif,LEE' UNION ALL SELECT --5
'CERVANTES MANDY'UNION ALL SELECT --6
'Bill, Dave' UNION ALL SELECT --7
'SMITH,ANN M'  UNION ALL SELECT --8
'BHULLER,  MATT'  UNION ALL SELECT  --9
'KIM (DAUM), GAIL' UNION ALL SELECT  --10
'John.Mills';--11

with clean as 
(
  select original = fullname, prepped = dotted.fn, total.spaces
  from @name
  cross apply (values (patindex('%(%),%',fullname),fullname)) prep1(x,fn) -- check for parentheses:
  cross apply (values ( -- remove parentheses if they exist, replace commas w/ dots, dots w/ spaces:
    case 
      when prep1.x > 1 then substring(fn,1,x-1) + substring(fn,charindex(',',fn,x)+1,8000)
      else replace(replace(fn, ',', '.'),'.',' ')
    end)) prep(fn)
  cross apply (values (replace(rtrim(ltrim(replace(prep.fn,'  ',' '))),'  ',' '))) clean(fn)
  cross apply (values (len(clean.fn)-len(replace(clean.fn,' ','')))) total(spaces) -- count spaces
  cross apply (values (replace(clean.fn, ' ','.'))) dotted(fn)
)
select  original, cleaned = 
  case spaces 
    when 1 then parsename(prepped,1)+' '+parsename(prepped,2) 
    when 2 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3)
    when 3 then parsename(prepped,2)+' '+parsename(prepped,1)+' '+parsename(prepped,3)+
            ' '+parsename(prepped,4)
  end
from clean

<强>返回:

original             cleaned
-------------------- ------------------
Malone,Susan M       Susan M Malone
Conn,Chris G         Chris G Conn
Van Pess,Wen B       Wen B Pess Van
DESHPANDE, ANN W.    ANN W DESHPANDE
Asif,LEE             LEE Asif
CERVANTES MANDY      MANDY CERVANTES
Bill, Dave           Dave Bill
SMITH,ANN M          ANN M SMITH
BHULLER,  MATT       MATT BHULLER
KIM (DAUM), GAIL     GAIL KIM
John.Mills           Mills John

或者,您可以使用正则表达式clr(mdq.regexreplace)。几年前注意我的post on SSC