从全名解析Prefix First Middle Last Suffix

时间:2017-03-21 01:40:25

标签: tsql

我需要解析格式为“Prefix First Middle Last Suffix”(前缀、名字、中间名、姓氏、后缀)的全名,但并非每个部分都一定存在。前缀、名字、中间名和姓氏的解析已经可以正常工作,但后缀 Jr 被填进了姓氏。如何让后缀显示在单独的后缀列中?下面的示例代码包含测试数据。

-- Splits a full name into PREFIX / FIRST_NAME / MIDDLE_NAME / LAST_NAME.
-- Each CTE peels one token off the front of the string and hands the rest
-- to the next step in a column called REMAINING.
-- Known limitation: there is no suffix handling, so a trailing "Jr." is
-- returned as part of LAST_NAME.
;WITH source_data AS
(
    -- Test data. To run against a real table, use instead:
    --   SELECT CONTACT AS FULL_NAME FROM CONTACT
    SELECT 'Andy D Where' AS FULL_NAME
    UNION SELECT 'Cathy T Landers'
    UNION SELECT 'Ms Annie Wint There'
    UNION SELECT 'Frank Fields'
    UNION SELECT 'Howdy U Pokes Jr.'
),
clean_data AS
(
    -- Trim both ends and squeeze doubled spaces (two passes) so that the
    -- later steps can split on a single space. The untouched input is kept
    -- alongside for display.
    SELECT
        src.FULL_NAME AS INPUT_DATA,
        REPLACE(REPLACE(LTRIM(RTRIM(src.FULL_NAME)), '  ', ' '), '  ', ' ') AS FULL_NAME
    FROM source_data AS src
),
prefix_split AS
(
    -- If the first three characters are one of the listed titles, they
    -- become the PREFIX and parsing continues from character 4; otherwise
    -- PREFIX is NULL and the whole name is passed on.
    SELECT
        cd.INPUT_DATA,
        CASE
            WHEN head.LEAD3 IN ('MR ','MS ','DR ','MRS') THEN LTRIM(RTRIM(head.LEAD3))
        END AS PREFIX,
        LTRIM(RTRIM(
            CASE
                WHEN head.LEAD3 IN ('MR ','MS ','DR ','MRS')
                    THEN SUBSTRING(cd.FULL_NAME, 4, LEN(cd.FULL_NAME))
                ELSE cd.FULL_NAME
            END
        )) AS REMAINING
    FROM clean_data AS cd
    CROSS APPLY (SELECT SUBSTRING(cd.FULL_NAME, 1, 3) AS LEAD3) AS head
),
first_name_split AS
(
    -- Everything up to the first space is the first name. With no space at
    -- all, the entire string is the first name and nothing remains.
    SELECT
        ps.INPUT_DATA,
        ps.PREFIX,
        CASE
            WHEN gap.POS = 0 THEN ps.REMAINING
            ELSE LEFT(ps.REMAINING, gap.POS - 1)
        END AS FIRST_NAME,
        CASE
            WHEN gap.POS = 0 THEN NULL
            ELSE SUBSTRING(ps.REMAINING, gap.POS + 1, LEN(ps.REMAINING))
        END AS REMAINING
    FROM prefix_split AS ps
    CROSS APPLY (SELECT CHARINDEX(' ', ps.REMAINING) AS POS) AS gap
)
SELECT
    fs.INPUT_DATA,
    fs.PREFIX,
    fs.FIRST_NAME,
    -- no further space: what is left is the last name, so no middle name
    CASE
        WHEN gap.POS = 0 THEN NULL
        ELSE LEFT(fs.REMAINING, gap.POS - 1)
    END AS MIDDLE_NAME,
    -- everything after the first space (or the whole remainder when
    -- POS = 0, because the start position is then 1)
    SUBSTRING(fs.REMAINING, 1 + gap.POS, LEN(fs.REMAINING)) AS LAST_NAME
FROM first_name_split AS fs
CROSS APPLY (SELECT CHARINDEX(' ', fs.REMAINING) AS POS) AS gap;

- 当然,这段代码要归功于 JStyons

1 个答案:

答案 0 :(得分:0)

希望这会有所帮助。我只添加了表示辈分的后缀(Sr、Jr),如果需要更多后缀,可以按需添加到 CASE 语句中。我还假设您的数据库不区分大小写。

假设(业务规则):

  1. 名字没有空格
  2. 中间名称没有空格
  3. 姓氏没有空格
  4. 前缀仅为 'MR'、'MS'、'DR'、'MRS' 形式,不带句点“.”
  5. 后缀仅为 'Sr'、'Jr'、'Sr.'、'Jr.' 形式。
  6. 数据库不区分大小写
  -- Splits a full name of the form "Prefix First Middle Last Suffix" into
  -- separate columns. Every part except the first name is optional.
  --
  -- Rules applied:
  --   * name parts are separated by spaces; runs of spaces count as one
  --   * prefix is the first word when it is MR / MS / DR / MRS (no period)
  --   * suffix is a trailing Jr / Sr, with or without a period, and is
  --     reported normalised as 'Jr' / 'Sr'
  --   * keyword matching is done on UPPER(...) so the result does not
  --     depend on the database collation
  --   * with three or more remaining words, everything after the second
  --     word is returned as LastName
  --
  -- Runs as a single statement: no temp table is created, so no cleanup
  -- is required before or after.
  ;WITH cte_OriginalData (FullName) AS
  (
      -- Sample rows; replace with a SELECT from the real table.
      SELECT v.FullName
      FROM (VALUES
           ('Andy D Where')
          ,('Cathy T  Landers')
          ,('Ms Annie    Wint There')
          ,('Ms Annie    Wint There Jr')
          ,('Mrs Annie     There Jr')
          ,('Frank     Fields')
          ,('Howdy      U Pokes Jr.')
          ,('Howdy U Pokes Sr.')
          ,('Cathy T Landers Jr')
          ,('Landers Jr')
      ) AS v (FullName)
  )
  ,cte_CleanName (FullName, CleanName) AS
  (
      -- Trim, then collapse any run of spaces to one space: each space
      -- becomes '<>', every inner '><' is removed, and the single '<>'
      -- left per run is turned back into a space. This handles runs of
      -- any length. Caveat: a literal '><' in the input would be removed.
      SELECT o.FullName
            ,REPLACE(REPLACE(REPLACE(LTRIM(RTRIM(o.FullName)), ' ', '<>'), '><', ''), '<>', ' ')
      FROM cte_OriginalData AS o
  )
  ,cte_Parse_Prefix (FullName, Prefix, NameAfterPrefix) AS
  (
      -- The whole first word must equal a known title, so a first name
      -- that merely starts with those letters is not split.
      SELECT c.FullName
            ,CASE WHEN pfx.IsPrefix = 1 THEN tok.FirstToken END
            ,CASE
                 WHEN pfx.IsPrefix = 1
                     -- skip the title and the single space that follows it
                     THEN SUBSTRING(c.CleanName, LEN(tok.FirstToken) + 2, 8000)
                 ELSE c.CleanName
             END
      FROM cte_CleanName AS c
      -- appending a space guarantees CHARINDEX finds one, even for a
      -- single-word name
      CROSS APPLY (SELECT LEFT(c.CleanName, CHARINDEX(' ', c.CleanName + ' ') - 1) AS FirstToken) AS tok
      CROSS APPLY (SELECT CASE
                              WHEN UPPER(tok.FirstToken) IN ('MR', 'MS', 'DR', 'MRS') THEN 1
                              ELSE 0
                          END AS IsPrefix) AS pfx
  )
  ,cte_Parse_Suffix (FullName, Prefix, NameCore, Suffix) AS
  (
      SELECT p.FullName
            ,p.Prefix
            -- drop the suffix together with the space in front of it;
            -- an empty remainder is reported as NULL
            ,NULLIF(RTRIM(LEFT(p.NameAfterPrefix, LEN(p.NameAfterPrefix) - cut.SuffixLen)), '')
            ,sfx.Suffix
      FROM cte_Parse_Prefix AS p
      -- the leading space in the patterns means a name consisting only
      -- of 'Jr' is not treated as a suffix
      CROSS APPLY (SELECT CASE
                              WHEN RIGHT(UPPER(p.NameAfterPrefix), 3) = ' JR'
                                OR RIGHT(UPPER(p.NameAfterPrefix), 4) = ' JR.' THEN 'Jr'
                              WHEN RIGHT(UPPER(p.NameAfterPrefix), 3) = ' SR'
                                OR RIGHT(UPPER(p.NameAfterPrefix), 4) = ' SR.' THEN 'Sr'
                          END AS Suffix) AS sfx
      CROSS APPLY (SELECT CASE
                              WHEN sfx.Suffix IS NULL THEN 0
                              WHEN RIGHT(p.NameAfterPrefix, 1) = '.' THEN 4
                              ELSE 3
                          END AS SuffixLen) AS cut
  )
  ,cte_Parse_FirstName (FullName, Prefix, FirstName, NameAfterFirst, Suffix) AS
  (
      SELECT s.FullName
            ,s.Prefix
            -- Pos - 1 excludes the separating space from the first name
            ,CASE WHEN gap.Pos = 0 THEN s.NameCore ELSE LEFT(s.NameCore, gap.Pos - 1) END
            -- take the remainder by position; removing the first name by
            -- text replacement would also delete later occurrences of it
            ,CASE WHEN gap.Pos > 0 THEN SUBSTRING(s.NameCore, gap.Pos + 1, 8000) END
            ,s.Suffix
      FROM cte_Parse_Suffix AS s
      CROSS APPLY (SELECT CHARINDEX(' ', s.NameCore) AS Pos) AS gap
  )
  SELECT f.Prefix
        ,f.FirstName
        -- a middle name exists only when two or more words remain
        ,CASE WHEN gap.Pos > 0 THEN LEFT(f.NameAfterFirst, gap.Pos - 1) END AS MiddleName
        ,CASE
             WHEN gap.Pos > 0 THEN SUBSTRING(f.NameAfterFirst, gap.Pos + 1, 8000)
             ELSE f.NameAfterFirst
         END AS LastName
        ,f.Suffix
  FROM cte_Parse_FirstName AS f
  CROSS APPLY (SELECT CHARINDEX(' ', f.NameAfterFirst) AS Pos) AS gap;