TSQL语句将名称后缀(Jr,Sr,IV等)移动到另一个字段中

时间:2010-09-14 18:46:46

标签: sql-server tsql

我想有一个TSQL语句将Name Suffix(Jr,Sr.,IV等)移到另一个字段中。

我看到的后缀是 JR SR I II III IV V

以下是数据样本

LastName
BRUNNING, II
BURCH II
BUSS, JR.
CANI III
CHRISTIAN,SR
COLVIN Jr
COWHERD,JR.

我希望后缀从LastName字段移出到另一个名为Suffix的字段。

LastName   Suffix  
BRUNNING   II
BURCH      II    
BUSS       JR
CANI       III
CHRISTIAN  SR
COLVIN     JR
COWHERD    JR

我使用的是SQL Server 2005,并且可以使用SQL#函数。非常感谢任何帮助。

5 个答案:

答案 0 :(得分:1)

凭印象来说,既然需要替换的后缀数量很少,你可以这样做:

-- Moves the suffix 'III' out of LastName into SUFFIX for every row whose
-- LastName contains it; the same statement can be repeated per suffix.
-- CHARINDEX takes the text to find first and the text to search second.
-- Only the part before the suffix is kept, so any space or comma that
-- preceded the suffix is still left at the end of LastName.
UPDATE [TableName]
SET LastName = SUBSTRING(LastName, 1, CHARINDEX('III', LastName) - 1),
    SUFFIX = 'III'
WHERE CHARINDEX('III', LastName) > 0;

答案 1 :(得分:1)

您可能需要进行一些预处理,以使格式更加一致。

考虑删除结束时段并用空格替换所有逗号。 之后,您的样本应如下所示:

LastName
BRUNNING  II
BURCH II
BUSS  JR
CANI III
CHRISTIAN SR
COLVIN Jr
COWHERD JR

然后,您可以识别以“I”,“II”,“III”,“JR”和“SR”结尾的行,根据后缀的长度将其从LastName中删除,并用相应的值更新后缀字段。

答案 2 :(得分:1)

使用SQL#函数可能会做得更好,但下面是用纯T-SQL实现的方法。

这里的主要思想是使用REVERSE和PATINDEX解析出名称中的最后一个段/标记,然后将其与已知后缀列表进行匹配。

首先是一些测试数据:

-- Test fixtures: #names holds the raw last names to parse and #suffixes holds
-- the list of recognised suffixes. Both temp tables are dropped first so the
-- script can be rerun in the same session.
IF OBJECT_ID('tempdb..#names') IS NOT NULL DROP TABLE #names
IF OBJECT_ID('tempdb..#suffixes') IS NOT NULL DROP TABLE #suffixes
CREATE TABLE #names (name VARCHAR(32))
CREATE TABLE #suffixes (suffix VARCHAR(32))
GO

-- Column lists are spelled out so these inserts keep working if #names is
-- later given additional columns (an INSERT without a column list must
-- supply a value for every column).
INSERT INTO #names (name) VALUES ('BRUNNING, II' )
INSERT INTO #names (name) VALUES ('BURCH II'     )
INSERT INTO #names (name) VALUES ('BUSS, JR.'    )
INSERT INTO #names (name) VALUES ('CANI III'     )
INSERT INTO #names (name) VALUES ('CHRISTIAN,SR' )
INSERT INTO #names (name) VALUES ('COLVIN Jr'    )
INSERT INTO #names (name) VALUES ('COWHERD,JR.'  )
-- Two names without a suffix, to show they pass through unchanged.
INSERT INTO #names (name) VALUES ('BILLY BOB'    )
INSERT INTO #names (name) VALUES ('JOHNNY'       )

INSERT INTO #suffixes (suffix) VALUES ('II' )
INSERT INTO #suffixes (suffix) VALUES ('III')
INSERT INTO #suffixes (suffix) VALUES ('JR' )
INSERT INTO #suffixes (suffix) VALUES ('SR' )

然后,内联SELECT版本。请注意使用NULLIF来控制SUBSTRING错误。

-- Splits each name at its last space/comma and reports the trailing token as
-- the suffix only when that token is listed in #suffixes.
-- Columns are listed explicitly (no SELECT *) so the query still runs once
-- #names has its own left_segments / right_segment columns.
;WITH located AS (
  -- Position of the last space/comma, counted from the end of the trimmed
  -- name. NULLIF turns "not found" (0) into NULL so the LEFT/RIGHT calls
  -- below yield NULL instead of failing on an invalid length.
  SELECT
    n.name
  , delim_pos = NULLIF(PATINDEX('%[ ,]%', REVERSE(RTRIM(n.name))), 0)
  FROM #names n
), split AS (
  -- Everything before the last delimiter, and the token after it.
  SELECT
    name
  , left_segments = RTRIM(LEFT(RTRIM(name), LEN(name) - delim_pos))
  , right_segment = RIGHT(RTRIM(name), delim_pos - 1)
  FROM located
), cleaned AS (
  -- Drop one trailing space/comma from the left part and one trailing
  -- period from the right part.
  SELECT
    name
  , left_segments = CASE WHEN left_segments LIKE '%[ ,]' THEN LEFT(left_segments, LEN(left_segments) - 1) ELSE left_segments END
  , right_segment = CASE WHEN right_segment LIKE '%[.]' THEN LEFT(right_segment, LEN(right_segment) - 1) ELSE right_segment END
  FROM split
)
SELECT
  c.name
, c.left_segments
, c.right_segment
  -- Only strip the last token when it is a known suffix.
, new_name = CASE WHEN s.suffix IS NOT NULL THEN c.left_segments ELSE c.name END
, s.suffix
FROM cleaned c
LEFT JOIN #suffixes s ON c.right_segment = s.suffix

或者,使用局部变量的UPDATE:

-- Add two working columns to #names to hold the parsed pieces of each name.
ALTER TABLE #names ADD 
  left_segments VARCHAR(64)
, right_segment VARCHAR(64)
GO

-- Scratch variables that carry intermediate values from one SET assignment
-- to the next inside the UPDATE below.
DECLARE 
  @name VARCHAR(64)
, @len INT
, @last_delim INT
, @left_segments VARCHAR(64)
, @right_segment VARCHAR(64)

-- No WHERE clause: every row of #names is updated.
-- NOTE(review): later assignments read variables set by earlier ones in the
-- same SET list, so this relies on the assignments being evaluated in the
-- listed order for each row -- confirm that holds on the target server.
UPDATE #names SET 
-- Work on the name with trailing spaces removed.
  @name           = RTRIM(name)
, @len            = LEN(@name)
-- Number of characters in front of the last space/comma. NULLIF turns
-- "no delimiter found" (0) into NULL, which makes the values below NULL.
, @last_delim     = @len-NULLIF(PATINDEX('%[ ,]%',REVERSE(@name)),0)
-- Text before the last delimiter, and the token after it.
, @left_segments  = RTRIM(LEFT(@name,@last_delim))
, @right_segment  = RIGHT(@name,@len-@last_delim-1)
-- Drop one trailing space/comma from the left part and one trailing period
-- from the right part.
, @left_segments  = CASE WHEN @left_segments LIKE '%[ ,]' THEN LEFT(@left_segments,LEN(@left_segments)-1) ELSE @left_segments END
, @right_segment  = CASE WHEN @right_segment LIKE '%[.]'  THEN LEFT(@right_segment,LEN(@right_segment)-1) ELSE @right_segment END
-- Persist the parsed pieces into the row's working columns.
, left_segments   = @left_segments
, right_segment   = @right_segment

-- Show every #names row together with the cleaned name and the matched
-- suffix; rows whose last token is not in #suffixes keep their original name
-- and get a NULL suffix.
SELECT
  n.*
, CASE WHEN s.suffix IS NOT NULL THEN n.left_segments ELSE n.name END AS new_name
, s.suffix AS suffix
FROM #names AS n
LEFT JOIN #suffixes AS s
  ON s.suffix = n.right_segment

内联SELECT相当方便,但难以阅读和排除故障。我更喜欢带有本地变量的UPDATE用于我以后可能需要返回的任何内容。此外,它使个人编辑更容易应用。

编辑:SELECT方法稍作修改后,封装在内联表值函数中。内联TVF应该比标量UDF更高效,而且还可以一次返回多个值。

CREATE FUNCTION dbo.ParseNameAndSuffix (@name VARCHAR(64), @ValidSuffixes VARCHAR(512))
RETURNS TABLE AS RETURN (
  -- Splits @name at its last space/comma and decides whether the trailing
  -- token is a suffix.
  --
  -- Parameters:
  --   @name          - the raw name; leading/trailing spaces are ignored.
  --   @ValidSuffixes - semicolon-separated list of accepted suffixes,
  --                    e.g. 'II;III;JR;SR'.
  -- Returns one row:
  --   left_segments  - text before the last delimiter (NULL if no delimiter).
  --   right_segment  - token after the last delimiter, minus one trailing
  --                    period (NULL if no delimiter).
  --   new_name       - left_segments when the token is a listed suffix,
  --                    otherwise the trimmed @name unchanged.
  --   suffix         - the suffix as spelled in @ValidSuffixes, or NULL when
  --                    the token is not in the list.
  WITH trimmed AS (
    SELECT name = LTRIM(RTRIM(@name))
  ), located AS (
    -- Position of the last space/comma counted from the end. NULLIF turns
    -- "not found" (0) into NULL so everything derived from it is NULL.
    SELECT
      name
    , delim_pos = NULLIF(PATINDEX('%[ ,]%', REVERSE(name)), 0)
    FROM trimmed
  ), split AS (
    SELECT
      name
    , left_segments = RTRIM(LEFT(name, LEN(name) - delim_pos))
    , right_segment = RIGHT(name, delim_pos - 1)
    FROM located
  ), cleaned AS (
    -- Drop one trailing space/comma on the left, one trailing period on the right.
    SELECT
      name
    , left_segments = CASE WHEN left_segments LIKE '%[ ,]' THEN LEFT(left_segments, LEN(left_segments) - 1) ELSE left_segments END
    , right_segment = CASE WHEN right_segment LIKE '%[.]' THEN LEFT(right_segment, LEN(right_segment) - 1) ELSE right_segment END
    FROM split
  ), matched AS (
    -- Both sides are wrapped in ';' so only whole list entries match.
    -- match_pos is 0 for no match and NULL when there is no token.
    SELECT
      name
    , left_segments
    , right_segment
    , match_pos = CHARINDEX(';' + right_segment + ';', ';' + @ValidSuffixes + ';')
    FROM cleaned
  )
  SELECT
    left_segments
  , right_segment
  , new_name = CASE WHEN match_pos > 0 THEN left_segments ELSE name END
    -- match_pos indexes the leading ';' of the padded list, which is the
    -- first character of the entry in the unpadded list. Reading the entry
    -- from the list returns its canonical spelling ('JR' for input 'Jr').
  , suffix   = CASE WHEN match_pos > 0 THEN SUBSTRING(@ValidSuffixes, match_pos, LEN(right_segment)) END
  FROM matched
  )
GO

-- Run the parser against every row of #names, using a fixed suffix list.
SELECT
  n.*
, parsed.*
FROM #names AS n
CROSS APPLY dbo.ParseNameAndSuffix(n.name, 'II;III;JR;SR') AS parsed

答案 3 :(得分:1)

如果CLR不是一个选项,那么彼得建议的很好。但是,既然你说你有SQL#,那么你可以使用SQL#中的RegEx_MatchSimple函数以更简单,更实用的方式完成这项工作。我将以彼得的例子作为起点来解释。

我们可以使用与Peter使用的几乎相同的SQL来设置测试,但在这种情况下,我将创建Suffixes表作为真实(非Temp)表,以便我可以在下面的一个示例TVF中引用它。也许您可能希望将它们保存在表中而不是作为参数传入,但我将显示这两种样式。我还在#Names表中添加了两个名称,以显示如何使用RegEx来帮助捕获数据中的变体(额外的空格和/或逗号):

USE [tempdb]
GO
-- Rebuild the fixtures from scratch: #Names is a temp table of raw last
-- names, dbo.Suffixes is a regular table (in tempdb) of recognised suffixes.
IF OBJECT_ID('tempdb..#Names') IS NOT NULL
    DROP TABLE #Names
IF OBJECT_ID('tempdb.dbo.Suffixes') IS NOT NULL
    DROP TABLE dbo.Suffixes
CREATE TABLE #Names (LastName VARCHAR(32))
CREATE TABLE dbo.Suffixes (Suffix VARCHAR(32))
GO

-- Sample names; the last two carry extra spaces/commas on purpose.
INSERT INTO #Names (LastName)
          SELECT 'BRUNNING, II'
UNION ALL SELECT 'BURCH II'
UNION ALL SELECT 'BUSS, JR.'
UNION ALL SELECT 'CANI III'
UNION ALL SELECT 'CHRISTIAN,SR'
UNION ALL SELECT 'COLVIN Jr'
UNION ALL SELECT 'COWHERD,JR.'
UNION ALL SELECT 'BILLY BOB'
UNION ALL SELECT 'JOHNNY'
UNION ALL SELECT 'BRUNNING, II '
UNION ALL SELECT 'SMITH ,, SR. '

INSERT INTO dbo.Suffixes (Suffix)
          SELECT 'II'
UNION ALL SELECT 'III'
UNION ALL SELECT 'JR'
UNION ALL SELECT 'SR'

首先要展示的是使用上述数据的简单示例。在这种情况下,我使用CTE生成与名称匹配的列表,然后过滤掉与任何内容不匹配的行。我将[FullMatch]字段括在冒号中,这样就可以更容易地看到捕获的前导和尾随空格:

-- Tries every suffix against every name and lists the pairs that match.
-- The pattern requires a delimiter (one or more commas with optional spaces
-- around them, or one or more spaces), then the suffix, then optional periods
-- and spaces up to the end of the string.
-- NOTE(review): RegEx_MatchSimple is a SQL# CLR function; the arguments
-- 1 and 'IgnoreCase' are presumably the start position and regex options --
-- confirm against the SQL# documentation.
;WITH cte AS (
    SELECT  names.LastName,
            [SQL#].[SQL#].RegEx_MatchSimple(names.LastName, '(([ ]*,+[ ]*)|([ ]+))' + suff.Suffix + '[.]*[ ]*$', 1, 'IgnoreCase') AS [FullMatch],
            -- Spelled exactly as declared (Suffix) so the query also resolves
            -- under a case-sensitive collation.
            suff.Suffix
    FROM    #Names names
    CROSS JOIN tempdb.dbo.Suffixes suff
)
-- FullMatch is wrapped in colons so captured leading/trailing spaces are
-- visible; pairs with an empty (or NULL) FullMatch are filtered out.
SELECT  cte.LastName, ':' + cte.FullMatch + ':' AS [FullMatch], REPLACE(cte.LastName, cte.FullMatch, '') AS [Replacement], cte.Suffix
FROM    cte
WHERE   cte.FullMatch <> ''

你可以将这个理论带到直接UPDATE语句:

-- Pair every MyTable name with every known suffix and capture the matched
-- trailing text (delimiter + suffix + optional periods/spaces), then strip
-- that text from LastName and store the suffix in NameSuffix.
;WITH suffix_hits AS (
    SELECT
        src.LastName,
        [SQL#].[SQL#].RegEx_MatchSimple(src.LastName, '(([ ]*,+[ ]*)|([ ]+))' + sfx.Suffix + '[.]*[ ]*$', 1, 'IgnoreCase') AS [FullMatch],
        sfx.Suffix
    FROM MyTable AS src
    CROSS JOIN NameSuffixes AS sfx
)
UPDATE target
SET
    target.LastName   = REPLACE(hit.LastName, hit.FullMatch, ''),
    target.NameSuffix = hit.Suffix
FROM MyTable AS target
INNER JOIN suffix_hits AS hit
    ON hit.LastName = target.LastName
-- Only name/suffix pairs that actually matched are applied.
WHERE hit.FullMatch <> ''

您要求将其实现为函数,如下所示:

CREATE FUNCTION dbo.ParseNameAndSuffix (@Name VARCHAR(64))
RETURNS TABLE AS RETURN
(
    -- Tests @Name against each row of tempdb.dbo.Suffixes and returns one row
    -- per suffix whose pattern matched:
    --   LastName    - the input name, unchanged
    --   FullMatch   - the matched trailing text (delimiter, suffix, and any
    --                 trailing periods/spaces)
    --   Replacement - LastName with FullMatch removed
    --   Suffix      - the suffix from the table
    -- Names that match no suffix produce no rows.
    SELECT
        hit.LastName,
        hit.FullMatch,
        REPLACE(hit.LastName, hit.FullMatch, '') AS [Replacement],
        hit.Suffix
    FROM (
        SELECT
            @Name AS [LastName],
            [SQL#].[SQL#].RegEx_MatchSimple(@Name, '(([ ]*,+[ ]*)|([ ]+))' + sfx.Suffix + '[.]*[ ]*$', 1, 'IgnoreCase') AS [FullMatch],
            sfx.Suffix
        FROM tempdb.dbo.Suffixes AS sfx
    ) AS hit
    WHERE hit.FullMatch <> ''
)
GO

可以像这样使用:

-- List each #Names row next to its parse result; CROSS APPLY drops names for
-- which the function returns no row.
SELECT
    n.*,
    parsed.*
FROM #Names AS n
CROSS APPLY dbo.ParseNameAndSuffix(n.LastName) AS parsed

-- or --

-- Writes the parse result back to MyTable. The function returns the columns
-- LastName, FullMatch, Replacement and Suffix; Replacement is already the
-- name with the matched suffix text removed, so it is used directly.
-- CROSS APPLY yields no row for names without a suffix, so those rows are
-- left untouched.
UPDATE  mt
SET     mt.LastName = parse.Replacement,
        mt.NameSuffix = parse.Suffix
FROM    MyTable mt
CROSS APPLY dbo.ParseNameAndSuffix(mt.LastName) parse

为了更接近地匹配Peter给出的后缀作为参数传递的示例,可以使用SQL#中的String_Split函数完成以下操作:

CREATE FUNCTION dbo.ParseNameAndSuffix2 (@Name VARCHAR(64), @Suffixes VARCHAR(MAX))
RETURNS TABLE AS RETURN
(
    -- Same contract as the table-driven version, but the candidate suffixes
    -- come from @Suffixes, a ';'-separated list split with SQL# String_Split.
    -- Returns one row (LastName, FullMatch, Replacement, Suffix) per list
    -- entry whose pattern matched the end of @Name; no rows otherwise.
    SELECT
        hit.LastName,
        hit.FullMatch,
        REPLACE(hit.LastName, hit.FullMatch, '') AS [Replacement],
        hit.Suffix
    FROM (
        SELECT
            @Name AS [LastName],
            [SQL#].[SQL#].RegEx_MatchSimple(@Name, '(([ ]*,+[ ]*)|([ ]+))' + part.Val + '[.]*[ ]*$', 1, 'IgnoreCase') AS [FullMatch],
            part.Val AS [Suffix]
        FROM [SQL#].[SQL#].String_Split(@Suffixes, ';', 2) AS part
    ) AS hit
    WHERE hit.FullMatch <> ''
)
GO

然后可以按如下方式使用:

-- List each #Names row next to its parse result for the given suffix list.
SELECT
    n.*,
    parsed.*
FROM #Names AS n
CROSS APPLY dbo.ParseNameAndSuffix2(n.LastName, 'II;III;JR;SR') AS parsed

-- or --

-- Writes the parse result back to MyTable. The function returns the columns
-- LastName, FullMatch, Replacement and Suffix; Replacement is already the
-- name with the matched suffix text removed, so it is used directly.
-- CROSS APPLY yields no row for names without a suffix, so those rows are
-- left untouched.
UPDATE  mt
SET     mt.LastName = parse.Replacement,
        mt.NameSuffix = parse.Suffix
FROM    MyTable mt
CROSS APPLY dbo.ParseNameAndSuffix2(mt.LastName, 'II;III;JR;SR') parse

答案 4 :(得分:0)

我认为你最好的选择是列表中的最后一个单词(不包括标点符号)的RegEx匹配(JR,Sr,III等)

查看此博客

http://blogs.msdn.com/b/khen1234/archive/2005/05/11/416392.aspx