如何返回特定的字符串

时间:2016-08-08 19:06:59

标签: sql sql-server tsql

我有一个包含数千行数据的表。

note
----
LB MN IM 12 18 20  CIN # EW80851R This is tJ
ified KGM nteal icne cinac12345T Cannot locate
NCR  Last verified 06 05 14 cin number ty56478P for the front
OD 00612  Last Verified cin#ad89521Y Me side C 05 05 14
SPC 0   VERIFIED PD IMB cin PU12301R  Last Verified 
PC PO CON FC  D Verified 02/29/2016 No Copy  CIN#FG62301F

每行包含单词cin,后跟几个字符。

例如:CIN # EW80851R, cinac12345T, cin number ty56478P, cin#ad89521Y, cin PU12301R, CIN#FG62301F

如何编写一个只返回以下结果的查询:

note
----
cinEW80851R
cinac12345T
cinty56478P
cinad89521Y
cinPU12301R
CINFG62301F

2 个答案:

答案 0 :(得分:1)

借助 XML、字符串解析函数和 CROSS APPLY,您可以一次性处理整个数据集。

请注意,我额外添加了第二条记录(ID 为 2 的虚拟数据)。

-- Extracts every "cin..." token from free-text notes.
-- Approach: serialize all rows to one XML string, normalize the different
-- "cin" spellings with a global search & replace, convert back to rows and
-- split each note on spaces, keeping only the tokens that start with 'cin'.

-- Sample data: one row per note; ID 2 is an extra dummy record.
Declare @YourTable table (ID int,Note varchar(max))
Insert into @YourTable (ID,Note) values
(1,'LB MN IM 12 18 20  CIN # EW80851R This is tJ ified KGM nteal icne cinac12345T Cannot locate NCR  Last verified 06 05 14 cin number ty56478P for the front OD 00612  Last Verified cin#ad89521Y Me side C 05 05 14 SPC 0   VERIFIED PD IMB cin PU12301R  Last Verified PC PO CON FC  D Verified 02/29/2016 No Copy  CIN#FG62301F'),
(2,'L This is tJ KGM teal icne Last verified 06 05 14 for the front OD 00612  Last Verified cin#ZZ89256Y Me side C 05 05 14 SPC 0   VERIFIED PD IMB cin ZZPU12301R  Last Verified PC PO CON FC  D Verified 02/29/2016 No Copy  CIN#ZZ62301F')

-- Create a Mapping/Normalization Table
-- Rules are applied in MapSeq order, and the order matters: the longer
-- ' cin number ' / ' cin # ' / ' cin#' forms must be collapsed before the bare ' cin '.
Declare @Map table (MapSeq int primary key,MapFrom varchar(25),MapTo varchar(25))
Insert into @Map (MapSeq,MapFrom,MapTo) values (1,char(13),' '),(2,char(10),' '),(3,' cin number ',' cin'),(4,' cin # ',' cin'),(5,' cin#',' cin'),(6,' cin ',' cin')

-- Convert your Base Data to XML
-- Each note is padded with a space on both sides so that a "cin" at the very
-- start or end of a note still matches the space-delimited rules above.
Declare @XML XML,@String varchar(max)
Set @XML = (Select KeyID=ID,String=' '+Note+' ' from @YourTable For XML RAW)

-- Convert XML to Varchar(max) and Apply Global Search & Replace
-- NOTE(review): CR/LF inside an attribute value may be serialized as character
-- references rather than raw characters, in which case rules 1 and 2 would not
-- match at this point - confirm against real data containing line breaks.
Select @String = cast(@XML as varchar(max))

-- Apply one rule per iteration, in ascending MapSeq. A single multi-row
-- "Select @String = Replace(...) From @Map Order by MapSeq" is not guaranteed
-- to process every row, nor to honour the ORDER BY.
Declare @MapSeq int = (Select Min(MapSeq) From @Map)
While @MapSeq is not null
Begin
    Select @String = Replace(@String,MapFrom,MapTo) From @Map Where MapSeq = @MapSeq
    Select @MapSeq = Min(MapSeq) From @Map Where MapSeq > @MapSeq
End

-- Convert Back to XML
Select @XML = cast(@String as XML)

-- Generate Final Results
-- One output row per token that starts with 'cin', tagged with the source ID.
Select A.ID
      ,CIN  = B.Key_Value
 From (
        Select ID        = t.col.value('@KeyID', 'int')
              ,NewString = t.col.value('@String', 'varchar(max)')
         From  @XML.nodes('/row') AS t (col)
      ) A
 Cross Apply (Select Key_Value from [dbo].[udf-Str-Parse](A.NewString,' ') where Key_Value like 'cin%') B

返回

ID  CIN
1   cinEW80851R
1   cinac12345T
1   cinty56478P
1   cinad89521Y
1   cinPU12301R
1   cinFG62301F
2   cinZZ89256Y     << Dummy Record
2   cinZZPU12301R   << Dummy Record
2   cinZZ62301F     << Dummy Record

UDF

CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimeter varchar(10))
-- Splits @String on @Delimeter and returns one row per piece.
--
-- Parameters:
--   @String    text to split
--   @Delimeter separator (may be several characters, or a single space)
-- Returns:
--   Key_PS     1-based position of the piece within @String
--   Key_Value  the piece, with leading/trailing spaces trimmed; consecutive
--              delimiters produce empty-string rows
-- A NULL @String or NULL @Delimeter returns no rows; an empty @Delimeter
-- returns @String as a single row.
--
-- The split is done with CHARINDEX rather than by casting to XML, so input
-- containing &, < or > no longer raises an XML parsing error, and Key_PS
-- reliably reflects the order of the pieces.
--
--Usage: Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--       Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
--       Select * from [dbo].[udf-Str-Parse]('id26,id46|id658,id967','|')
--       Select * from [dbo].[udf-Str-Parse]('hello world. It. is. . raining.today','.')

Returns @ReturnTable Table (Key_PS int IDENTITY(1,1), Key_Value varchar(max))
As
Begin
   If @String is null or @Delimeter is null
      Return

   -- DataLength, not Len: Len(' ') is 0 because Len ignores trailing spaces
   Declare @DelimLen int = DataLength(@Delimeter)
   If @DelimLen = 0
   Begin
      Insert Into @ReturnTable (Key_Value) Values (ltrim(rtrim(@String)))
      Return
   End

   Declare @Start bigint = 1
   Declare @Pos   bigint = CharIndex(@Delimeter,@String)

   -- Emit the piece in front of each delimiter occurrence, left to right
   While @Pos > 0
   Begin
      Insert Into @ReturnTable (Key_Value) Values (ltrim(rtrim(Substring(@String,@Start,@Pos-@Start))))
      Set @Start = @Pos + @DelimLen
      Set @Pos   = CharIndex(@Delimeter,@String,@Start)
   End

   -- Remainder after the last delimiter (the whole string when none was found)
   Insert Into @ReturnTable (Key_Value) Values (ltrim(rtrim(Substring(@String,@Start,DataLength(@String)))))
   Return
End

答案 1 :(得分:1)

假设干扰字符只有 #,您需要的值长度为 8,并且该值的第 3 到第 7 个字符构成一个整数——那么您可以尝试像下面这样使用 XML(XQuery):

-- Extracts 8-character codes that follow the first 'cin' in each Note.
-- Everything after the first 'cin' is split on spaces and '#' into <d> elements;
-- a token is kept when it is exactly 8 characters long and its characters
-- 3 through 7 form an integer.
DECLARE @xml xml

SELECT @xml = (
    -- Escape &, < and > before building the XML so notes containing them do not
    -- make the CAST fail. The escapes contain neither ' ' nor '#', so the split
    -- below cannot cut an entity in half.
    SELECT CAST('<d>'+REPLACE(REPLACE(
                REPLACE(REPLACE(REPLACE(
                    SUBSTRING(Note,CHARINDEX('cin',Note)+3,LEN(Note))
                ,'&','&amp;'),'<','&lt;'),'>','&gt;')
           ,' ','#'),'#','</d><d>')+'</d>' as xml)
    FROM YourTable
    FOR XML PATH('')
)

SELECT  t.v.value('.','nvarchar(8)') AS CIN
FROM @xml.nodes('/d') as t(v)
-- Measure the full token: reading it as nvarchar(8) first would truncate longer
-- tokens to 8 characters and let them pass the length check.
WHERE LEN(t.v.value('.','nvarchar(max)')) = 8 --check if length = 8
    -- IS NOT NULL (rather than ISNULL(...,0) != 0) so an all-zero digit part
    -- such as '00000' is still accepted as an integer.
    AND t.v.value('substring(string(.), 3, 5) cast as xs:int ?','int') IS NOT NULL --check if part of value is int

对于您提供的那部分表数据,输出将是:

EW80851R
ac12345T
ty56478P
ad89521Y
PU12301R
FG62301F

编辑(灵感来自 @JohnCappelletti 的回答):

更复杂的解决方案:

-- Extracts 8-character codes that follow the first 'cin' in each Note.
-- Every character that is not 0-9, A-Z or a-z is turned into '#', the text is
-- split on '#', and a token is kept when it is exactly 8 characters long and
-- its characters 3 through 7 form an integer.

--Declare variables
DECLARE @xml xml, @string nvarchar(max) = '', @code int = 0
--Rows become one big string (everything after the first 'cin' of each note).
--NULL notes are skipped: concatenating one would turn the whole string into NULL.
--NOTE(review): multi-row variable concatenation is kept from the original; row
--order does not matter for the result here, but confirm it picks up every row
--on your SQL Server version.
SELECT @string = @string + SUBSTRING(Note,CHARINDEX('cin',Note)+3,LEN(Note)) +' '
FROM YourTable y
WHERE Note IS NOT NULL
--Normalization: replace every non-numeric and non-alphabetic character
--(codes 0-255 outside 48-57, 65-90, 97-122) with '#', one code per iteration,
--so each replacement is applied exactly once and in a defined way.
WHILE @code <= 255
BEGIN
    IF NOT (@code BETWEEN 48 AND 57
            OR @code BETWEEN 65 AND 90
            OR @code BETWEEN 97 AND 122)
       AND CHAR(@code) IS NOT NULL --CHAR() can return NULL on double-byte code pages; REPLACE with NULL would null the string
    BEGIN
        --Binary collation: match exactly this character (including CHAR(0)),
        --never a letter or digit the default collation considers equal to it.
        SET @string = REPLACE(@string COLLATE Latin1_General_BIN2, CHAR(@code), '#')
    END
    SET @code += 1
END
--convert to xml (safe: &, < and > were replaced by '#' above)
SELECT @xml= CAST('<d>'+REPLACE(@string,'#','</d><d>')+'</d>' as xml)
--convert XML to table
SELECT  t.v.value('.','nvarchar(8)') AS CIN
FROM @xml.nodes('/d') as t(v)
--Measure the full token: reading it as nvarchar(8) first would truncate longer
--tokens to 8 characters and let them pass the length check.
WHERE LEN(t.v.value('.','nvarchar(max)')) = 8 --check if length = 8
    --IS NOT NULL (rather than ISNULL(...,0) != 0) so an all-zero digit part
    --such as '00000' is still accepted as an integer.
    AND t.v.value('substring(string(.), 3, 5) cast as xs:int ?','int') IS NOT NULL
    --check if part of value is int