我有一个包含数千行数据的表。
note
----
LB MN IM 12 18 20 CIN # EW80851R This is tJ
ified KGM nteal icne cinac12345T Cannot locate
NCR Last verified 06 05 14 cin number ty56478P for the front
OD 00612 Last Verified cin#ad89521Y Me side C 05 05 14
SPC 0 VERIFIED PD IMB cin PU12301R Last Verified
PC PO CON FC D Verified 02/29/2016 No Copy CIN#FG62301F
每行都包含单词 cin,后面跟着若干个字符。
例如:CIN # EW80851R, cinac12345T, cin number ty56478P, cin#ad89521Y, cin PU12301R, CIN#FG62301F
如何编写一个只返回以下结果的查询:
note
----
cinEW80851R
cinac12345T
cinty56478P
cinad89521Y
cinPU12301R
CINFG62301F
答案 0 :(得分:1)
在XML,字符串解析器和交叉应用的帮助下,您可以处理整个数据集。
请注意,我额外添加了第二条记录(虚拟数据),用来演示多行处理。
-- Extracts every "cin" identifier from free-text notes, one result row per identifier.
-- Approach: normalize each note so that every spelling of the prefix
-- ("cin number ", "cin # ", "cin#", "cin ") collapses to "cin" glued to the
-- identifier, split the note on spaces, and keep the words starting with "cin".
-- Matching 'CIN' as well as 'cin' relies on a case-insensitive collation.

-- Sample data: ID identifies the source row, Note holds the free text.
-- Row 2 is a dummy record showing that several rows are processed at once.
Declare @YourTable table (ID int,Note varchar(max))
Insert into @YourTable (ID,Note) values
(1,'LB MN IM 12 18 20 CIN # EW80851R This is tJ ified KGM nteal icne cinac12345T Cannot locate NCR Last verified 06 05 14 cin number ty56478P for the front OD 00612 Last Verified cin#ad89521Y Me side C 05 05 14 SPC 0 VERIFIED PD IMB cin PU12301R Last Verified PC PO CON FC D Verified 02/29/2016 No Copy CIN#FG62301F'),
(2,'L This is tJ KGM teal icne Last verified 06 05 14 for the front OD 00612 Last Verified cin#ZZ89256Y Me side C 05 05 14 SPC 0 VERIFIED PD IMB cin ZZPU12301R Last Verified PC PO CON FC D Verified 02/29/2016 No Copy CIN#ZZ62301F')

-- Create a Mapping/Normalization Table.
-- Entries are applied in MapSeq order: line breaks become spaces first, then
-- the longer prefix variants are handled before the bare ' cin ' so the short
-- pattern cannot consume part of a longer one.
Declare @Map table (MapSeq int,MapFrom varchar(25),MapTo varchar(25))
Insert into @Map (MapSeq,MapFrom,MapTo) values (1,char(13),' '),(2,char(10),' '),(3,' cin number ',' cin'),(4,' cin # ',' cin'),(5,' cin#',' cin'),(6,' cin ',' cin')

-- Working copy of the notes, padded with one space on each side so that a
-- prefix at the very start or end of a note still matches the ' cin...' patterns.
Declare @Work table (ID int,NewString varchar(max))
Insert into @Work (ID,NewString)
Select ID,' '+Note+' '
From @YourTable

-- Apply the replacements one map entry at a time, in MapSeq order.
-- Each entry is its own UPDATE statement, so the order is explicit instead of
-- depending on how a multi-row "Select @var = Replace(@var,...)" happens to be
-- evaluated. The patterns are compared against the note text itself.
Declare @Seq int,@From varchar(25),@To varchar(25)
Select @Seq = min(MapSeq) From @Map
While @Seq is not null
Begin
    Select @From = MapFrom,@To = MapTo From @Map Where MapSeq = @Seq
    Update @Work Set NewString = Replace(NewString,@From,@To)
    Select @Seq = min(MapSeq) From @Map Where MapSeq > @Seq
End

-- Generate Final Results: split every normalized note on spaces and keep only
-- the words that start with "cin".
Select A.ID
      ,CIN = B.Key_Value
From @Work A
Cross Apply (Select Key_Value from [dbo].[udf-Str-Parse](A.NewString,' ') where Key_Value like 'cin%') B
返回
ID CIN
1 cinEW80851R
1 cinac12345T
1 cinty56478P
1 cinad89521Y
1 cinPU12301R
1 cinFG62301F
2 cinZZ89256Y << Dummy Record
2 cinZZPU12301R << Dummy Record
2 cinZZ62301F << Dummy Record
UDF
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimeter varchar(10))
-- Splits @String on every occurrence of @Delimeter and returns one row per piece.
--   @String    : text to split
--   @Delimeter : separator text (may be longer than one character)
-- Returns: Key_PS    - identity number assigned as the pieces are inserted
--                      (NOTE(review): expected to follow the left-to-right order
--                      of the pieces, but the insert has no ORDER BY - confirm
--                      before relying on it)
--          Key_Value - the piece with leading/trailing spaces trimmed
-- The split is done by turning the text into <x>piece</x> elements; the XML
-- markup characters & < > in the input are escaped first so they are returned
-- as ordinary text instead of making the XML conversion fail.
--Usage: Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
-- Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
-- Select * from [dbo].[udf-Str-Parse]('id26,id46|id658,id967','|')
-- Select * from [dbo].[udf-Str-Parse]('hello world. It. is. . raining.today','.')
Returns @ReturnTable Table (Key_PS int IDENTITY(1,1), Key_Value varchar(max))
As
Begin
    Declare @Token varchar(20),@Safe varchar(max),@XML xml

    -- Placeholder standing in for the delimiter while the text is escaped.
    -- It contains none of & < > so the escaping step leaves it untouched.
    Set @Token = '~~udfStrParseSplit~~'

    -- 1) Mark the split points first, so a delimiter such as '&' or ';' is
    --    recognised before escaping introduces '&amp;' style sequences.
    Set @Safe = Replace(@String,@Delimeter,@Token)

    -- 2) Escape the XML markup characters ('&' first, so the entities added
    --    for '<' and '>' are not escaped a second time).
    Set @Safe = Replace(Replace(Replace(@Safe,'&','&amp;'),'<','&lt;'),'>','&gt;')

    -- 3) Turn every split point into an element boundary and parse.
    Set @XML = Cast('<x>' + Replace(@Safe,@Token,'</x><x>') + '</x>' as XML)

    Insert Into @ReturnTable (Key_Value)
    Select Key_Value = ltrim(rtrim(String.value('.', 'varchar(max)')))
    From @XML.nodes('x') as T(String)

    Return
End
答案 1 :(得分:1)
假设只有 # 这一种干扰符号,您需要的部分长度为 8,并且该值的第 3 到第 7 个字符是整数,那么可以像下面这样使用 XML(XQuery):
-- Extracts 8-character identifiers whose characters 3-7 are digits from the
-- text that follows the first 'cin' in each Note.
-- Spaces and '#' are both treated as separators: each separated word becomes
-- one <d> element, and the elements are then filtered by length and digit part.
DECLARE @xml xml
SELECT @xml = (
    SELECT CAST('<d>' + REPLACE(REPLACE(e.tail,' ','#'),'#','</d><d>') + '</d>' AS xml)
    FROM YourTable
    -- Text after the first 'cin', with the XML markup characters escaped so
    -- notes containing & < > do not make the CAST to xml fail. The entities
    -- contain neither ' ' nor '#', so they survive the separator replacement.
    CROSS APPLY (
        SELECT tail = REPLACE(REPLACE(REPLACE(SUBSTRING(Note,CHARINDEX('cin',Note)+3,LEN(Note)),'&','&amp;'),'<','&lt;'),'>','&gt;')
    ) e
    FOR XML PATH('')
)
SELECT t.v.value('.','nvarchar(8)')
FROM @xml.nodes('/d') as t(v)
-- Measure the full word: reading it as nvarchar(8) would truncate longer
-- words to 8 characters and let them pass the length check.
WHERE LEN(t.v.value('.','nvarchar(max)')) = 8 --check if length = 8
-- "cast as xs:int ?" yields NULL when characters 3-7 are not an integer;
-- testing for NULL (rather than <> 0) keeps identifiers whose digits are 00000.
AND t.v.value('substring(string(.), 3, 5) cast as xs:int ?','int') IS NOT NULL --check if part of value is int
对于您提供的那部分表数据,输出将是:
EW80851R
ac12345T
ty56478P
ad89521Y
PU12301R
FG62301F
编辑(灵感来自 @JohnCappelleti 的回答):
更复杂的解决方案:
-- Extracts 8-character identifiers whose characters 3-7 are digits, treating
-- every character that is not 0-9, A-Z or a-z as a separator.
--Declare variables
DECLARE @xml xml, @string nvarchar(max) = ''
--Rows become one big string: the text after the first 'cin' of every Note,
--separated by spaces. NULL notes are skipped, because concatenating NULL
--would turn the whole accumulated string into NULL and lose every row.
--NOTE(review): this relies on multi-row variable assignment in a SELECT;
--confirm it is acceptable for the target SQL Server version.
SELECT @string = @string + SUBSTRING(Note,CHARINDEX('cin',Note)+3,LEN(Note)) +' '
FROM YourTable y
WHERE Note IS NOT NULL
--CTE listing character codes 0-255; isUsed = 1 marks every character that is
--neither a digit (48-57) nor a letter (65-90, 97-122), i.e. a separator
;WITH Symbols AS (
    SELECT 0 as d,
           CHAR(0) as s,
           1 as isUsed
    UNION ALL
    SELECT d+1,
           CHAR(d+1),
           CASE WHEN d+1 between 48 and 57
                  OR d+1 between 65 and 90
                  OR d+1 between 97 and 122 THEN 0 ELSE 1 END
    FROM Symbols
    WHERE d < 255
)
--replace all not numeric and not alphabetic characters with '#'
SELECT @string = REPLACE(@string,s.s,'#')
FROM Symbols s
WHERE isUsed = 1
--the CTE recurses 255 times, above the default limit of 100, so lift the limit
OPTION(MAXRECURSION 0)
--convert to xml: every '#' becomes an element boundary. The XML markup
--characters were replaced by '#' above, so the string needs no escaping
SELECT @xml= CAST('<d>'+REPLACE(@string,'#','</d><d>')+'</d>' as xml)
--convert XML to table
SELECT t.v.value('.','nvarchar(8)')
FROM @xml.nodes('/d') as t(v)
--measure the full word: reading it as nvarchar(8) would truncate longer words
--to 8 characters and let them pass the length check
WHERE LEN(t.v.value('.','nvarchar(max)')) = 8 --check if length = 8
--"cast as xs:int ?" yields NULL when characters 3-7 are not an integer;
--testing for NULL (rather than <> 0) keeps identifiers whose digits are 00000
AND t.v.value('substring(string(.), 3, 5) cast as xs:int ?','int') IS NOT NULL
--check if part of value is int