如何从T-SQL中的字符串中提取主题标签

时间:2017-02-13 09:55:12

标签: sql sql-server sql-server-2008 tsql parsing

-- Sample input: a sentence containing several hashtags.
-- T-SQL requires an explicit data type in DECLARE; the original omitted it.
DECLARE @text VARCHAR(100) = 'i #want to extract all #hastag out of this string, #delhi #Traffic';

期望的输出是一个字符串:"#want,#hastag,#delhi,#Traffic",或者是一个表(每行一个主题标签)。

5 个答案:

答案 0 :(得分:7)

尝试这样

-- Sample sentence containing the hashtags to extract.
DECLARE @text VARCHAR(100) = 'i #want to extract all #hastag out of this string, #delhi #Traffic';

-- Build an XML document with one <x> element per space-separated word.
-- The inner FOR XML PATH('') entity-encodes the text before it is wrapped in tags.
WITH WordXml (Doc) AS
(
    SELECT CAST(
               '<x>'
               + REPLACE((SELECT @text AS [*] FOR XML PATH('')), ' ', '</x><x>')
               + '</x>'
           AS XML)
)
-- Return one row per word whose first character is '#';
-- SUBSTRING(..., 2, 1000) drops that leading '#'.
SELECT SUBSTRING(Tag.value('.', 'nvarchar(max)'), 2, 1000)
FROM WordXml
CROSS APPLY Doc.nodes('x[substring((./text())[1],1,1)="#"]') AS Hit (Tag);

结果(我去掉了开头的 #;如果需要保留它,只需去掉外层的 SUBSTRING)

want
hastag
delhi
Traffic

或者作为预期输出的字符串:

尝试这样

-- Sample sentence containing the hashtags to extract.
DECLARE @text VARCHAR(100) = 'i #want to extract all #hastag out of this string, #delhi #Traffic';

-- Build an XML document with one <x> element per space-separated word.
WITH WordXml (Doc) AS
(
    SELECT CAST(
               '<x>'
               + REPLACE((SELECT @text AS [*] FOR XML PATH('')), ' ', '</x><x>')
               + '</x>'
           AS XML)
)
-- Concatenate every word that starts with '#' as ",<word>";
-- STUFF(..., 1, 1, '') then removes the single leading comma.
SELECT STUFF(
    (
        SELECT ',' + Tag.value('.', 'nvarchar(max)')
        FROM WordXml
        CROSS APPLY Doc.nodes('x[substring((./text())[1],1,1)="#"]') AS Hit (Tag)
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'),
    1, 1, '');

结果

#want,#hastag,#delhi,#Traffic

答案 1 :(得分:1)

这基本上是 SQL Server 的字符串拆分函数代码,网上随处可以找到。基本思路是:先用拆分逻辑把整个句子按空格拆成单词,然后只选出其中带有 '#' 的单词。

-- Split a sentence into words on a one-character delimiter, store the words in
-- #slicedWords, and return the words that are hashtags (start with '#').
DECLARE @String NVARCHAR(200) = N'i #want to extract all #hastag out of this string, #delhi #Traffic';
DECLARE @Delimiter NCHAR(1) = N' ';

-- Recreate the scratch table so the batch can be re-run in the same session.
IF OBJECT_ID('tempdb..#slicedWords') IS NOT NULL DROP TABLE #slicedWords;
CREATE TABLE #slicedWords (word NVARCHAR(100));

DECLARE @idx INT;
-- Same Unicode type as @String so no characters are lost when slicing.
DECLARE @slice NVARCHAR(200);

SELECT @idx = 1;
IF @String IS NULL OR LEN(@String) < 1 RETURN;

-- Treat line breaks and tabs as delimiters; otherwise a multi-line input
-- leaves CR/LF characters glued to the neighbouring word.
SET @String = REPLACE(REPLACE(REPLACE(@String, NCHAR(13), @Delimiter), NCHAR(10), @Delimiter), NCHAR(9), @Delimiter);

WHILE @idx <> 0
BEGIN
    SET @idx = CHARINDEX(@Delimiter, @String);

    -- Everything before the delimiter is the next word; with no delimiter
    -- left, the whole remainder is the last word.
    IF @idx <> 0
        SET @slice = LEFT(@String, @idx - 1);
    ELSE
        SET @slice = @String;

    -- Consecutive delimiters produce empty slices; skip them.
    IF LEN(@slice) > 0
        INSERT INTO #slicedWords (word) VALUES (@slice);

    -- Drop the consumed word and its delimiter. SUBSTRING is used instead of
    -- RIGHT(@String, LEN(@String) - @idx): LEN ignores trailing spaces, so the
    -- RIGHT form gets a negative length (and errors) when the text ends in spaces.
    -- DATALENGTH (bytes) is only used as a safe upper bound for the length.
    SET @String = SUBSTRING(@String, @idx + 1, DATALENGTH(@String));

    IF LEN(@String) = 0
        BREAK;
END

-- '#%' keeps only words that begin with '#'; '%#%' would also match words
-- that merely contain one (e.g. 'C#').
SELECT word
FROM #slicedWords
WHERE word LIKE '#%';

或者,如果您想要单个字符串,请将最后一个 SELECT 改为:
-- Join the hashtags stored in #slicedWords into one ', '-separated string.
-- The separator is two characters long, so STUFF must remove two characters
-- (not one) to avoid leaving a leading space in the result.
SELECT STUFF((SELECT ', ' + word
              FROM #slicedWords
              WHERE word LIKE '#%'   -- only words that begin with '#'
              FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, '');

答案 2 :(得分:0)

使用XML的解决方案:

-- Sample sentence containing the hashtags to extract.
DECLARE @text nvarchar(max)='i #want to extract all #hastag out of this string, #delhi #Traffic'

SELECT STUFF( -- removes the leading comma from the concatenated list
    (
        SELECT ',' + t.c.value('.','nvarchar(max)')
        FROM (
            -- Convert the input text into XML: one <a> element per space-separated word.
            SELECT CAST('<a>'+REPLACE((SELECT @text as [*] FOR XML PATH('')),' ','</a><a>')+'</a>' as xml) as x
            ) as x
        CROSS APPLY x.nodes('/a') as t(c)
        -- Keeps every word that contains '#' anywhere, not only at the start.
        WHERE t.c.exist('. [contains(., "#")]') = 1
        -- TYPE + .value() returns the text un-escaped; plain FOR XML PATH('')
        -- would return &, < and > as &amp;, &lt; and &gt;.
        FOR XML PATH(''), TYPE
    ).value('.','nvarchar(max)'),1,1,'')

输出:

#want,#hastag,#delhi,#Traffic

答案 3 :(得分:0)

使用 Numbers(数字)表的另一种方法:
-- Extract hashtags with a numbers (tally) table: every position holding a space
-- that is immediately followed by '#' marks the start of a hashtag.
-- NOTE(review): assumes a table `numbers` with an integer column `number`
-- starting at 1 and covering at least LEN(@text) + 1 -- confirm it exists.
DECLARE @text varchar(max) = 'i #want to extract all #hastag out of this string, #delhi #Traffic';

-- Prefix one space so a hashtag at the very start of the text is found too.
DECLARE @padded varchar(max) = ' ' + @text;

SELECT
    SUBSTRING(
        @padded,
        n.number + 1,
        CASE
            -- No later space: the hashtag runs to the end of the text.
            WHEN CHARINDEX(' ', @padded, n.number + 1) = 0
                THEN LEN(@padded) - n.number
            -- Otherwise stop just before the next space (hence the extra -1,
            -- which keeps the trailing space out of the result).
            ELSE CHARINDEX(' ', @padded, n.number + 1) - n.number - 1
        END
    ) AS hashtag
FROM numbers AS n
WHERE n.number < LEN(@padded)
  AND SUBSTRING(@padded, n.number, 1) = ' '
  AND SUBSTRING(@padded, n.number + 1, 1) = '#';

输出

#want 
#hastag 
#delhi 
#Traffic

答案 4 :(得分:0)

您可以尝试这种更简单的方法:

-- Split the sentence on spaces and keep the pieces that start with '#'.
-- STRING_SPLIT requires SQL Server 2016+ (database compatibility level 130 or higher).
SELECT parts.value
FROM STRING_SPLIT('Hola adios #Hashtag Hello Good #Morning', ' ') AS parts
WHERE parts.value LIKE '#%';