从单个数据点解析数据

时间:2012-11-29 23:59:20

标签: sql sql-server sql-server-2008 tsql

我有一个讨厌的数据集:

+----+------------------------------------------------------------------------------+
| PK |                                 Medications                                  |
+----+------------------------------------------------------------------------------+
|  1 | NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil                  |
|  2 | celexa, lortab, lyrica, ambien, xanax                                        |
|  3 | adipex                                                                       |
|  4 | opana, roxicodone                                                            |
|  5 | adderall                                                                     |
|  6 | hydrocodone/apap                                                             |
|  7 | NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien            |
|  8 | prozac                                                                       |
|  9 | flexeril                                                                     |
| 10 | soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol |
+----+------------------------------------------------------------------------------+

请记住,这只是2列。

我想要返回的只是一个单列列表,其中包含整个数据集中所有不重复的药物(medications):

NAPROXEN
 neurontin
 DOCUSATE
 HYDROCODONE
 BACLOFEN
 advil
celexa
 lortab
 lyrica
 ambien
 xanax
adipex
opana

最好的方法是什么?

非常感谢你的指导。

3 个答案:

答案 0 :(得分:4)

/* Table variable holding the sample data: one comma-separated medication list per row.
   A table variable only lives for the current batch, so the queries that read it
   must be run in the same batch as this declaration. */
DECLARE @Medications TABLE
(
     PK BIGINT PRIMARY KEY IDENTITY(1,1)
    ,Medications NVARCHAR(4000)
)

/* Populate the table with example records.
   Some rows deliberately omit the space after the comma or carry trailing
   spaces, so the split queries have to trim each item. */
INSERT INTO @Medications(Medications) 
VALUES ('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil')
      ,('celexa, lortab, lyrica, ambien, xanax   ')
      ,('adipex')
      ,('opana, roxicodone')
      ,('adderall')
      ,('hydrocodone/apap')
      ,('NEXIUM,METOPROLOL,lipitor,VERAPAMIL,ASPIRIN,WARFARIN,ambien')
      ,('prozac')
      ,('flexeril')
      ,('soma,LITHIUM,MULTI-VITAMIN,fentanyl patch,percocet,PROPANOLOL,tegretol');

选项1

/*
  Option 1: turn each comma-separated list into an XML fragment
  (<X>item</X><X>item</X>...) and shred it into one row per item.
  Returns the distinct, trimmed medication names.
*/
SELECT DISTINCT
    LTRIM(RTRIM(O.splitdata)) AS Medication
FROM
(
    SELECT
        /* Escape XML markup characters before inserting the tags; otherwise a
           value containing &, < or > makes the CAST to XML fail. '&' must be
           escaped first so the other entities are not escaped twice. */
        CAST('<X>'
             + REPLACE(
                   REPLACE(REPLACE(REPLACE(F.Medications, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                   ',', '</X><X>')
             + '</X>' AS XML) AS xmlfilter
    FROM @Medications AS F
) AS F1
CROSS APPLY
(
    /* NVARCHAR(4000) matches the source column, so an item is neither cut
       off after 50 characters nor converted to a non-Unicode type. */
    SELECT fdata.D.value('.', 'NVARCHAR(4000)') AS splitdata
    FROM F1.xmlfilter.nodes('X') AS fdata(D)
) AS O
/* Drop empty items, e.g. those produced by a doubled or trailing comma. */
WHERE O.splitdata <> '';

选项2

/*
  Option 2: hand each list to the full-text parser and return the distinct
  terms it reports. The list is wrapped in double quotes and passed as the
  query string; the remaining arguments are 1033 (LCID), 0 (stoplist id)
  and 0 (accent sensitivity).
  NOTE(review): the parser tokenizes by words, so multi-word or punctuated
  names (e.g. 'fentanyl patch', 'hydrocodone/apap') are presumably returned
  as separate terms - verify before relying on this option.
*/
SELECT DISTINCT
    P.display_term
FROM @Medications AS M
CROSS APPLY sys.dm_fts_parser('"' + M.Medications + '"', 1033, 0, 0) AS P
/* Skip terms starting with 'nn' (presumably the parser's numeric-form terms - confirm). */
WHERE P.display_term NOT LIKE 'nn%';

选项3

/*
  Option 3: split with a numbers ("tally") table.
  Every position where ',' + Medications holds a comma marks the start of an
  item in Medications; the item runs up to the next comma (a comma is
  appended so the last item is terminated too).
*/
;WITH Tally (Number) AS
(
    /* master..spt_values (type 'P') only supplies 0..2047, which would
       silently ignore anything past position 2047 of the NVARCHAR(4000)
       column. Cross-joining the table with itself yields enough rows to
       number every position 1..4001. */
    SELECT TOP (4001) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM master..spt_values AS a
    CROSS JOIN master..spt_values AS b
)
SELECT DISTINCT
    Item.Medication
FROM @Medications AS M
INNER JOIN Tally AS T
    /* LEN counts characters; DATALENGTH would count bytes (two per NVARCHAR character). */
    ON  T.Number <= LEN(M.Medications) + 1
    AND SUBSTRING(',' + M.Medications, T.Number, 1) = ','
CROSS APPLY
(
    SELECT LTRIM(RTRIM(SUBSTRING(M.Medications, T.Number,
               CHARINDEX(',', M.Medications + ',', T.Number) - T.Number))) AS Medication
) AS Item
/* Drop empty items, e.g. those produced by a doubled or trailing comma. */
WHERE Item.Medication <> '';

答案 1 :(得分:3)

这可能是您正在寻找的解决方案:

/*
  Wrap every list item in <r><![CDATA[...]]></r> (the CDATA section keeps
  characters such as & and < from being read as markup), cast the result to
  XML, then shred it into one trimmed value per <r> element and return the
  distinct values.
*/
;WITH SourceData (PK, MedicationsXML) AS
(
    SELECT
        M.PK,
        CAST(N'<r><![CDATA[' + REPLACE(M.Medications, ',', ']]></r><r><![CDATA[') + ']]></r>' AS XML)
    FROM @Medications AS M
)
SELECT DISTINCT
    List.Medicament
FROM SourceData AS S
CROSS APPLY
(
    SELECT DISTINCT
        RTRIM(LTRIM(Tbl.Col.value('.', 'nvarchar(250)'))) AS Medicament
    FROM S.MedicationsXML.nodes('//r') AS Tbl(Col)
) AS List

这是完整的工作示例(我添加了重复的记录,以表明它只返回不重复的值):

-- Suppress the "rows affected" messages while the example runs.
SET NOCOUNT ON;
GO

    /* Table variable with one comma-separated medication list per row. */
    DECLARE @Medications TABLE
    (
        PK          BIGINT PRIMARY KEY IDENTITY(1,1),
        Medications NVARCHAR(4000)
    );

    /* Load the ten sample lists twice, so the result shows that duplicates are removed. */
    INSERT INTO @Medications (Medications)
    VALUES
        ('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil'),
        ('celexa, lortab, lyrica, ambien, xanax   '),
        ('adipex'),
        ('opana, roxicodone'),
        ('adderall'),
        ('hydrocodone/apap'),
        ('NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien'),
        ('prozac'),
        ('flexeril'),
        ('soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol'),
        ('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil'),
        ('celexa, lortab, lyrica, ambien, xanax   '),
        ('adipex'),
        ('opana, roxicodone'),
        ('adderall'),
        ('hydrocodone/apap'),
        ('NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien'),
        ('prozac'),
        ('flexeril'),
        ('soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol');

    /* Wrap each list item in <r><![CDATA[...]]></r> and cast the row to XML. */
    WITH SourceData (PK, MedicationsXML) AS
    (
        SELECT
            M.PK,
            CAST(N'<r><![CDATA[' + REPLACE(M.Medications, ',', ']]></r><r><![CDATA[') + ']]></r>' AS XML)
        FROM @Medications AS M
    )
    /* One trimmed value per <r> element, de-duplicated across all rows. */
    SELECT DISTINCT
        List.Medicament
    FROM SourceData AS S
    CROSS APPLY
    (
        SELECT DISTINCT
            RTRIM(LTRIM(Tbl.Col.value('.', 'nvarchar(250)'))) AS Medicament
        FROM S.MedicationsXML.nodes('//r') AS Tbl(Col)
    ) AS List;


SET NOCOUNT OFF;
GO

随时提出任何问题。

答案 2 :(得分:2)

如果你可以把查询结果通过管道输出到awk:

# Read the |-delimited table from outputcommand and print each medication once.
# Only data rows are processed (second field is the numeric PK), which skips the
# +----+ border lines and the header row. The Medications field is split on
# commas, each name is trimmed, and names already printed are suppressed
# (the comparison is case-sensitive).
outputcommand | awk -F'|' '
    $2 ~ /^[ \t]*[0-9]+[ \t]*$/ {
        n = split($3, meds, ",")
        for (i = 1; i <= n; i++) {
            gsub(/^[ \t]+|[ \t]+$/, "", meds[i])
            if (meds[i] != "" && !seen[meds[i]]++) print meds[i]
        }
    }'

如果它是一个简单的文本文件,您可以使用cat filename作为outputcommand,或者直接让awk读取该文件:

# Read the |-delimited table from some_filename and print each medication once.
# Only data rows are processed (second field is the numeric PK), which skips the
# +----+ border lines and the header row. The Medications field is split on
# commas, each name is trimmed, and names already printed are suppressed
# (the comparison is case-sensitive).
awk -F'|' '
    $2 ~ /^[ \t]*[0-9]+[ \t]*$/ {
        n = split($3, meds, ",")
        for (i = 1; i <= n; i++) {
            gsub(/^[ \t]+|[ \t]+$/, "", meds[i])
            if (meds[i] != "" && !seen[meds[i]]++) print meds[i]
        }
    }' some_filename