我有一个讨厌的数据集:
+----+------------------------------------------------------------------------------+
| PK | Medications |
+----+------------------------------------------------------------------------------+
| 1 | NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil |
| 2 | celexa, lortab, lyrica, ambien, xanax |
| 3 | adipex |
| 4 | opana, roxicodone |
| 5 | adderall |
| 6 | hydrocodone/apap |
| 7 | NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien |
| 8 | prozac |
| 9 | flexeril |
| 10 | soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol |
+----+------------------------------------------------------------------------------+
请注意,这个表只有 2 列(PK 和 Medications)。
我想要返回的只是一个单列列表,其中包含整个数据集中所有不重复的药物(medications):
NAPROXEN
neurontin
DOCUSATE
HYDROCODONE
BACLOFEN
advil
celexa
lortab
lyrica
ambien
xanax
adipex
opana
最好的方法是什么?
非常感谢你的指导。
答案 0 :(得分:4)
/* Table variable holding the sample data: one row per record, with all of the
   record's medications stored as a single comma-separated string. */
DECLARE @Medications TABLE
(
     PK          BIGINT PRIMARY KEY IDENTITY(1,1)
    ,Medications NVARCHAR(4000)
);

/* Populate the table with example records.
   Spacing around the commas is inconsistent (and row 2 ends with a trailing space),
   so the split queries must trim every extracted value. */
INSERT INTO @Medications (Medications)
VALUES ('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil')
      ,('celexa, lortab, lyrica, ambien, xanax ')
      ,('adipex')
      ,('opana, roxicodone')  -- two medications: the separating comma is required
      ,('adderall')
      ,('hydrocodone/apap')
      ,('NEXIUM,METOPROLOL,lipitor,VERAPAMIL,ASPIRIN,WARFARIN,ambien')
      ,('prozac')
      ,('flexeril')
      ,('soma,LITHIUM,MULTI-VITAMIN,fentanyl patch,percocet,PROPANOLOL,tegretol');
选项1
-- Option 1: rewrite each comma-separated list as an XML fragment
-- (<X>a</X><X>b</X>...) and shred the <X> nodes back into one row per value.
SELECT DISTINCT
    LTRIM(RTRIM(O.splitdata)) AS Medication
FROM
(
    SELECT
        -- The FOR XML PATH('') subquery entitizes &, < and > in the raw text so
        -- that such characters in a medication name cannot break the XML cast.
        CAST('<X>'
             + REPLACE((SELECT F.Medications AS [*] FOR XML PATH('')), ',', '</X><X>')
             + '</X>' AS XML) AS xmlfilter
    FROM @Medications AS F
) AS F1
CROSS APPLY
(
    -- nvarchar(4000) matches the source column, so values are neither truncated
    -- nor converted to a non-Unicode code page.
    SELECT fdata.D.value('.', 'nvarchar(4000)') AS splitdata
    FROM F1.xmlfilter.nodes('X') AS fdata(D)
) AS O
-- Skip empty entries produced by doubled or trailing commas.
WHERE LTRIM(RTRIM(O.splitdata)) <> '';
选项2
-- Option 2: hand each list to the full-text parser and return the distinct terms.
-- sys.dm_fts_parser arguments: the double-quoted text to parse, LCID 1033,
-- stoplist id 0, accent sensitivity 0.
-- NOTE(review): the parser tokenizes on word boundaries rather than on commas, so
-- multi-word entries such as 'fentanyl patch' are presumably returned as separate
-- terms - verify before relying on this option.
SELECT DISTINCT
    parsed.display_term
FROM @Medications AS meds
CROSS APPLY sys.dm_fts_parser('"' + meds.Medications + '"', 1033, 0, 0) AS parsed
-- NOTE(review): presumably meant to drop the parser's normalized numeric terms;
-- it also drops any genuine term that starts with "nn".
WHERE parsed.display_term NOT LIKE 'nn%';
选项3
-- Option 3: split with a numbers (tally) table. Each Number that lands on a comma
-- in (',' + Medications) is the start position of one value in Medications.
-- The tally is generated inline instead of read from master..spt_values, which is
-- an undocumented system table whose type = 'P' rows stop at 2047 and would
-- silently miss values that start later in an NVARCHAR(4000) string.
;WITH Digits (d) AS
(
    SELECT v.d
    FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS v (d)
),
Numbers (Number) AS
(
    -- 1..4001: every possible start position of a value in an NVARCHAR(4000) string
    SELECT TOP (4001) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM Digits AS a
    CROSS JOIN Digits AS b
    CROSS JOIN Digits AS c
    CROSS JOIN Digits AS e
)
SELECT DISTINCT
    token.Medication
FROM @Medications AS m
INNER JOIN Numbers AS n
    -- LEN counts characters; DATALENGTH would count bytes (two per NVARCHAR character)
    ON  n.Number <= LEN(m.Medications) + 1
    AND SUBSTRING(',' + m.Medications, n.Number, 1) = ','
CROSS APPLY
(
    -- The value runs from its start position up to (not including) the next comma;
    -- the appended ',' guarantees the last value also has a terminating comma.
    SELECT LTRIM(RTRIM(SUBSTRING(m.Medications,
                                 n.Number,
                                 CHARINDEX(',', m.Medications + ',', n.Number) - n.Number))) AS Medication
) AS token
-- Skip empty entries produced by doubled or trailing commas.
WHERE token.Medication <> '';
答案 1 :(得分:3)
这可能是您正在寻找的解决方案:
-- Turn every comma-separated list into an XML fragment in which each value is its
-- own <r> element, then shred the <r> elements into rows.
;WITH SourceData (PK, MedicationsXML) AS
(
    -- CDATA keeps characters such as & and < from breaking the XML cast.
    -- NOTE(review): a value containing the literal sequence ]]> would still end
    -- the CDATA section early and make the cast fail.
    SELECT PK
          ,CAST(N'<r><![CDATA[' + REPLACE(Medications, ',', ']]></r><r><![CDATA[') + ']]></r>' AS XML)
    FROM @Medications
)
SELECT DISTINCT List.Medicament
FROM SourceData AS src
CROSS APPLY
(
    -- nvarchar(4000) matches the source column, so long values are not truncated.
    -- The <r> elements are all top-level, so the path 'r' is sufficient.
    SELECT RTRIM(LTRIM(Tbl.Col.value('.', 'nvarchar(4000)'))) AS Medicament
    FROM src.MedicationsXML.nodes('r') AS Tbl(Col)
) AS List
-- Skip empty entries produced by doubled or trailing commas.
WHERE List.Medicament <> N'';
这是完整的可运行示例(我添加了重复的记录,以说明它只返回不重复的值):
/* Complete, self-contained example: builds the sample data (every row is inserted
   twice to show that duplicates are collapsed) and returns the distinct
   medications, one per row. */
SET NOCOUNT ON;
GO

/* Declare the table that holds the data */
DECLARE @Medications TABLE
(
     PK          BIGINT PRIMARY KEY IDENTITY(1,1)
    ,Medications NVARCHAR(4000)
);

/* Populate the table with example records */
INSERT INTO @Medications (Medications)
VALUES ('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil')
      ,('celexa, lortab, lyrica, ambien, xanax ')
      ,('adipex')
      ,('opana, roxicodone')
      ,('adderall')
      ,('hydrocodone/apap')
      ,('NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien')
      ,('prozac')
      ,('flexeril')
      ,('soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol')
      ,('NAPROXEN, neurontin, DOCUSATE, HYDROCODONE, BACLOFEN, advil')
      ,('celexa, lortab, lyrica, ambien, xanax ')
      ,('adipex')
      ,('opana, roxicodone')
      ,('adderall')
      ,('hydrocodone/apap')
      ,('NEXIUM, METOPROLOL, lipitor, VERAPAMIL, ASPIRIN, WARFARIN, ambien')
      ,('prozac')
      ,('flexeril')
      ,('soma, LITHIUM, MULTI-VITAMIN, fentanyl patch, percocet, PROPANOLOL, tegretol');

/* Turn every comma-separated list into an XML fragment in which each value is its
   own <r> element, then shred the <r> elements into rows. */
;WITH SourceData (PK, MedicationsXML) AS
(
    -- CDATA keeps characters such as & and < from breaking the XML cast.
    -- NOTE(review): a value containing the literal sequence ]]> would still end
    -- the CDATA section early and make the cast fail.
    SELECT PK
          ,CAST(N'<r><![CDATA[' + REPLACE(Medications, ',', ']]></r><r><![CDATA[') + ']]></r>' AS XML)
    FROM @Medications
)
SELECT DISTINCT List.Medicament
FROM SourceData AS src
CROSS APPLY
(
    -- nvarchar(4000) matches the source column, so long values are not truncated.
    -- The <r> elements are all top-level, so the path 'r' is sufficient.
    SELECT RTRIM(LTRIM(Tbl.Col.value('.', 'nvarchar(4000)'))) AS Medicament
    FROM src.MedicationsXML.nodes('r') AS Tbl(Col)
) AS List
-- Skip empty entries produced by doubled or trailing commas.
WHERE List.Medicament <> N'';

SET NOCOUNT OFF;
GO
随时提出任何问题。
答案 2 :(得分:2)
如果你可以把输出通过管道传给 awk:
# Print each distinct medication once, one per line.
# Only data rows are processed: rows whose PK column ($2 when splitting on "|") is
# numeric, which skips the +----+ border lines and the header row. The Medications
# column ($3) is split on commas and every name is trimmed before de-duplication.
outputcommand | awk -F'|' '
    $2 ~ /^[ \t]*[0-9]+[ \t]*$/ {
        n = split($3, meds, ",")
        for (i = 1; i <= n; i++) {
            name = meds[i]
            gsub(/^[ \t]+|[ \t]+$/, "", name)   # trim surrounding blanks
            if (name != "" && !seen[name]++)    # first occurrence only
                print name
        }
    }'
如果数据是一个简单的文本文件,可以用 cat filename 作为 outputcommand,或者直接让 awk 读取该文件:
# Print each distinct medication found in some_filename once, one per line.
# Only data rows are processed: rows whose PK column ($2 when splitting on "|") is
# numeric, which skips the +----+ border lines and the header row. The Medications
# column ($3) is split on commas and every name is trimmed before de-duplication.
awk -F'|' '
    $2 ~ /^[ \t]*[0-9]+[ \t]*$/ {
        n = split($3, meds, ",")
        for (i = 1; i <= n; i++) {
            name = meds[i]
            gsub(/^[ \t]+|[ \t]+$/, "", name)   # trim surrounding blanks
            if (name != "" && !seen[name]++)    # first occurrence only
                print name
        }
    }' some_filename