我有一个表,其中包含单词列表以及每个单词对应的词形(WordForms)。表中的典型数据如下所示。请注意,有些行的 WordForms 列以 ", e.g." 结尾,而有些行则直接以最后一个词形结尾。
Id Word WordForms
1 abandon abandoned, abandoning, abandonment, abandons
2 abstract abstraction, abstractions, abstractly, abstracts, e.g.
以下是源数据表的布局:
-- Source (staging) table: one row per word, with all of its word forms packed
-- into a single comma-separated string.
CREATE TABLE [dbo].[TempWords] (
    [Id]        INT IDENTITY (1, 1) NOT NULL,  -- generated row number, starting at 1
    [Word]      NVARCHAR (MAX)      NOT NULL,
    [WordForms] NVARCHAR (MAX)      NULL       -- comma-separated list of forms; optional
);
我想使用这些数据来填充两个表。我知道可以使用 SQL 的 INSERT INTO,但我认为它一次只能填充一个表。我想做的是:取每一行的 Word,将其放入 Words 表中,然后把用逗号分隔的词形拆开,并将它们逐个放入 WordForms 表中。
-- Parent table: one row per word.
-- WordId is the surrogate key; the primary key guarantees it stays unique
-- (IDENTITY alone does not) and gives child tables something to reference.
CREATE TABLE [dbo].[Words] (
    [WordId] INT IDENTITY (1, 1) NOT NULL,
    [Word]   NVARCHAR (MAX)      NOT NULL,
    CONSTRAINT [PK_Words] PRIMARY KEY CLUSTERED ([WordId] ASC)
);
-- Child table: one row per individual word form.
-- NOTE(review): WordId is presumably meant to point at dbo.Words.WordId; a
-- FOREIGN KEY can only be declared once that column carries a PRIMARY KEY or
-- UNIQUE constraint, so none is declared here.
CREATE TABLE [dbo].[WordForms] (
    [Id]     INT IDENTITY (1, 1) NOT NULL,
    [WordId] INT                 NOT NULL,
    [Text]   NVARCHAR (MAX)      NULL,
    CONSTRAINT [PK_WordForms] PRIMARY KEY CLUSTERED ([Id] ASC)
);
有人可以给我一些关于如何做到这一点的提示吗?
答案 0 :(得分:7)
首先,您可以创建UDF函数来分隔CSV值。
-- Splits @InputString on @Delimiter and returns the pieces, one row per item.
--
-- Parameters:
--   @InputString  Text to split. NULL produces a single row whose Item is NULL.
--   @Delimiter    Separator; may be longer than one character.
--                 NULL or the empty string means ','.
--                 A delimiter made up only of spaces means "split on spaces and
--                 commas": every space in the input is first turned into a comma.
-- Returns:
--   @Items(Item)  The text found between delimiters. Items are not trimmed, and
--                 adjacent delimiters produce empty-string items.
--
-- Both parameters and the result are NVARCHAR so that NVARCHAR(MAX) columns can
-- be passed in without being cut off at 8000 characters or losing characters
-- that are outside the database code page.
CREATE FUNCTION dbo.fn_Split (
    @InputString NVARCHAR(MAX),
    @Delimiter   NVARCHAR(50)
)
RETURNS @Items TABLE (
    Item NVARCHAR(MAX)
)
AS
BEGIN
    -- The empty-string test uses DATALENGTH because T-SQL ignores trailing
    -- spaces when comparing strings: '' = ' ' is true, so an "= ' '" test that
    -- ran first would also swallow the empty delimiter.
    IF @Delimiter IS NULL OR DATALENGTH(@Delimiter) = 0
    BEGIN
        SET @Delimiter = N','
    END
    ELSE IF @Delimiter = N' '
    BEGIN
        SET @Delimiter = N','
        SET @InputString = REPLACE(@InputString, N' ', @Delimiter)
    END

    DECLARE @Remaining  NVARCHAR(MAX)
    DECLARE @DelimLen   INT
    DECLARE @DelimIndex BIGINT

    SET @Remaining = @InputString
    -- LEN() ignores trailing spaces, so append a sentinel character to measure
    -- delimiters such as ', ' correctly.
    SET @DelimLen = LEN(@Delimiter + N'.') - 1
    SET @DelimIndex = CHARINDEX(@Delimiter, @Remaining)

    WHILE (@DelimIndex > 0)
    BEGIN
        -- Everything before the delimiter is the next item.
        INSERT INTO @Items (Item) VALUES (LEFT(@Remaining, @DelimIndex - 1))

        -- Skip the whole delimiter, not just its first character. DATALENGTH is
        -- only an upper bound for "the rest of the string"; unlike LEN it does
        -- not drop trailing spaces.
        SET @Remaining = SUBSTRING(@Remaining, @DelimIndex + @DelimLen, DATALENGTH(@Remaining))
        SET @DelimIndex = CHARINDEX(@Delimiter, @Remaining)
    END -- End WHILE

    -- What is left is the last item, or the whole input (possibly NULL) when no
    -- delimiter was found at all.
    INSERT INTO @Items (Item) VALUES (@Remaining)

    RETURN
END -- End Function
GO
然后,您可以使用以下INSERT语句来填充表。
-- Step 1: load the parent table so that every word has a WordId.
-- DISTINCT keeps a word that appears more than once in TempWords from being
-- inserted twice, which would multiply its forms in the join in step 2.
INSERT INTO [dbo].[Words] ([Word])
SELECT DISTINCT TW.[Word]
FROM [dbo].[TempWords] AS TW;

-- Step 2: split each comma-separated WordForms string and attach every form to
-- the WordId of its word.
-- The filter drops pieces that are empty after trimming, the trailing 'e.g.'
-- marker, and the NULL item produced for rows whose WordForms is NULL
-- (a NULL never satisfies NOT IN).
INSERT INTO [dbo].[WordForms] ([WordId], [Text])
SELECT
    W.[WordId],
    LTRIM(RTRIM(FNT.Item)) AS [Text]
FROM [dbo].[TempWords] AS TW
INNER JOIN [dbo].[Words] AS W
    ON W.[Word] = TW.[Word]
CROSS APPLY dbo.fn_Split(TW.[WordForms], ',') AS FNT
WHERE LTRIM(RTRIM(FNT.Item)) NOT IN (N'', N'e.g.');

-- Show the result in a stable order.
SELECT [WordId], [Word]
FROM [dbo].[Words]
ORDER BY [WordId];

SELECT [Id], [WordId], [Text]
FROM [dbo].[WordForms]
ORDER BY [Id];
答案 1 :(得分:5)
您可以先将单词插入第一个表,然后解析词形,并将它们连同指向父表的链接一起插入子表。
指向父表的链接可以通过联接 word 列(我猜它是唯一的)来获得,也可以借助 MERGE + OUTPUT 在一步之内同时取得 SOURCE.ID(来自 @words_csv)和 INSERTED.ID。您喜欢哪种方式都可以。
解析同样可以通过多种方式实现,请查看下面的示例(实际上我并不建议用 SQL 来做解析)。
-- Demo: split comma-separated word forms into one row per form with a
-- recursive CTE. A table variable with two sample rows stands in for the
-- source table.
DECLARE @words_csv TABLE (Id INT IDENTITY(1, 1), Word VARCHAR(100), WordForms VARCHAR(1000))
INSERT INTO @words_csv(word, wordforms)
VALUES
('abandon', 'abandoned, abandoning, abandonment, abandons, e.g.'),
('abstract', 'abstraction, abstractions, abstractly, abstracts')
-- Optional step, left disabled: load the parent table from the sample rows.
--INSERT INTO [dbo].[Words](word)
--SELECT w.word
--FROM @words_csv w
-- Each level of the CTE peels one item off the front of the list:
--   wordform  = the item taken at this level (trimmed)
--   wordforms = the part of the list that is still unprocessed
;WITH word_forms_extracted AS
(
-- Anchor: take the text before the first comma. The CASE has no ELSE, so
-- wordform is NULL when the list contains no comma; in that case CHARINDEX
-- is 0 and STUFF removes nothing, leaving the whole list for the recursive step.
SELECT w.id,
w.word,
ltrim(rtrim(cast(case when CHARINDEX(',', w.WordForms) > 0 then substring(w.wordforms, 1, CHARINDEX(',', w.WordForms)-1) end AS VARCHAR(1000)))) wordform,
stuff(w.wordforms, 1, CHARINDEX(',', w.WordForms), '') wordforms
FROM @words_csv w
UNION ALL
-- Recursive step: take the next item from the remainder. When no comma is
-- left, the whole remainder is the item and the new remainder becomes ''.
SELECT w.id,
w.word,
ltrim(rtrim(cast(case when CHARINDEX(',', wfe.WordForms) > 0 then substring(wfe.wordforms, 1, CHARINDEX(',', wfe.WordForms)-1) else wfe.wordforms end AS VARCHAR(1000)))) wordform,
case when CHARINDEX(',', wfe.WordForms) > 0 then stuff(wfe.wordforms, 1, CHARINDEX(',', wfe.WordForms), '') ELSE '' end wordforms
FROM @words_csv w
INNER JOIN word_forms_extracted wfe
ON wfe.id = w.id
-- Stop recursing once nothing is left to split.
WHERE wfe.wordforms != ''
)
SELECT wf.id, wf.word, wf.wordform
FROM word_forms_extracted wf
-- Disabled join: enable it to look up the parent row in dbo.Words by word.
--INNER JOIN [dbo].[Words] w
--ON w.word = wf.word
-- Drop empty items and the 'e.g.' marker; rows whose wordform is NULL are
-- dropped as well, because NULL never satisfies NOT IN.
WHERE wf.wordform NOT IN ('', 'e.g.')
ORDER BY wf.id, wf.wordform
-- One recursion level is used per item, so this allows up to 1000 levels per
-- row (the SQL Server default limit is 100).
OPTION(MAXRECURSION 1000)
最终的 SELECT 可以轻松修改为 INSERT INTO dbo.WordForms (...) SELECT ...;在这里通过联接 word 列,即可获得指向 dbo.Words 的链接。
答案 2 :(得分:3)
借助XML:
-- Step 1: load the parent table, one row per distinct word.
INSERT INTO [dbo].[Words] ([Word])
SELECT DISTINCT [Word]
FROM [dbo].[TempWords];

-- Step 2: turn every source row into
--   <row><word>WORD</word><w>form</w><w>form</w>...</row>
-- by replacing each comma with a closing/opening <w> tag pair.
-- '&', '<' and '>' are escaped first ('&' before the others, so the entities
-- themselves are not escaped again); without this a single such character in
-- the data makes the CAST to xml fail.
DECLARE @xml xml;

SELECT @xml = (
    SELECT CAST(
               N'<row><word>' + esc.[Word] + N'</word><w>'
               + REPLACE(esc.[WordForms], N',', N'</w><w>')
               + N'</w></row>' AS xml)
    FROM [dbo].[TempWords] AS tw
    CROSS APPLY (
        SELECT
            REPLACE(REPLACE(REPLACE(tw.[Word],      N'&', N'&amp;'), N'<', N'&lt;'), N'>', N'&gt;') AS [Word],
            REPLACE(REPLACE(REPLACE(tw.[WordForms], N'&', N'&amp;'), N'<', N'&lt;'), N'>', N'&gt;') AS [WordForms]
    ) AS esc
    FOR XML PATH('')
);

-- Step 3: shred the <w> elements back into rows and attach each form to the
-- WordId of the <word> in the same <row>.
-- INNER JOIN, because WordForms.WordId is NOT NULL and an unmatched row could
-- not be inserted anyway. Forms are trimmed because the list is split on ','
-- alone; empty pieces and the 'e.g.' marker are skipped.
INSERT INTO [dbo].[WordForms] ([WordId], [Text])
SELECT
    w.[WordId],
    f.[Text]
FROM @xml.nodes('/row/w') AS t(v)
CROSS APPLY (
    SELECT LTRIM(RTRIM(t.v.value('.', 'nvarchar(max)'))) AS [Text]
) AS f
INNER JOIN [dbo].[Words] AS w
    ON w.[Word] = t.v.value('../word[1]', 'nvarchar(max)')
WHERE f.[Text] NOT IN (N'', N'e.g.');
[dbo].[Words] 中的结果:
WordId Word
1 abandon
2 abstract
[dbo].[WordForms] 中的结果:
Id WordId Text
1 1 abandoned
2 1 abandoning
3 1 abandonment
4 1 abandons
5 2 abstraction
6 2 abstractions
7 2 abstractly
8 2 abstracts