假设我有一个名为PHRASES
的表,其中包含一些文本字符串
+--+---------------+
|ID|PHRASE |
+--+---------------+
|0 |"HELLO BYE YES"|
+--+---------------+
|1 |"NO WHY NOT" |
+--+---------------+
|2 |"NO YES" |
+--+---------------+
我想统计以下每个字词出现的次数,并把结果填入OCCURRENCE
列中;让我们把这张表称为KEYWORDS
:
+--------+----------+
|KEYWORD |OCCURRENCE|
+--------+----------+
|"YES" |NULL |
+--------+----------+
|"NO" |NULL |
+--------+----------+
|"HELLO" |NULL |
+--------+----------+
|"CHEESE"|NULL |
+--------+----------+
我现在想编写一个将KEYWORDS
更新为以下内容的查询:
+--------+----------+
|KEYWORD |OCCURRENCE|
+--------+----------+
|"YES" |2 |
+--------+----------+
|"NO" |2 |
+--------+----------+
|"HELLO" |1 |
+--------+----------+
|"CHEESE"|0 |
+--------+----------+
请注意,我已经有一个名为dbo.RegExIsMatch
的函数可以处理字符串匹配,如果参数1与参数2中的字符串匹配则返回1
:
-- Attempt to fill KEYWORDS.OCCURRENCE with the number of phrases matching each keyword.
-- NOTE(review): the KEYWORDS named in the CROSS JOIN below is a second, independent
-- reference to the table. KEYWORDS.KEYWORD inside the subquery binds to that inner
-- reference, not to the row being updated, so the subquery is uncorrelated: it yields
-- one grand total over all keyword/phrase pairs and that same value is written to
-- every row.
UPDATE KEYWORDS SET OCCURRENCE =
(
SELECT SUM
(
-- the following returns 1 if the keyword exists in the phrase, or 0 otherwise
CASE WHEN dbo.RegExIsMatch('.*' + KEYWORDS.KEYWORD + '.*',PHRASES.PHRASE,1) = 1 THEN 1 ELSE 0 END
)
FROM PHRASES
CROSS JOIN KEYWORDS
)
这不起作用,它最终只会用相同的数字填充每一行。我敢肯定这是一个简单的问题,只是我一时想不明白。
答案 0 :(得分:0)
您的查询中引用了三次表(KEYWORDS出现了两次),但问题中只有两个表。这是您想要的吗?
-- For every keyword, store the number of phrases that match it.
-- PHRASES is the only table in the subquery's FROM clause, so KEYWORDS.KEYWORD refers
-- to the row currently being updated (a correlated subquery).
-- The match test lives in WHERE and the rows are counted with COUNT(*), rather than
-- SUM(CASE ...), for two reasons:
--   * SQL Server rejects an aggregate whose argument mixes an outer reference with
--     columns of the inner query (error 8124);
--   * COUNT(*) returns 0 when nothing matches or PHRASES is empty, whereas SUM over
--     zero rows returns NULL.
UPDATE KEYWORDS
SET OCCURRENCE = (
    SELECT COUNT(*)
    FROM PHRASES
    WHERE dbo.RegExIsMatch('.*' + KEYWORDS.KEYWORD + '.*', PHRASES.PHRASE, 1) = 1
);
否则,如果您有三个表,则需要将子查询与外表关联。
答案 1 :(得分:0)
这似乎有效
-- Refresh KEYWORDS.OCCURRENCE with the number of phrases that match each keyword.
MERGE INTO KEYWORDS AS masterList
USING (
    -- One row per keyword that matches at least one phrase, with its match count.
    -- Columns are qualified with the alias keywordList: once a table is aliased in
    -- FROM, its original name can no longer be used to qualify columns in that query.
    SELECT
        keywordList.KEYWORD AS KEYWORD,
        COUNT(*) AS OCCURRENCE
    FROM KEYWORDS AS keywordList
    CROSS JOIN PHRASES AS phraseList
    WHERE dbo.RegExIsMatch('.*' + keywordList.KEYWORD + '.*', phraseList.PHRASE, 1) = 1
    GROUP BY keywordList.KEYWORD
) AS frequencyList
    ON masterList.KEYWORD = frequencyList.KEYWORD
WHEN MATCHED THEN
    UPDATE SET masterList.OCCURRENCE = frequencyList.OCCURRENCE
-- Keywords that match no phrase are absent from frequencyList; give them an explicit
-- 0 instead of leaving whatever value (e.g. NULL) they had before.
WHEN NOT MATCHED BY SOURCE THEN
    UPDATE SET masterList.OCCURRENCE = 0;
答案 2 :(得分:0)
因为我没有你的函数dbo.RegExIsMatch
进行测试,所以我只使用了sqlserver-out-of-the-box-stuff来提出这个稍微不同的例子。
您可能在任何地方都获得了1
的计数,因为您使用的SUM
没有GROUP BY
。
请注意,这并非100%准确,因为我没有使用正则表达式,而只用了“简单粗暴”的字符串函数;但如果您把正则表达式函数改成执行正则替换,就可以用它来代替我对REPLACE
的调用,从而得到正确的结果。
另一个小改动是为所有关键字设置0
而不是NULL
的初始值。
另请注意,我不再使用CROSS JOIN
,而是只与包含该关键字的短语进行连接,这样出现次数就不会被多次覆盖,这也是我想要的效果。
-- Seed the keyword list with an initial OCCURRENCE of 0 (rather than NULL).
-- UNION ALL is used because the four literal rows are already distinct, so the
-- implicit de-duplication performed by plain UNION is unnecessary work.
INSERT INTO KEYWORDS (KEYWORD, OCCURRENCE)
SELECT 'YES', 0
UNION ALL
SELECT 'NO', 0
UNION ALL
SELECT 'HELLO', 0
UNION ALL
SELECT 'CHEESE', 0;
-- Set each keyword's OCCURRENCE to the total number of times it appears, as a plain
-- substring, across all phrases (so 'NO' is also counted inside 'NOT').
--
-- The per-phrase counts are summed in a derived table first and then joined back once
-- per keyword. Updating directly through a join that yields several phrase rows per
-- keyword does not accumulate: SQL Server applies only one arbitrary matching row.
-- The result is assigned rather than added to the previous value, so the statement
-- gives the same outcome on re-runs and when OCCURRENCE starts out as NULL.
UPDATE k
SET k.OCCURRENCE = COALESCE(totals.OCCURRENCE, 0)  -- keywords found nowhere get 0
FROM KEYWORDS AS k
LEFT JOIN (
    SELECT
        kw.KEYWORD,
        -- occurrences in one phrase = (characters removed by REPLACE) / keyword length.
        -- LEN ignores trailing spaces, so a '|' sentinel is appended before measuring
        -- to keep spaces left at the end of a string from skewing the lengths.
        SUM(
            (LEN(p.PHRASE + '|') - LEN(REPLACE(p.PHRASE, kw.KEYWORD, '') + '|'))
            / (LEN(kw.KEYWORD + '|') - 1)
        ) AS OCCURRENCE
    FROM KEYWORDS AS kw
    INNER JOIN PHRASES AS p
        -- only phrases containing the keyword; CHARINDEX returns 0 for an empty
        -- keyword, which also keeps the divisor above from being 0
        ON CHARINDEX(kw.KEYWORD, p.PHRASE) > 0
    GROUP BY kw.KEYWORD
) AS totals
    ON totals.KEYWORD = k.KEYWORD;
PS:对于那个“simple stupid”
的字符串计数,我参考了这个回答(包括其中的评论)。
答案 3 :(得分:-1)
请试试这个方法,它在我这边可以正常工作。
------------- Table creation
-- Sample phrases, held in a table variable; the double quotes are part of the data.
declare @PHRASE table (ID int, PHRASE varchar(max));

insert into @PHRASE (ID, PHRASE)
values
    (0, '"Hello Bye Yes"'),
    (1, '"No Why Not"'),
    (2, '"No Yes"');

-- Show the phrases that were loaded.
select ID, PHRASE from @PHRASE;

-- Keywords to count; OCCURANCE starts as NULL and is filled in by the script below.
declare @Keywords table (KEYWORD varchar(10), OCCURANCE int);

insert into @Keywords (KEYWORD, OCCURANCE)
values
    ('YES', null),
    ('NO', null),
    ('HELLO', null),
    ('CHEESE', null);

-- Show the keywords before counting.
select KEYWORD, OCCURANCE from @Keywords;
----------Script for requirement
-- Splits every phrase in @PHRASE into space-separated words, collects the words in
-- #table, then writes each keyword's word count into @Keywords.OCCURANCE (0 when the
-- keyword never occurs). Keywords are compared to words with "=", so whether 'YES'
-- matches 'Yes' depends on the collation in effect.

-- Drop a leftover temp table from an earlier, interrupted run so the script can be re-run.
if object_id('tempdb..#table') is not null
    drop table #table;

create table #table (name varchar(max));

-- varchar(max) matches the PHRASE column, so long phrases and long words are not
-- silently truncated.
declare @str varchar(max);
declare @tmpstr varchar(max);

declare curs_Fp cursor local fast_forward for
    select c.PHRASE from @PHRASE c;

open curs_Fp;
fetch next from curs_Fp into @str;

while @@FETCH_STATUS = 0
begin
    -- Strip the surrounding double quotes and append a trailing space so that the
    -- last word is terminated by a separator like all the others.
    set @str = replace(@str, '"', '') + ' ';

    -- Peel off one word per iteration until no separator is left.
    while charindex(' ', @str) > 0
    begin
        set @tmpstr = substring(@str, 1, charindex(' ', @str) - 1);
        insert into #table (name) select @tmpstr;
        -- len(@str) is used only as a "long enough" length for the remainder.
        set @str = substring(@str, charindex(' ', @str) + 1, len(@str));
    end

    fetch next from curs_Fp into @str;
end

close curs_Fp;
deallocate curs_Fp;

-- The left join keeps keywords that never occur; coalesce turns their missing count into 0.
update y
set y.OCCURANCE = coalesce(x.occurance, 0)
from @Keywords y
left join (
    select a.name, count(a.name) as occurance
    from #table a
    group by a.name
) x
    on y.KEYWORD = x.name;

select KEYWORD, OCCURANCE from @Keywords;

drop table #table;