我需要一个正则表达式模式,用来从表中存储的推文里提取所有话题标签(hashtag)。我的数据是:
// Sample time ranges, listed as [startTime, endTime] pairs and expanded into
// objects. The list is intentionally unsorted and contains overlapping ranges.
var input = [
  [0, 1],
  [3, 5],
  [4, 8],
  [10, 12],
  [9, 10]
].map(function (pair) {
  return { startTime: pair[0], endTime: pair[1] };
});
/**
 * Merges overlapping or touching time ranges into the smallest list of
 * non-overlapping ranges, ordered by start time.
 *
 * Two ranges are merged when the later one starts at or before the point
 * where the earlier one ends (so {9,10} and {10,12} become {9,12}).
 *
 * The caller's array and its range objects are left untouched: sorting is
 * done on a copy and every returned range is a fresh object.
 *
 * @param {Array<{startTime: number, endTime: number}>} input - Ranges in any order.
 * @returns {Array<{startTime: number, endTime: number}>} Merged ranges;
 *   an empty array when `input` is empty.
 */
function mergeRanges(input) {
  function compare(a, b) {
    if (a.startTime < b.startTime) { return -1; }
    if (a.startTime > b.startTime) { return 1; }
    return 0;
  }

  // Sort a shallow copy: Array.prototype.sort works in place and would
  // otherwise reorder the caller's array.
  var sorted = input.slice().sort(compare);

  return sorted.reduce(function (merged, range) {
    var last = merged[merged.length - 1];

    if (last !== undefined && range.startTime <= last.endTime) {
      // Overlaps (or touches) the previous merged range: extend it. Math.max
      // keeps the end intact when the current range is fully contained.
      last.endTime = Math.max(last.endTime, range.endTime);
    } else {
      // Disjoint from everything merged so far: start a new range. Copy the
      // fields so later extensions never write into the caller's objects.
      merged.push({ startTime: range.startTime, endTime: range.endTime });
    }

    return merged;
  }, []);
}
// Run mergeRanges on the sample `input` array; the return value is not used here.
mergeRanges(input);
它只返回 #HashTag1,而没有返回 #SecondHashtag。
我需要得到类似 #HashTag1 #SecondHashtag
这样的输出。谢谢。
答案 0 :(得分:2)
您可以使用regexp_replace
删除与您的模式不匹配的所有内容。
-- Keep only the hashtags of the sample text.
-- The pattern is an alternation: either a hashtag ('#' followed by one or more
-- non-whitespace characters and one optional whitespace character) captured
-- as group 1, or any other single character matched by '.'.
-- Replacing every match with \1 keeps the captured hashtags and drops the rest.
with t (col) as (
select 'My twwet #HashTag1 and this is the #SecondHashtag sample, #onemorehashtag'
from dual
)
select
regexp_replace(col, '(#\S+\s?)|.', '\1')
from t;
输出:
#HashTag1 #SecondHashtag #onemorehashtag
regexp_substr
每次调用只返回一个匹配项。您可以使用 connect by
把字符串中的各个匹配项拆分成多行:
-- Return each hashtag of the sample text on its own row.
-- `level` is passed as the occurrence argument of regexp_substr, so each
-- generated row picks the next match of '#\S+'; connect by stops generating
-- rows once regexp_substr finds no further occurrence and returns null.
-- NOTE(review): shown here against a single-row source; verify the behaviour
-- before using it on a table with several rows.
with t (col) as (
select 'My twwet #HashTag1 and this is the #SecondHashtag sample, #onemorehashtag'
from dual
)
select
regexp_substr(col, '#\S+', 1, level)
from t
connect by regexp_substr(col, '#\S+', 1, level) is not null;
返回:
#HashTag1
#SecondHashtag
#onemorehashtag
\S 匹配任何非空白字符,因此会把紧跟在标签后面的标点也一并匹配进来。最好使用 \w,它只匹配 a-z、A-Z、0-9 和 _。
正如 @mathguy 的评论以及这个网站所指出的:话题标签必须以字母开头,之后可以使用字母、数字或下划线。
因此,模式 #[[:alpha:]]\w*
会更准确。
-- Return each hashtag on its own row using a stricter pattern:
-- '#', then one alphabetic character ([[:alpha:]]), then zero or more word
-- characters (\w*). Punctuation right after a tag (the ',' and '.' in the
-- sample text) is therefore not part of the match.
-- `level` selects the n-th occurrence; connect by stops once regexp_substr
-- returns null for the next occurrence.
with t (col) as (
select 'My twwet #HashTag1, this is the #SecondHashtag. #onemorehashtag'
from dual
)
select
regexp_substr(col, '#[[:alpha:]]\w*', 1, level)
from t
connect by regexp_substr(col, '#[[:alpha:]]\w*', 1, level) is not null;
输出:
#HashTag1
#SecondHashtag
#onemorehashtag