我想在 JavaScript 中用正则表达式(Regex)识别足球比赛信息:
1
15/06 16:00
Brasília
Brasilien
3:0 (1:0)
Japan
2
23/06 16:00
Recife
Uruguay
-
Tahiti
本文包含:
我已经使用http://regex101.com/网站构建了一个正则表达式:
(\d\d\/\d\d)\s(\d\d:\d\d)\s(.+)\s\s\s(.+)\s(?:-|(\d):(\d)\s\(\d:\d\))\s(.+)
这个正则表达式应该能同时匹配两种情况(有比分和没有比分)。以下是完整测试内容的链接: http://regex101.com/r/bF3lU4
使用NodeJS的JavaScript代码:
/**
 * Scans `data` for fixture entries and logs five fields for every match:
 * date, time, place, home team and whatever capture group 5 holds.
 *
 * NOTE: in the pattern below, group 5 is the first `(\d)` inside the
 * `(?:-|...)` alternative, so the value printed after "Away:" is that digit
 * when a score is present and `undefined` when the `-` branch matches.
 *
 * @param {string} data - Text to search; the pattern is applied globally.
 */
function CreateMatchesFromString(data)
{
    var fixturePattern = /(\d\d\/\d\d)\s(\d\d:\d\d)\s(.+)\s\s\s(.+)\s(?:-|(\d):(\d)\s\(\d:\d\))\s(.+)/g;
    // Label i is printed together with capture group i + 1.
    var labels = ["date:", "time:", "place:", "Home:", "Away:"];

    // With the /g flag each exec() call resumes after the previous match.
    for (var hit = fixturePattern.exec(data); hit !== null; hit = fixturePattern.exec(data))
    {
        for (var i = 0; i < labels.length; i++)
        {
            console.log(labels[i] + hit[i + 1]);
        }
    }
}
但是我在第 5 个捕获组(Capture Group 5)中没有得到客队(Away-Team)!我的输出:
date:26/06
time:22:00
place:Curitiba
Home:Algerien
Away:undefined
只有当我不使用"|"来构造备选分支时,才能得到客队:
(\d\d\/\d\d)\s(\d\d:\d\d)\s(.+)\s\s\s(.+)\s-\s(.+)
或者当我用"["和"]"代替"("和")"来对备选分支进行分组时,也能得到。
问题出在哪里?这是 Node.js 正则表达式引擎的一个 bug(它忽略了最后一个捕获组)吗?还是我的正则表达式写错了?
最诚挚的问候 迈克尔
答案 0 :(得分:1)
您的问题很可能只是捕获组编号的问题:备选分支中的两个 (\d) 也是捕获组,分别是第 5 组和第 6 组,因此客队实际上在第 7 组,而不是第 5 组。
下面的正则表达式没有改变您原来的匹配逻辑(我对您的数据了解得不够多),
只是调整了捕获组的用法。
编辑 - 这个版本适用于您的测试数据。它仍是同一个正则表达式,只是增加了一些处理空白字符和换行的部分。
# /[^\S\r\n]*(?:\r?\n)(\d\d\/\d\d)[^\S\r\n]+(\d\d:\d\d)[^\S\r\n]*(?:\r?\n)(.+)(?:\r?\n)[^\S\r\n]*(?:\r?\n)(.+)(?:\r?\n)(?:-|(\d):(\d)[^\S\r\n]+\(\d:\d\))[^\S\r\n]*(?:\r?\n)(.+)/
[^\S\r\n]*
(?: \r? \n ) # linebreak
# ---------------
( \d\d / \d\d ) # (1), Date
[^\S\r\n]+
( \d\d : \d\d ) # (2), Time
[^\S\r\n]*
(?: \r? \n ) # linebreak
# ---------------
( .+ ) # (3), Place
(?: \r? \n ) # linebreak
# ---------------
[^\S\r\n]* # blank line
(?: \r? \n ) # linebreak
# ---------------
( .+ ) # (4), Home
(?: \r? \n ) # linebreak
# ---------------
(?:
- # No score
| # or,
( \d ) # (5), Score home
: # :
( \d ) # (6), Score away
[^\S\r\n]+
\( \d : \d \)
)
[^\S\r\n]*
(?: \r? \n ) # linebreak
# ---------------
( .+ ) # (7), Away
未经测试的JS代码
// Matches one fixture spread over several lines. `[^\S\r\n]` means
// "whitespace other than a line break", so line boundaries are only ever
// consumed by the explicit `(?:\r?\n)` parts.
// Capture groups: 1 date, 2 time, 3 place, 4 home team,
// 5 home score, 6 away score, 7 away team.
var pattern = /[^\S\r\n]*(?:\r?\n)(\d\d\/\d\d)[^\S\r\n]+(\d\d:\d\d)[^\S\r\n]*(?:\r?\n)(.+)(?:\r?\n)[^\S\r\n]*(?:\r?\n)(.+)(?:\r?\n)(?:-|(\d):(\d)[^\S\r\n]+\(\d:\d\))[^\S\r\n]*(?:\r?\n)(.+)/g;
// Declared explicitly: assigning to an undeclared name creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
var match;
while ((match = pattern.exec( data )) !== null)
{
    // Groups 5 and 6 live inside the score alternative; when the "-" branch
    // matches instead they did not participate and are undefined.
    var score = (match[5] !== undefined) ? match[5] + " to " + match[6] : "no info";

    // console.log already appends a line break, so no trailing "\n" is needed.
    console.log( "" );
    console.log( "Date: " + match[1] );
    console.log( "Time: " + match[2] );
    console.log( "Place: " + match[3] );
    console.log( "Home: " + match[4] );
    console.log( "Away: " + match[7] );
    console.log( "Score: " + score );
}
Perl测试用例
# Slurp mode: with the input record separator undefined, a single read of
# <DATA> returns everything after the __DATA__ marker as one string.
undef $/;
my $fixtures = <DATA>;

# Capture groups: 1 date, 2 time, 3 place, 4 home team,
# 5 home score, 6 away score, 7 away team.
# Groups 5 and 6 are undefined when the "-" (no score) branch matched.
while ( $fixtures =~ /[^\S\r\n]*(?:\r?\n)(\d\d\/\d\d)[^\S\r\n]+(\d\d:\d\d)[^\S\r\n]*(?:\r?\n)(.+)(?:\r?\n)[^\S\r\n]*(?:\r?\n)(.+)(?:\r?\n)(?:-|(\d):(\d)[^\S\r\n]+\(\d:\d\))[^\S\r\n]*(?:\r?\n)(.+)/g )
{
    my ( $date, $time, $place, $home, $home_goals, $away_goals, $away ) =
        ( $1, $2, $3, $4, $5, $6, $7 );

    my $score = defined $home_goals ? "$home_goals to $away_goals" : "no info";

    print "\n",
          "Date: $date\n",
          "Time: $time\n",
          "Place: $place\n",
          "Home: $home\n",
          "Away: $away\n",
          "Score: $score\n";
}
__DATA__
1
15/06 16:00
Brasília
Brasilien
3:0 (1:0)
Japan
2
23/06 16:00
Recife
Uruguay
-
Tahiti
输出 >>
Date: 15/06
Time: 16:00
Place: Brasília
Home: Brasilien
Away: Japan
Score: 3 to 0
Date: 23/06
Time: 16:00
Place: Recife
Home: Uruguay
Away: Tahiti
Score: no info