以下JavaScript是我试图提取的html页面的一部分。我正在使用Java来实现这一点,并且我提供了我正在使用的方法。
以下是我想从HTML页面中提取的JavaScript
window.arMailRuMessages = [];
arMailRuMessages = (function() {
var k = 1024,
u = ajs.Html.unescape,
m = function(data) {
try {
return u(decodeURIComponent(data.text));
} catch (e) {}
return '';
};
return [
{
id: "14412430340000000392",
prev: "",
next: "14412428590000000596",
subject: u("hi"),
date: "1441243034",
size: "3" | 0,
folder: "0",
correspondents: {
from: [{
name: u("firstname lastname"),
email: u("firstname@gmail.com"),
avatars: {
"default": u("\/\/filin.mail.ru\/pic?email=firstname%40gmail.com&trust=true&user=firstname%40mail.ru&sign=CA0D4E8E74E806A459EA9C793CE8BC665EB2D049")
}
}],
to: [{
name: u(""),
email: u("firstname6000@mail.ru"),
avatars: {
"default": u("")
}
}],
cc: []
},
flags: {
spf: true,
unread: true,
flagged: false,
reply: false,
forward: false,
attach: false
},
snippet: m({
"ntype": "letter",
"text": "thisisaford"
}),
priority: 3
}, {
id: "14412428590000000596",
prev: "14412430340000000392",
next: "",
subject: u("hi"),
date: "1441242859",
size: "3" | 0,
folder: "0",
correspondents: {
from: [{
name: u("firstname lastname"),
email: u("firstname@gmail.com"),
avatars: {
"default": u("\/\/filin.mail.ru\/pic?email=firstname%40gmail.com&trust=true&user=firstname%40mail.ru&sign=CA0D4E8E74E806A459EA9C793CE8BC665EB2D049")
}
}],
to: [{
name: u(""),
email: u("firstname@mail.ru"),
avatars: {
"default": u("")
}
}],
cc: []
},
flags: {
spf: true,
unread: true,
flagged: false,
reply: false,
forward: false,
attach: false
},
snippet: m({
"ntype": "letter",
"text": "thisisatest"
}),
priority: 3
}
];
})();
__log.letters_data_js = 1;
</script>
我已创建此Java方法以从HTML页面中提取脚本
public String jsMessages(String content)
{
String result = "";
String pattern = "window.arMailRuMessages((.|\\n)*)__log.letters_data_js";
Pattern r = Pattern.compile(pattern);
Matcher m = r.matcher(content);
if (m.find())
{
// System.out.println("Found value: " + m.group(1));
result = m.group(1);
}
else
{
System.out.println("NO MATCH");
}
return result;
}
当我运行程序时,我收到以下错误
Exception in thread "Thread-0" java.lang.StackOverflowError
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
at java.util.regex.Pattern$CharProperty.match(Unknown Source)
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
at java.util.regex.Pattern$CharProperty.match(Unknown Source)
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
at java.util.regex.Pattern$CharProperty.match(Unknown Source)
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
at java.util.regex.Pattern$CharProperty.match(Unknown Source)
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
at java.util.regex.Pattern$CharProperty.match(Unknown Source)
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
at java.util.regex.Pattern$CharProperty.match(Unknown Source)
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
at java.util.regex.Pattern$CharProperty.match(Unknown Source)
at java.util.regex.Pattern$Branch.match(Unknown Source)
at java.util.regex.Pattern$GroupHead.match(Unknown Source)
at java.util.regex.Pattern$Loop.match(Unknown Source)
at java.util.regex.Pattern$GroupTail.match(Unknown Source)
at java.util.regex.Pattern$BranchConn.match(Unknown Source)
知道我做错了什么吗?或者更好的方法吗?
答案 0 :(得分:4)
请勿使用(.|\\n)*
。使用.*
和Pattern.compile(pattern, Pattern.DOTALL)
。
根据您的使用情况,您可能希望将其设为“不情愿”而不是“贪婪”:.*?
。
也应该提升表现。